0
Fork 0
mirror of https://github.com/project-zot/zot.git synced 2024-12-30 22:34:13 -05:00

fix: excessive memory usage (#2164)

instead of reading entire files before calculating their digests
stream them by using their Reader method.

Signed-off-by: Petu Eusebiu <peusebiu@cisco.com>
This commit is contained in:
peusebiu 2024-01-16 19:04:36 +02:00 committed by GitHub
parent d7f2429c01
commit d1bf713573
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 114 additions and 143 deletions

View file

@ -1,7 +1,6 @@
package imagestore package imagestore
import ( import (
"bytes"
"context" "context"
"crypto/sha256" "crypto/sha256"
"encoding/json" "encoding/json"
@ -951,25 +950,25 @@ func (is *ImageStore) FullBlobUpload(repo string, body io.Reader, dstDigest godi
uuid := u.String() uuid := u.String()
src := is.BlobUploadPath(repo, uuid) src := is.BlobUploadPath(repo, uuid)
digester := sha256.New() digester := sha256.New()
buf := new(bytes.Buffer)
_, err = buf.ReadFrom(body) blobFile, err := is.storeDriver.Writer(src, false)
if err != nil { if err != nil {
is.log.Error().Err(err).Msg("failed to read blob") is.log.Error().Err(err).Str("blob", src).Msg("failed to open blob")
return "", -1, zerr.ErrUploadNotFound
}
defer blobFile.Close()
mw := io.MultiWriter(blobFile, digester)
nbytes, err := io.Copy(mw, body)
if err != nil {
return "", -1, err return "", -1, err
} }
nbytes, err := is.storeDriver.WriteFile(src, buf.Bytes()) if err := blobFile.Commit(); err != nil {
if err != nil { is.log.Error().Err(err).Str("blob", src).Msg("failed to commit blob")
is.log.Error().Err(err).Msg("failed to write blob")
return "", -1, err
}
_, err = digester.Write(buf.Bytes())
if err != nil {
is.log.Error().Err(err).Str("component", "digester").Msg("failed to write")
return "", -1, err return "", -1, err
} }
@ -1008,7 +1007,7 @@ func (is *ImageStore) FullBlobUpload(repo string, body io.Reader, dstDigest godi
} }
} }
return uuid, int64(nbytes), nil return uuid, nbytes, nil
} }
func (is *ImageStore) DedupeBlob(src string, dstDigest godigest.Digest, dstRepo string, dst string) error { func (is *ImageStore) DedupeBlob(src string, dstDigest godigest.Digest, dstRepo string, dst string) error {
@ -1210,36 +1209,9 @@ func (is *ImageStore) StatBlob(repo string, digest godigest.Digest) (bool, int64
return false, -1, time.Time{}, err return false, -1, time.Time{}, err
} }
blobPath := is.BlobPath(repo, digest) binfo, err := is.originalBlobInfo(repo, digest)
binfo, err := is.storeDriver.Stat(blobPath)
if err == nil && binfo.Size() > 0 {
is.log.Debug().Str("blob path", blobPath).Msg("blob path found")
return true, binfo.Size(), binfo.ModTime(), nil
}
if err != nil { if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to stat blob") return false, -1, time.Time{}, err
return false, -1, time.Time{}, zerr.ErrBlobNotFound
}
// then it's a 'deduped' blob
// Check blobs in cache
dstRecord, err := is.checkCacheBlob(digest)
if err != nil {
is.log.Warn().Err(err).Str("digest", digest.String()).Msg("not found in cache")
return false, -1, time.Time{}, zerr.ErrBlobNotFound
}
binfo, err = is.storeDriver.Stat(dstRecord)
if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to stat blob")
return false, -1, time.Time{}, zerr.ErrBlobNotFound
} }
return true, binfo.Size(), binfo.ModTime(), nil return true, binfo.Size(), binfo.ModTime(), nil
@ -1318,34 +1290,12 @@ func (is *ImageStore) GetBlobPartial(repo string, digest godigest.Digest, mediaT
return nil, -1, -1, err return nil, -1, -1, err
} }
blobPath := is.BlobPath(repo, digest)
is.RLock(&lockLatency) is.RLock(&lockLatency)
defer is.RUnlock(&lockLatency) defer is.RUnlock(&lockLatency)
binfo, err := is.storeDriver.Stat(blobPath) binfo, err := is.originalBlobInfo(repo, digest)
if err != nil { if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to stat blob") return nil, -1, -1, err
return nil, -1, -1, zerr.ErrBlobNotFound
}
// is a deduped blob
if binfo.Size() == 0 {
// Check blobs in cache
blobPath, err = is.checkCacheBlob(digest)
if err != nil {
is.log.Debug().Err(err).Str("digest", digest.String()).Msg("not found in cache")
return nil, -1, -1, zerr.ErrBlobNotFound
}
binfo, err = is.storeDriver.Stat(blobPath)
if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to stat blob")
return nil, -1, -1, zerr.ErrBlobNotFound
}
} }
end := to end := to
@ -1354,16 +1304,16 @@ func (is *ImageStore) GetBlobPartial(repo string, digest godigest.Digest, mediaT
end = binfo.Size() - 1 end = binfo.Size() - 1
} }
blobHandle, err := is.storeDriver.Reader(blobPath, from) blobHandle, err := is.storeDriver.Reader(binfo.Path(), from)
if err != nil { if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to open blob") is.log.Error().Err(err).Str("blob", binfo.Path()).Msg("failed to open blob")
return nil, -1, -1, err return nil, -1, -1, err
} }
blobReadCloser, err := newBlobStream(blobHandle, from, end) blobReadCloser, err := newBlobStream(blobHandle, from, end)
if err != nil { if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to open blob stream") is.log.Error().Err(err).Str("blob", binfo.Path()).Msg("failed to open blob stream")
return nil, -1, -1, err return nil, -1, -1, err
} }
@ -1372,6 +1322,42 @@ func (is *ImageStore) GetBlobPartial(repo string, digest godigest.Digest, mediaT
return blobReadCloser, end - from + 1, binfo.Size(), nil return blobReadCloser, end - from + 1, binfo.Size(), nil
} }
/*
In the case of s3(which doesn't support links) we link them in our cache by
keeping a reference to the original blob and its duplicates
On the storage, original blobs are those with contents, and duplicates one are just empty files.
This function helps handling this situation, by using this one you can make sure you always get the original blob.
*/
func (is *ImageStore) originalBlobInfo(repo string, digest godigest.Digest) (driver.FileInfo, error) {
blobPath := is.BlobPath(repo, digest)
binfo, err := is.storeDriver.Stat(blobPath)
if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to stat blob")
return nil, zerr.ErrBlobNotFound
}
if binfo.Size() == 0 {
dstRecord, err := is.checkCacheBlob(digest)
if err != nil {
is.log.Debug().Err(err).Str("digest", digest.String()).Msg("not found in cache")
return nil, zerr.ErrBlobNotFound
}
binfo, err = is.storeDriver.Stat(dstRecord)
if err != nil {
is.log.Error().Err(err).Str("blob", dstRecord).Msg("failed to stat blob")
return nil, zerr.ErrBlobNotFound
}
}
return binfo, nil
}
// GetBlob returns a stream to read the blob. // GetBlob returns a stream to read the blob.
// blob selector instead of directly downloading the blob. // blob selector instead of directly downloading the blob.
func (is *ImageStore) GetBlob(repo string, digest godigest.Digest, mediaType string) (io.ReadCloser, int64, error) { func (is *ImageStore) GetBlob(repo string, digest godigest.Digest, mediaType string) (io.ReadCloser, int64, error) {
@ -1381,91 +1367,40 @@ func (is *ImageStore) GetBlob(repo string, digest godigest.Digest, mediaType str
return nil, -1, err return nil, -1, err
} }
blobPath := is.BlobPath(repo, digest)
is.RLock(&lockLatency) is.RLock(&lockLatency)
defer is.RUnlock(&lockLatency) defer is.RUnlock(&lockLatency)
binfo, err := is.storeDriver.Stat(blobPath) binfo, err := is.originalBlobInfo(repo, digest)
if err != nil { if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to stat blob")
return nil, -1, zerr.ErrBlobNotFound
}
blobReadCloser, err := is.storeDriver.Reader(blobPath, 0)
if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to open blob")
return nil, -1, err return nil, -1, err
} }
// is a 'deduped' blob? blobReadCloser, err := is.storeDriver.Reader(binfo.Path(), 0)
if binfo.Size() == 0 {
// Check blobs in cache
dstRecord, err := is.checkCacheBlob(digest)
if err != nil { if err != nil {
is.log.Debug().Err(err).Str("digest", digest.String()).Msg("not found in cache") is.log.Error().Err(err).Str("blob", binfo.Path()).Msg("failed to open blob")
return nil, -1, zerr.ErrBlobNotFound
}
binfo, err := is.storeDriver.Stat(dstRecord)
if err != nil {
is.log.Error().Err(err).Str("blob", dstRecord).Msg("failed to stat blob")
return nil, -1, zerr.ErrBlobNotFound
}
blobReadCloser, err := is.storeDriver.Reader(dstRecord, 0)
if err != nil {
is.log.Error().Err(err).Str("blob", dstRecord).Msg("failed to open blob")
return nil, -1, err return nil, -1, err
} }
return blobReadCloser, binfo.Size(), nil
}
// The caller function is responsible for calling Close() // The caller function is responsible for calling Close()
return blobReadCloser, binfo.Size(), nil return blobReadCloser, binfo.Size(), nil
} }
// GetBlobContent returns blob contents, the caller function MUST lock from outside. // GetBlobContent returns blob contents, the caller function MUST lock from outside.
// Should be used for small files(manifests/config blobs).
func (is *ImageStore) GetBlobContent(repo string, digest godigest.Digest) ([]byte, error) { func (is *ImageStore) GetBlobContent(repo string, digest godigest.Digest) ([]byte, error) {
if err := digest.Validate(); err != nil { if err := digest.Validate(); err != nil {
return []byte{}, err return []byte{}, err
} }
blobPath := is.BlobPath(repo, digest) binfo, err := is.originalBlobInfo(repo, digest)
binfo, err := is.storeDriver.Stat(blobPath)
if err != nil { if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to stat blob")
return []byte{}, zerr.ErrBlobNotFound
}
blobBuf, err := is.storeDriver.ReadFile(blobPath)
if err != nil {
is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to open blob")
return nil, err return nil, err
} }
// is a 'deduped' blob? blobBuf, err := is.storeDriver.ReadFile(binfo.Path())
if binfo.Size() == 0 {
// Check blobs in cache
dstRecord, err := is.checkCacheBlob(digest)
if err != nil { if err != nil {
is.log.Debug().Err(err).Str("digest", digest.String()).Msg("not found in cache") is.log.Error().Err(err).Str("blob", binfo.Path()).Msg("failed to open blob")
return nil, zerr.ErrBlobNotFound
}
blobBuf, err := is.storeDriver.ReadFile(dstRecord)
if err != nil {
is.log.Error().Err(err).Str("blob", dstRecord).Msg("failed to open blob")
return nil, err return nil, err
} }
@ -1473,7 +1408,36 @@ func (is *ImageStore) GetBlobContent(repo string, digest godigest.Digest) ([]byt
return blobBuf, nil return blobBuf, nil
} }
return blobBuf, nil // VerifyBlobDigestValue verifies that the blob which is addressed by given digest has a equivalent computed digest.
func (is *ImageStore) VerifyBlobDigestValue(repo string, digest godigest.Digest) error {
if err := digest.Validate(); err != nil {
return err
}
binfo, err := is.originalBlobInfo(repo, digest)
if err != nil {
return err
}
blobReadCloser, err := is.storeDriver.Reader(binfo.Path(), 0)
if err != nil {
return err
}
defer blobReadCloser.Close()
// compute its real digest
computedDigest, err := godigest.FromReader(blobReadCloser)
if err != nil {
return err
}
// if the computed digest is different than the blob name(its initial digest) then the blob has been corrupted.
if computedDigest != digest {
return zerr.ErrBadBlobDigest
}
return nil
} }
func (is *ImageStore) GetReferrers(repo string, gdigest godigest.Digest, artifactTypes []string, func (is *ImageStore) GetReferrers(repo string, gdigest godigest.Digest, artifactTypes []string,

View file

@ -3798,6 +3798,9 @@ func TestS3DedupeErr(t *testing.T) {
SizeFn: func() int64 { SizeFn: func() int64 {
return 0 return 0
}, },
PathFn: func() string {
return "repo1/dst1"
},
}, nil }, nil
}, },
ReaderFn: func(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { ReaderFn: func(ctx context.Context, path string, offset int64) (io.ReadCloser, error) {

View file

@ -279,21 +279,12 @@ func CheckLayers(
} }
for _, layer := range man.Layers { for _, layer := range man.Layers {
layerContent, err := imgStore.GetBlobContent(imageName, layer.Digest) if err := imgStore.VerifyBlobDigestValue(imageName, layer.Digest); err != nil {
if err != nil {
imageRes = getResult(imageName, tagName, layer.Digest, err) imageRes = getResult(imageName, tagName, layer.Digest, err)
break break
} }
computedDigest := godigest.FromBytes(layerContent)
if computedDigest != layer.Digest {
imageRes = getResult(imageName, tagName, layer.Digest, errors.ErrBadBlobDigest)
break
}
imageRes = getResult(imageName, tagName, "", nil) imageRes = getResult(imageName, tagName, "", nil)
} }

View file

@ -215,6 +215,9 @@ func TestStorageAPIs(t *testing.T) {
So(err, ShouldBeNil) So(err, ShouldBeNil)
So(n, ShouldEqual, len(body)) So(n, ShouldEqual, len(body))
So(upload, ShouldNotBeEmpty) So(upload, ShouldNotBeEmpty)
err = imgStore.VerifyBlobDigestValue("test", digest)
So(err, ShouldBeNil)
}) })
Convey("New blob upload", func() { Convey("New blob upload", func() {

View file

@ -64,6 +64,7 @@ type ImageStore interface { //nolint:interfacebloat
GetNextDigestWithBlobPaths(repos []string, lastDigests []godigest.Digest) (godigest.Digest, []string, error) GetNextDigestWithBlobPaths(repos []string, lastDigests []godigest.Digest) (godigest.Digest, []string, error)
GetAllBlobs(repo string) ([]string, error) GetAllBlobs(repo string) ([]string, error)
PopulateStorageMetrics(interval time.Duration, sch *scheduler.Scheduler) PopulateStorageMetrics(interval time.Duration, sch *scheduler.Scheduler)
VerifyBlobDigestValue(repo string, digest godigest.Digest) error
} }
type Driver interface { //nolint:interfacebloat type Driver interface { //nolint:interfacebloat

View file

@ -59,6 +59,7 @@ type MockedImageStore struct {
PutIndexContentFn func(repo string, index ispec.Index) error PutIndexContentFn func(repo string, index ispec.Index) error
PopulateStorageMetricsFn func(interval time.Duration, sch *scheduler.Scheduler) PopulateStorageMetricsFn func(interval time.Duration, sch *scheduler.Scheduler)
StatIndexFn func(repo string) (bool, int64, time.Time, error) StatIndexFn func(repo string) (bool, int64, time.Time, error)
VerifyBlobDigestValueFn func(repo string, digest godigest.Digest) error
} }
func (is MockedImageStore) StatIndex(repo string) (bool, int64, time.Time, error) { func (is MockedImageStore) StatIndex(repo string) (bool, int64, time.Time, error) {
@ -425,3 +426,11 @@ func (is MockedImageStore) PopulateStorageMetrics(interval time.Duration, sch *s
is.PopulateStorageMetricsFn(interval, sch) is.PopulateStorageMetricsFn(interval, sch)
} }
} }
func (is MockedImageStore) VerifyBlobDigestValue(repo string, digest godigest.Digest) error {
if is.VerifyBlobDigestValueFn != nil {
return is.VerifyBlobDigestValueFn(repo, digest)
}
return nil
}