From f7829d647079b3cb3a6604a705092ac9e5e5443b Mon Sep 17 00:00:00 2001 From: Ramkumar Chinchani Date: Sat, 17 Apr 2021 20:30:23 +0000 Subject: [PATCH] dedupe: optimize check-blob with hard links When large images share layers across repositories, clients benefit from not re-uploading the same blobs over and over again. We ensure this by hard-linking the already-stored blob into the requested repository when the check-blob API is called. Signed-off-by: Ramkumar Chinchani --- pkg/storage/storage.go | 63 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/pkg/storage/storage.go b/pkg/storage/storage.go index 30a4744e..71a29868 100644 --- a/pkg/storage/storage.go +++ b/pkg/storage/storage.go @@ -106,14 +106,9 @@ func (is *ImageStore) Lock() { func (is *ImageStore) Unlock() { is.lock.Unlock() } - -// InitRepo creates an image repository under this store. -func (is *ImageStore) InitRepo(name string) error { +func (is *ImageStore) initRepo(name string) error { repoDir := path.Join(is.rootDir, name) - is.Lock() - defer is.Unlock() - if fi, err := os.Stat(repoDir); err == nil && fi.IsDir() { return nil } @@ -159,6 +154,14 @@ func (is *ImageStore) InitRepo(name string) error { return nil } +// InitRepo creates an image repository under this store. +func (is *ImageStore) InitRepo(name string) error { + is.Lock() + defer is.Unlock() + + return is.initRepo(name) +} + // ValidateRepo validates that the repository layout is complaint with the OCI repo layout. 
func (is *ImageStore) ValidateRepo(name string) (bool, error) { // https://github.com/opencontainers/image-spec/blob/master/image-layout.md#content @@ -975,16 +978,54 @@ func (is *ImageStore) CheckBlob(repo string, digest string, blobPath := is.BlobPath(repo, d) - is.RLock() - defer is.RUnlock() + if is.dedupe && is.cache != nil { + is.Lock() + defer is.Unlock() + } else { + is.RLock() + defer is.RUnlock() + } blobInfo, err := os.Stat(blobPath) - if err != nil { - is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to stat blob") + if err == nil { + return true, blobInfo.Size(), nil + } + + is.log.Error().Err(err).Str("blob", blobPath).Msg("failed to stat blob") + + if !is.dedupe || is.cache == nil { return false, -1, errors.ErrBlobNotFound } - return true, blobInfo.Size(), nil + // lookup cache and if found, dedupe here + dstRecord, err := is.cache.GetBlob(digest) + if err != nil { + return false, -1, errors.ErrBlobNotFound + } + + dstRecord = path.Join(is.rootDir, dstRecord) + + is.log.Debug().Str("digest", digest).Str("dstRecord", dstRecord).Msg("cache: found dedupe record") + + if err := is.initRepo(repo); err != nil { + is.log.Error().Err(err).Str("repo", repo).Msg("unable to initialize an empty repo") + return false, -1, err + } + + ensureDir(filepath.Dir(blobPath), is.log) + + if err := os.Link(dstRecord, blobPath); err != nil { + is.log.Error().Err(err).Str("blobPath", blobPath).Str("link", dstRecord).Msg("dedupe: unable to hard link") + + return false, -1, errors.ErrBlobNotFound + } + + blobInfo, err = os.Stat(blobPath) + if err == nil { + return true, blobInfo.Size(), nil + } + + return false, -1, errors.ErrBlobNotFound } // GetBlob returns a stream to read the blob.