From 53b5fa6493af0e8510001d8b07a9efe2cf7db584 Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Tue, 20 Jul 2021 14:04:10 -0700 Subject: [PATCH] dedupe: stat blob path before creating link --- errors/errors.go | 1 + go.mod | 1 + go.sum | 2 + pkg/storage/cache.go | 10 +-- pkg/storage/cache_test.go | 5 ++ pkg/storage/storage.go | 14 +++++ pkg/storage/storage_test.go | 117 ++++++++++++++++++++++++++++++++++++ 7 files changed, 146 insertions(+), 4 deletions(-) diff --git a/errors/errors.go b/errors/errors.go index bb3c26b2..915b0628 100644 --- a/errors/errors.go +++ b/errors/errors.go @@ -39,4 +39,5 @@ var ( ErrDuplicateConfigName = errors.New("cli: cli config name already added") ErrInvalidRoute = errors.New("routes: invalid route prefix") ErrImgStoreNotFound = errors.New("routes: image store not found corresponding to given route") + ErrEmptyValue = errors.New("cache: empty value") ) diff --git a/go.mod b/go.mod index fad9e683..b2a8bcc9 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( github.com/json-iterator/go v1.1.10 github.com/klauspost/compress v1.11.13 // indirect github.com/klauspost/pgzip v1.2.5 // indirect + github.com/libopenstorage/openstorage v8.0.0+incompatible github.com/mitchellh/mapstructure v1.1.2 github.com/nmcclain/asn1-ber v0.0.0-20170104154839-2661553a0484 // indirect github.com/nmcclain/ldap v0.0.0-20191021200707-3b3b69a7e9e3 diff --git a/go.sum b/go.sum index fe9e57d3..c91f4a91 100644 --- a/go.sum +++ b/go.sum @@ -606,6 +606,8 @@ github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LE github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/leodido/go-urn v1.1.0/go.mod h1:+cyI34gQWZcE1eQU7NVgKkkzdXDQHr1dBMtdAPozLkw= +github.com/libopenstorage/openstorage v8.0.0+incompatible h1:C4cFpStNOhsqShZSDRtW+4t5uCHbcdyPszG5ADwIRTM= +github.com/libopenstorage/openstorage v8.0.0+incompatible/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wnOzEhNx2YQedreMcUyc= github.com/logrusorgru/aurora v0.0.0-20200102142835-e9ef32dff381/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= diff --git a/pkg/storage/cache.go b/pkg/storage/cache.go index 6eaaf1e7..69eb3b25 100644 --- a/pkg/storage/cache.go +++ b/pkg/storage/cache.go @@ -51,6 +51,12 @@ func NewCache(rootDir string, name string, log zlog.Logger) *Cache { } func (c *Cache) PutBlob(digest string, path string) error { + if path == "" { + c.log.Error().Err(errors.ErrEmptyValue).Str("digest", digest).Msg("empty path provided") + + return errors.ErrEmptyValue + } + // use only relative (to rootDir) paths on blobs relp, err := filepath.Rel(c.rootDir, path) if err != nil { @@ -109,10 +115,6 @@ func (c *Cache) GetBlob(digest string) (string, error) { return "", err } - if len(blobPath.String()) == 0 { - return "", nil - } - return blobPath.String(), nil } diff --git a/pkg/storage/cache_test.go b/pkg/storage/cache_test.go index 0e49dc4e..82c54989 100644 --- a/pkg/storage/cache_test.go +++ b/pkg/storage/cache_test.go @@ -49,5 +49,10 @@ func TestCache(t *testing.T) { err = c.DeleteBlob("key", "bogusValue") So(err, ShouldBeNil) + + // try to insert empty path + err = c.PutBlob("key", "") + So(err, ShouldNotBeNil) + So(err, ShouldEqual, errors.ErrEmptyValue) }) } diff --git a/pkg/storage/storage.go b/pkg/storage/storage.go index aa4f42c0..9e8f9200 100644 --- a/pkg/storage/storage.go +++ b/pkg/storage/storage.go @@ -956,6 +956,7 @@ retry: } if dstRecord == "" { + // cache record doesn't exist, so first disk and cache entry for this digest if err := is.cache.PutBlob(dstDigest.String(), dst); err != nil { is.log.Error().Err(err).Str("blobPath", dst).Msg("dedupe: unable to insert blob record") @@ -971,6 +972,8 @@ retry: is.log.Debug().Str("src", src).Str("dst", dst).Msg("dedupe: rename") } else { + // cache record exists, but due to GC and upgrades from older versions, + // disk content and cache records may go out of sync dstRecord = path.Join(is.rootDir, dstRecord) dstRecordFi, err := os.Stat(dstRecord) @@ -985,19 +988,30 @@ retry: } goto retry } + dstFi, err := os.Stat(dst) if err != nil && !os.IsNotExist(err) { is.log.Error().Err(err).Str("blobPath", dstRecord).Msg("dedupe: unable to stat") return err } + if !os.SameFile(dstFi, dstRecordFi) { + // blob lookup cache out of sync with actual disk contents + if err := os.Remove(dst); err != nil && !os.IsNotExist(err) { + is.log.Error().Err(err).Str("dst", dst).Msg("dedupe: unable to remove blob") + return err + } + + is.log.Debug().Str("blobPath", dst).Msg("dedupe: creating hard link") + if err := os.Link(dstRecord, dst); err != nil { is.log.Error().Err(err).Str("blobPath", dst).Str("link", dstRecord).Msg("dedupe: unable to hard link") return err } } + if err := os.Remove(src); err != nil { is.log.Error().Err(err).Str("src", src).Msg("dedupe: uname to remove blob") return err diff --git a/pkg/storage/storage_test.go b/pkg/storage/storage_test.go index b48de1cd..fe6c93fb 100644 --- a/pkg/storage/storage_test.go +++ b/pkg/storage/storage_test.go @@ -6,11 +6,13 @@ import ( "encoding/json" "io/ioutil" "os" + "os/exec" "path" "strings" "sync" "testing" + "github.com/anuvu/zot/errors" "github.com/anuvu/zot/pkg/log" "github.com/anuvu/zot/pkg/storage" godigest "github.com/opencontainers/go-digest" @@ -532,6 +534,51 @@ func TestNegativeCases(t *testing.T) { il := storage.NewImageStore(dir, true, true, log.Logger{Logger: zerolog.New(os.Stdout)}) So(il, ShouldNotBeNil) So(il.InitRepo("test"), ShouldBeNil) + + err = os.MkdirAll(path.Join(dir, "invalid-test"), 0755) + So(err, ShouldBeNil) + + err = os.Chmod(path.Join(dir, "invalid-test"), 0000) // remove all perms + So(err, ShouldBeNil) + + _, err = il.ValidateRepo("invalid-test") + So(err, ShouldNotBeNil) + So(err, ShouldEqual, errors.ErrRepoNotFound) + + err = os.Chmod(path.Join(dir, "invalid-test"), 0755) // remove all perms + So(err, ShouldBeNil) + + err = ioutil.WriteFile(path.Join(dir, "invalid-test", "blobs"), []byte{}, 0755) // nolint: gosec + So(err, ShouldBeNil) + + err = ioutil.WriteFile(path.Join(dir, "invalid-test", "index.json"), []byte{}, 0755) // nolint: gosec + So(err, ShouldBeNil) + + err = ioutil.WriteFile(path.Join(dir, "invalid-test", ispec.ImageLayoutFile), []byte{}, 0755) // nolint: gosec + So(err, ShouldBeNil) + + isValid, err := il.ValidateRepo("invalid-test") + So(err, ShouldBeNil) + So(isValid, ShouldEqual, false) + + err = os.Remove(path.Join(dir, "invalid-test", "blobs")) + So(err, ShouldBeNil) + + err = os.Mkdir(path.Join(dir, "invalid-test", "blobs"), 0755) + So(err, ShouldBeNil) + + isValid, err = il.ValidateRepo("invalid-test") + So(err, ShouldNotBeNil) + So(isValid, ShouldEqual, false) + + err = ioutil.WriteFile(path.Join(dir, "invalid-test", ispec.ImageLayoutFile), []byte("{}"), 0755) // nolint: gosec + So(err, ShouldBeNil) + + isValid, err = il.ValidateRepo("invalid-test") + So(err, ShouldNotBeNil) + So(err, ShouldEqual, errors.ErrRepoBadVersion) + So(isValid, ShouldEqual, false) + files, err := ioutil.ReadDir(path.Join(dir, "test")) So(err, ShouldBeNil) for _, f := range files { @@ -597,6 +644,76 @@ func TestNegativeCases(t *testing.T) { _, _, _, err = il.GetImageManifest("test", "") So(err, ShouldNotBeNil) }) + + Convey("Invalid dedupe sceanrios", t, func() { + dir, err := ioutil.TempDir("", "oci-repo-test") + if err != nil { + panic(err) + } + defer os.RemoveAll(dir) + + il := storage.NewImageStore(dir, true, true, log.Logger{Logger: zerolog.New(os.Stdout)}) + v, err := il.NewBlobUpload("dedupe1") + So(err, ShouldBeNil) + So(v, ShouldNotBeEmpty) + + content := []byte("test-data3") + buf := bytes.NewBuffer(content) + l := buf.Len() + d := godigest.FromBytes(content) + b, err := il.PutBlobChunkStreamed("dedupe1", v, buf) + So(err, ShouldBeNil) + So(b, ShouldEqual, l) + + blobDigest1 := strings.Split(d.String(), ":")[1] + So(blobDigest1, ShouldNotBeEmpty) + + err = il.FinishBlobUpload("dedupe1", v, buf, d.String()) + So(err, ShouldBeNil) + So(b, ShouldEqual, l) + + // Create a file at the same place where FinishBlobUpload will create + err = il.InitRepo("dedupe2") + So(err, ShouldBeNil) + + err = os.MkdirAll(path.Join(dir, "dedupe2", "blobs/sha256"), 0755) + So(err, ShouldBeNil) + + err = ioutil.WriteFile(path.Join(dir, "dedupe2", "blobs/sha256", blobDigest1), content, 0755) // nolint: gosec + So(err, ShouldBeNil) + + v, err = il.NewBlobUpload("dedupe2") + So(err, ShouldBeNil) + So(v, ShouldNotBeEmpty) + + content = []byte("test-data3") + buf = bytes.NewBuffer(content) + l = buf.Len() + d = godigest.FromBytes(content) + b, err = il.PutBlobChunkStreamed("dedupe2", v, buf) + So(err, ShouldBeNil) + So(b, ShouldEqual, l) + + cmd := exec.Command("sudo", "chattr", "+i", path.Join(dir, "dedupe2", "blobs/sha256", blobDigest1)) // nolint: gosec + _, err = cmd.Output() + if err != nil { + panic(err) + } + + err = il.FinishBlobUpload("dedupe2", v, buf, d.String()) + So(err, ShouldNotBeNil) + So(b, ShouldEqual, l) + + cmd = exec.Command("sudo", "chattr", "-i", path.Join(dir, "dedupe2", "blobs/sha256", blobDigest1)) // nolint: gosec + _, err = cmd.Output() + if err != nil { + panic(err) + } + + err = il.FinishBlobUpload("dedupe2", v, buf, d.String()) + So(err, ShouldBeNil) + So(b, ShouldEqual, l) + }) } func TestHardLink(t *testing.T) {