0
Fork 0
mirror of https://github.com/project-zot/zot.git synced 2025-01-06 22:40:28 -05:00
zot/pkg/extensions/search/cve/scan.go
Andrei Aaron 7c78f80a96
feat(cve): implement CVE scanning as background tasks (#1833)
1. Move existing CVE DB download generator/task logic under the cve package
2. Add a new CVE scanner task generator and task type to run in the background, as well as tests for it
3. Move the CVE cache in its own package
4. Add CVE scanner methods to check if an entry is present in the cache, and to retrieve the results
5. Modify the FilterTags MetaDB method to not exit on first error
This is needed in order to pass all tags to the generator,
instead of the generator stopping at the first set of invalid data
6. Integrate the new scanning task generator with the existing zot code.
7. Fix an issue where the CVE scan results for multiarch images were not cached
8. Rewrite some of the older CVE tests to use the new image-utils test package
9. Use the CVE scanner as attribute of the controller instead of CveInfo.
Remove functionality of CVE DB update from CveInfo, it is now responsible,
as the name states, only for providing CVE information.
10. The logic to get maximum severity and cve count for image summaries now uses only the scanner cache.
11. Removed the GetCVESummaryForImage method from CveInfo as it was only used in tests

Signed-off-by: Andrei Aaron <aaaron@luxoft.com>
2023-09-22 11:49:17 -07:00

207 lines
5.4 KiB
Go

package cveinfo
import (
"context"
"sync"
godigest "github.com/opencontainers/go-digest"
"zotregistry.io/zot/pkg/log"
mTypes "zotregistry.io/zot/pkg/meta/types"
reqCtx "zotregistry.io/zot/pkg/requestcontext"
"zotregistry.io/zot/pkg/scheduler"
)
// NewScanTaskGenerator builds the task generator that schedules background
// CVE scans.
//
// metaDB is queried for candidate images, scanner performs the scans and
// holds the result cache, and log receives the generator's messages. The
// returned generator starts with no scheduled digests, no recorded scan
// errors, and is not marked as done.
func NewScanTaskGenerator(
	metaDB mTypes.MetaDB,
	scanner Scanner,
	log log.Logger,
) scheduler.TaskGenerator {
	generator := new(scanTaskGenerator)

	generator.metaDB = metaDB
	generator.scanner = scanner
	generator.log = log

	// Bookkeeping shared with the scan tasks; every access goes through lock.
	generator.lock = new(sync.Mutex)
	generator.scheduled = make(map[string]bool)
	generator.scanErrors = make(map[string]error)

	// generator.done keeps its zero value (false): nothing has been scanned yet.
	return generator
}
// scanTaskGenerator takes manifests from MetaDB and runs the CVE scanner on them.
// If the scanner already has results cached for a specific manifest, or the
// manifest cannot be scanned, the manifest is skipped.
// If there are no manifests missing from the cache, the generator finishes.
type scanTaskGenerator struct {
	// log receives the messages of the generator and of the tasks it creates.
	log log.Logger
	// metaDB is queried through FilterTags to find images which still need a scan.
	metaDB mTypes.MetaDB
	// scanner runs the scans and answers the cache and format checks.
	scanner Scanner
	// lock is held by the helpers reading or writing scanErrors and scheduled,
	// and by Reset.
	lock *sync.Mutex
	// scanErrors maps a digest to the error returned by its failed scan.
	scanErrors map[string]error
	// scheduled holds the digests which have a scan task that has not finished yet.
	scheduled map[string]bool
	// done is set by Next when no candidates are left, and cleared by Reset.
	done bool
}
// getMatcherFunc returns the filter which Next hands to MetaDB.FilterTags.
//
// The filter decides per manifest, based on its scan status: a manifest is
// accepted only if it has no pending scan task, no recorded scan error, no
// cached result, and the scanner reports its format as scannable.
// An index scan aggregates the results of its manifest scans, so if at least
// one manifest of an index can be scanned, the index and its tag are returned
// by the caller function too.
func (gen *scanTaskGenerator) getMatcherFunc() mTypes.FilterFunc {
	return func(repoMeta mTypes.RepoMetadata, manifestMeta mTypes.ManifestMetadata) bool {
		digest := godigest.FromBytes(manifestMeta.ManifestBlob).String()

		// The cases are evaluated top to bottom and stop at the first match.
		switch {
		case gen.isScheduled(digest):
			// A scan task for this manifest has already been handed out.
			return false
		case gen.hasError(digest):
			// A previous scan of this manifest failed. Skipping it keeps the
			// generator from retrying, in a loop, an image which would
			// consistently fail.
			return false
		case gen.scanner.IsResultCached(digest):
			// The manifest was already scanned.
			return false
		}

		// What remains is accepted only when the scanner can handle the image
		// format and the check itself did not fail.
		scannable, err := gen.scanner.IsImageFormatScannable(repoMeta.Name, digest)

		return scannable && err == nil
	}
}
// addError records err as the scan error of the given digest, replacing any
// error stored for it before. The generator lock is held for the write.
func (gen *scanTaskGenerator) addError(digest string, err error) {
	gen.lock.Lock()
	defer gen.lock.Unlock()
	gen.scanErrors[digest] = err
}
// hasError reports whether a scan error has been recorded for the given digest.
// Only the presence of an entry matters, not the stored error value.
func (gen *scanTaskGenerator) hasError(digest string) bool {
	gen.lock.Lock()
	defer gen.lock.Unlock()

	_, failedBefore := gen.scanErrors[digest]

	return failedBefore
}
// setScheduled marks the given digest as scheduled when isScheduled is true,
// and removes the mark otherwise. Removing a digest which is not marked
// changes nothing.
func (gen *scanTaskGenerator) setScheduled(digest string, isScheduled bool) {
	gen.lock.Lock()
	defer gen.lock.Unlock()

	if isScheduled {
		gen.scheduled[digest] = true

		return
	}

	// delete is a no-op for a missing key, so no existence check is needed.
	delete(gen.scheduled, digest)
}
// isScheduled reports whether the given digest is currently marked as scheduled.
// Only the presence of an entry matters, not the stored boolean.
func (gen *scanTaskGenerator) isScheduled(digest string) bool {
	gen.lock.Lock()
	defer gen.lock.Unlock()

	_, pending := gen.scheduled[digest]

	return pending
}
// Next returns a scan task for one image which still needs scanning.
//
// When MetaDB yields no candidates, the generator is marked as done and a nil
// task is returned together with a nil error. An error from MetaDB is logged
// as a warning and never returned.
func (gen *scanTaskGenerator) Next() (scheduler.Task, error) {
	// MetaDB requires a context for authorization, so the query runs as an
	// admin user named "scheduler".
	accessControl := reqCtx.NewUserAccessControl()
	accessControl.SetUsername("scheduler")
	accessControl.SetIsAdmin(true)

	// Obtain the repos with unscanned scannable manifests.
	// A single match would be enough, but FilterTags returns all of them.
	candidates, _, _, err := gen.metaDB.FilterTags(
		accessControl.DeriveContext(context.Background()),
		gen.getMatcherFunc(),
	)
	if err != nil {
		// Do not stop the generator for potential MetaDB inconsistencies,
		// as there may be scannable images not yet scanned.
		gen.log.Warn().Err(err).Msg("Scheduled CVE scan: error while obtaining repo metadata")
	}

	// Nothing was returned: all results are already in the cache,
	// or the remaining manifests cannot be scanned.
	if len(candidates) == 0 {
		gen.log.Info().Msg("Scheduled CVE scan: finished for available images")

		gen.done = true

		return nil, nil
	}

	// The matches contain only unscanned images, so any repo and any of its
	// tags will do: take the first repo and whichever tag the range yields first.
	target := candidates[0]

	var digest string

	for _, descriptor := range target.Tags {
		digest = descriptor.Digest

		break
	}

	// Mark the digest as scheduled so it is skipped on the next generator run.
	gen.setScheduled(digest, true)

	return newScanTask(gen, target.Name, digest), nil
}
// IsDone reports whether the generator has finished: Next sets the flag when
// it finds no more images to scan, and Reset clears it.
// NOTE(review): the flag is read here without taking gen.lock - confirm that
// callers never run this concurrently with Next or Reset.
func (gen *scanTaskGenerator) IsDone() bool {
	return gen.done
}
// IsReady always reports true: this generator has no readiness condition.
func (gen *scanTaskGenerator) IsReady() bool {
	return true
}
// Reset returns the generator to its initial state: the done flag is cleared
// and both the scheduled digests and the recorded scan errors are replaced by
// fresh empty maps. The generator lock is held for the whole reset.
func (gen *scanTaskGenerator) Reset() {
	gen.lock.Lock()
	defer gen.lock.Unlock()

	gen.done = false
	gen.scanErrors = make(map[string]error)
	gen.scheduled = make(map[string]bool)
}
// scanTask is the unit of work which scans one image, identified by its repo
// and digest, and reports the outcome back to the generator that created it.
type scanTask struct {
	// generator provides the scanner and logger, and keeps the scheduling
	// and error bookkeeping which the task updates.
	generator *scanTaskGenerator
	// repo is the name of the repository holding the image.
	repo string
	// digest identifies the image inside repo.
	digest string
}
// newScanTask creates the task which scans the image repo@digest on behalf of
// the given generator.
func newScanTask(generator *scanTaskGenerator, repo string, digest string) *scanTask {
	return &scanTask{
		generator: generator,
		repo:      repo,
		digest:    digest,
	}
}
// DoWork scans the image of this task with the generator's scanner.
//
// A scan failure is logged, recorded in the generator under the task digest,
// and returned. In every case the digest is unmarked as scheduled when the
// method returns. ctx is not used by the scan.
func (st *scanTask) DoWork(ctx context.Context) error {
	gen := st.generator

	// Once the work is over, this entry is cleaned from the generator.
	defer gen.setScheduled(st.digest, false)

	image := st.repo + "@" + st.digest

	// The scanner caches the results internally, so the returned results
	// can be discarded for now.
	_, err := gen.scanner.ScanImage(image)
	if err != nil {
		gen.log.Error().Err(err).Str("image", image).Msg("Scheduled CVE scan errored for image")
		gen.addError(st.digest, err)

		return err
	}

	gen.log.Debug().Str("image", image).Msg("Scheduled CVE scan completed successfully for image")

	return nil
}