mirror of
https://github.com/project-zot/zot.git
synced 2024-12-23 22:27:35 -05:00
d325c8b5f4
PR (linter: upgrade linter version #405) triggered lint job which failed with many errors generated by various linters. Configurations were added to golangcilint.yaml and several refactorings were made in order to improve the results of the linter. maintidx linter disabled Signed-off-by: Alex Stan <alexandrustan96@yahoo.ro>
531 lines
13 KiB
Go
531 lines
13 KiB
Go
//go:build minimal
|
|
// +build minimal
|
|
|
|
// nolint: varnamelen,forcetypeassert
|
|
package monitoring
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"path"
|
|
"regexp"
|
|
"strconv"
|
|
"time"
|
|
|
|
"zotregistry.io/zot/pkg/log"
|
|
)
|
|
|
|
const (
|
|
metricsNamespace = "zot"
|
|
// Counters.
|
|
httpConnRequests = metricsNamespace + ".http.requests"
|
|
repoDownloads = metricsNamespace + ".repo.downloads"
|
|
repoUploads = metricsNamespace + ".repo.uploads"
|
|
// Gauge.
|
|
repoStorageBytes = metricsNamespace + ".repo.storage.bytes"
|
|
serverInfo = metricsNamespace + ".info"
|
|
// Summary.
|
|
httpRepoLatencySeconds = metricsNamespace + ".http.repo.latency.seconds"
|
|
// Histogram.
|
|
httpMethodLatencySeconds = metricsNamespace + ".http.method.latency.seconds"
|
|
storageLockLatencySeconds = metricsNamespace + ".storage.lock.latency.seconds"
|
|
|
|
metricsScrapeTimeout = 2 * time.Minute
|
|
metricsScrapeCheckInterval = 30 * time.Second
|
|
)
|
|
|
|
type metricServer struct {
|
|
enabled bool
|
|
lastCheck time.Time
|
|
reqChan chan interface{}
|
|
cache *MetricsInfo
|
|
cacheChan chan *MetricsInfo
|
|
bucketsF2S map[float64]string // float64 to string conversion of buckets label
|
|
log log.Logger
|
|
}
|
|
|
|
type MetricsInfo struct {
|
|
Counters []*CounterValue
|
|
Gauges []*GaugeValue
|
|
Summaries []*SummaryValue
|
|
Histograms []*HistogramValue
|
|
}
|
|
|
|
// CounterValue stores info about a metric that is incremented over time,
|
|
// such as the number of requests to an HTTP endpoint.
|
|
type CounterValue struct {
|
|
Name string
|
|
Count int
|
|
LabelNames []string
|
|
LabelValues []string
|
|
}
|
|
|
|
// GaugeValue stores one value that is updated as time goes on, such as
|
|
// the amount of memory allocated.
|
|
type GaugeValue struct {
|
|
Name string
|
|
Value float64
|
|
LabelNames []string
|
|
LabelValues []string
|
|
}
|
|
|
|
// SummaryValue stores info about a metric that is incremented over time,
|
|
// such as the number of requests to an HTTP endpoint.
|
|
type SummaryValue struct {
|
|
Name string
|
|
Count int
|
|
Sum float64
|
|
LabelNames []string
|
|
LabelValues []string
|
|
}
|
|
|
|
type HistogramValue struct {
|
|
Name string
|
|
Count int
|
|
Sum float64
|
|
Buckets map[string]int
|
|
LabelNames []string
|
|
LabelValues []string
|
|
}
|
|
|
|
func GetDefaultBuckets() []float64 {
|
|
return []float64{.05, .5, 1, 5, 30, 60, 600, math.MaxFloat64}
|
|
}
|
|
|
|
func GetStorageLatencyBuckets() []float64 {
|
|
return []float64{.001, .01, 0.1, 1, 5, 10, 15, 30, 60, math.MaxFloat64}
|
|
}
|
|
|
|
// implements the MetricServer interface.
|
|
func (ms *metricServer) SendMetric(metric interface{}) {
|
|
if ms.enabled {
|
|
ms.reqChan <- metric
|
|
}
|
|
}
|
|
|
|
func (ms *metricServer) ForceSendMetric(metric interface{}) {
|
|
ms.reqChan <- metric
|
|
}
|
|
|
|
func (ms *metricServer) ReceiveMetrics() interface{} {
|
|
if !ms.enabled {
|
|
ms.enabled = true
|
|
}
|
|
ms.cacheChan <- &MetricsInfo{}
|
|
|
|
return <-ms.cacheChan
|
|
}
|
|
|
|
func (ms *metricServer) IsEnabled() (b bool) {
|
|
// send a bool value on the request channel to avoid data race
|
|
ms.reqChan <- b
|
|
|
|
return (<-ms.reqChan).(bool)
|
|
}
|
|
|
|
func (ms *metricServer) Run() {
|
|
sendAfter := make(chan time.Duration, 1)
|
|
// periodically send a notification to the metric server to check if we can disable metrics
|
|
go func() {
|
|
for {
|
|
t := metricsScrapeCheckInterval
|
|
time.Sleep(t)
|
|
sendAfter <- t
|
|
}
|
|
}()
|
|
|
|
for {
|
|
select {
|
|
case <-ms.cacheChan:
|
|
ms.lastCheck = time.Now()
|
|
ms.cacheChan <- ms.cache
|
|
case m := <-ms.reqChan:
|
|
switch v := m.(type) {
|
|
case CounterValue:
|
|
cv := m.(CounterValue)
|
|
ms.CounterInc(&cv)
|
|
case GaugeValue:
|
|
gv := m.(GaugeValue)
|
|
ms.GaugeSet(&gv)
|
|
case SummaryValue:
|
|
sv := m.(SummaryValue)
|
|
ms.SummaryObserve(&sv)
|
|
case HistogramValue:
|
|
hv := m.(HistogramValue)
|
|
ms.HistogramObserve(&hv)
|
|
case bool:
|
|
ms.reqChan <- ms.enabled
|
|
default:
|
|
ms.log.Error().Msgf("unexpected type %T", v)
|
|
}
|
|
case <-sendAfter:
|
|
// Check if we didn't receive a metrics scrape in a while and if so,
|
|
// disable metrics (possible node exporter down/crashed)
|
|
if ms.enabled {
|
|
lastCheckInterval := time.Since(ms.lastCheck)
|
|
if lastCheckInterval > metricsScrapeTimeout {
|
|
ms.enabled = false
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func NewMetricsServer(enabled bool, log log.Logger) MetricServer {
|
|
mi := &MetricsInfo{
|
|
Counters: make([]*CounterValue, 0),
|
|
Gauges: make([]*GaugeValue, 0),
|
|
Summaries: make([]*SummaryValue, 0),
|
|
Histograms: make([]*HistogramValue, 0),
|
|
}
|
|
// convert to a map for returning easily the string corresponding to a bucket
|
|
bucketsFloat2String := map[float64]string{}
|
|
|
|
for _, fvalue := range append(GetDefaultBuckets(), GetStorageLatencyBuckets()...) {
|
|
if fvalue == math.MaxFloat64 {
|
|
bucketsFloat2String[fvalue] = "+Inf"
|
|
} else {
|
|
s := strconv.FormatFloat(fvalue, 'f', -1, 64)
|
|
bucketsFloat2String[fvalue] = s
|
|
}
|
|
}
|
|
|
|
ms := &metricServer{
|
|
enabled: enabled,
|
|
reqChan: make(chan interface{}),
|
|
cacheChan: make(chan *MetricsInfo),
|
|
cache: mi,
|
|
bucketsF2S: bucketsFloat2String,
|
|
log: log,
|
|
}
|
|
|
|
go ms.Run()
|
|
|
|
return ms
|
|
}
|
|
|
|
// contains a map with key=CounterName and value=CounterLabels.
|
|
func GetCounters() map[string][]string {
|
|
return map[string][]string{
|
|
httpConnRequests: {"method", "code"},
|
|
repoDownloads: {"repo"},
|
|
repoUploads: {"repo"},
|
|
}
|
|
}
|
|
|
|
func GetGauges() map[string][]string {
|
|
return map[string][]string{
|
|
repoStorageBytes: {"repo"},
|
|
serverInfo: {"commit", "binaryType", "goVersion", "version"},
|
|
}
|
|
}
|
|
|
|
func GetSummaries() map[string][]string {
|
|
return map[string][]string{
|
|
httpRepoLatencySeconds: {"repo"},
|
|
}
|
|
}
|
|
|
|
func GetHistograms() map[string][]string {
|
|
return map[string][]string{
|
|
httpMethodLatencySeconds: {"method"},
|
|
storageLockLatencySeconds: {"storageName", "lockType"},
|
|
}
|
|
}
|
|
|
|
// return true if a metric does not have any labels or if the label
|
|
// values for searched metric corresponds to the one in the cached slice.
|
|
func isMetricMatch(lValues, metricValues []string) bool {
|
|
if len(lValues) == len(metricValues) {
|
|
for i, v := range metricValues {
|
|
if v != lValues[i] {
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// returns {-1, false} in case metric was not found in the slice.
|
|
func findCounterValueIndex(metricSlice []*CounterValue, name string, labelValues []string) (int, bool) {
|
|
for i, m := range metricSlice {
|
|
if m.Name == name {
|
|
if isMetricMatch(labelValues, m.LabelValues) {
|
|
return i, true
|
|
}
|
|
}
|
|
}
|
|
|
|
return -1, false
|
|
}
|
|
|
|
// returns {-1, false} in case metric was not found in the slice.
|
|
func findGaugeValueIndex(metricSlice []*GaugeValue, name string, labelValues []string) (int, bool) {
|
|
for i, m := range metricSlice {
|
|
if m.Name == name {
|
|
if isMetricMatch(labelValues, m.LabelValues) {
|
|
return i, true
|
|
}
|
|
}
|
|
}
|
|
|
|
return -1, false
|
|
}
|
|
|
|
// returns {-1, false} in case metric was not found in the slice.
|
|
func findSummaryValueIndex(metricSlice []*SummaryValue, name string, labelValues []string) (int, bool) {
|
|
for i, m := range metricSlice {
|
|
if m.Name == name {
|
|
if isMetricMatch(labelValues, m.LabelValues) {
|
|
return i, true
|
|
}
|
|
}
|
|
}
|
|
|
|
return -1, false
|
|
}
|
|
|
|
// returns {-1, false} in case metric was not found in the slice.
|
|
func findHistogramValueIndex(metricSlice []*HistogramValue, name string, labelValues []string) (int, bool) {
|
|
for i, m := range metricSlice {
|
|
if m.Name == name {
|
|
if isMetricMatch(labelValues, m.LabelValues) {
|
|
return i, true
|
|
}
|
|
}
|
|
}
|
|
|
|
return -1, false
|
|
}
|
|
|
|
func (ms *metricServer) CounterInc(cv *CounterValue) {
|
|
labels, ok := GetCounters()[cv.Name] // known label names for the 'name' counter
|
|
|
|
err := sanityChecks(cv.Name, labels, ok, cv.LabelNames, cv.LabelValues)
|
|
if err != nil {
|
|
// The last thing we want is to panic/stop the server due to instrumentation
|
|
// thus log a message (should be detected during development of new metrics)
|
|
ms.log.Error().Err(err).Msg("Instrumentation error")
|
|
|
|
return
|
|
}
|
|
|
|
index, ok := findCounterValueIndex(ms.cache.Counters, cv.Name, cv.LabelValues)
|
|
if !ok {
|
|
// cv not found in cache: add it
|
|
cv.Count = 1
|
|
ms.cache.Counters = append(ms.cache.Counters, cv)
|
|
} else {
|
|
ms.cache.Counters[index].Count++
|
|
}
|
|
}
|
|
|
|
func (ms *metricServer) GaugeSet(gv *GaugeValue) {
|
|
labels, ok := GetGauges()[gv.Name] // known label names for the 'name' counter
|
|
|
|
err := sanityChecks(gv.Name, labels, ok, gv.LabelNames, gv.LabelValues)
|
|
if err != nil {
|
|
ms.log.Error().Err(err).Msg("Instrumentation error")
|
|
|
|
return
|
|
}
|
|
|
|
index, ok := findGaugeValueIndex(ms.cache.Gauges, gv.Name, gv.LabelValues)
|
|
if !ok {
|
|
// gv not found in cache: add it
|
|
ms.cache.Gauges = append(ms.cache.Gauges, gv)
|
|
} else {
|
|
ms.cache.Gauges[index].Value = gv.Value
|
|
}
|
|
}
|
|
|
|
func (ms *metricServer) SummaryObserve(sv *SummaryValue) {
|
|
labels, ok := GetSummaries()[sv.Name] // known label names for the 'name' summary
|
|
|
|
err := sanityChecks(sv.Name, labels, ok, sv.LabelNames, sv.LabelValues)
|
|
if err != nil {
|
|
ms.log.Error().Err(err).Msg("Instrumentation error")
|
|
|
|
return
|
|
}
|
|
|
|
index, ok := findSummaryValueIndex(ms.cache.Summaries, sv.Name, sv.LabelValues)
|
|
if !ok {
|
|
// The SampledValue not found: add it
|
|
sv.Count = 1 // First value, no need to increment
|
|
ms.cache.Summaries = append(ms.cache.Summaries, sv)
|
|
} else {
|
|
ms.cache.Summaries[index].Count++
|
|
ms.cache.Summaries[index].Sum += sv.Sum
|
|
}
|
|
}
|
|
|
|
func (ms *metricServer) HistogramObserve(hv *HistogramValue) {
|
|
labels, ok := GetHistograms()[hv.Name] // known label names for the 'name' counter
|
|
|
|
err := sanityChecks(hv.Name, labels, ok, hv.LabelNames, hv.LabelValues)
|
|
if err != nil {
|
|
ms.log.Error().Err(err).Msg("Instrumentation error")
|
|
|
|
return
|
|
}
|
|
|
|
index, ok := findHistogramValueIndex(ms.cache.Histograms, hv.Name, hv.LabelValues)
|
|
if !ok {
|
|
// The HistogramValue not found: add it
|
|
buckets := make(map[string]int)
|
|
|
|
for _, fvalue := range GetBuckets(hv.Name) {
|
|
if hv.Sum <= fvalue {
|
|
buckets[ms.bucketsF2S[fvalue]] = 1
|
|
} else {
|
|
buckets[ms.bucketsF2S[fvalue]] = 0
|
|
}
|
|
}
|
|
|
|
hv.Count = 1 // First value, no need to increment
|
|
hv.Buckets = buckets
|
|
ms.cache.Histograms = append(ms.cache.Histograms, hv)
|
|
} else {
|
|
cachedH := ms.cache.Histograms[index]
|
|
cachedH.Count++
|
|
cachedH.Sum += hv.Sum
|
|
for _, fvalue := range GetBuckets(hv.Name) {
|
|
if hv.Sum <= fvalue {
|
|
cachedH.Buckets[ms.bucketsF2S[fvalue]]++
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// nolint: goerr113
|
|
func sanityChecks(name string, knownLabels []string, found bool, labelNames, labelValues []string) error {
|
|
if !found {
|
|
return fmt.Errorf("metric %s: not found", name)
|
|
}
|
|
|
|
if len(labelNames) != len(labelValues) ||
|
|
len(labelNames) != len(knownLabels) {
|
|
return fmt.Errorf("metric %s: label size mismatch", name)
|
|
}
|
|
// The list of label names defined in init() for the counter must match what was provided in labelNames
|
|
for i, label := range labelNames {
|
|
if label != knownLabels[i] {
|
|
return fmt.Errorf("metric %s: label size mismatch", name)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func IncHTTPConnRequests(ms MetricServer, lvs ...string) {
|
|
req := CounterValue{
|
|
Name: httpConnRequests,
|
|
LabelNames: []string{"method", "code"},
|
|
LabelValues: lvs,
|
|
}
|
|
ms.SendMetric(req)
|
|
}
|
|
|
|
func ObserveHTTPRepoLatency(ms MetricServer, path string, latency time.Duration) {
|
|
if ms.(*metricServer).enabled {
|
|
var lvs []string
|
|
|
|
re := regexp.MustCompile(`\/v2\/(.*?)\/(blobs|tags|manifests)\/(.*)$`)
|
|
match := re.FindStringSubmatch(path)
|
|
|
|
if len(match) > 1 {
|
|
lvs = []string{match[1]}
|
|
} else {
|
|
lvs = []string{"N/A"}
|
|
}
|
|
|
|
sv := SummaryValue{
|
|
Name: httpRepoLatencySeconds,
|
|
Sum: latency.Seconds(),
|
|
LabelNames: []string{"repo"},
|
|
LabelValues: lvs,
|
|
}
|
|
ms.SendMetric(sv)
|
|
}
|
|
}
|
|
|
|
func ObserveHTTPMethodLatency(ms MetricServer, method string, latency time.Duration) {
|
|
h := HistogramValue{
|
|
Name: httpMethodLatencySeconds,
|
|
Sum: latency.Seconds(), // convenient temporary store for Histogram latency value
|
|
LabelNames: []string{"method"},
|
|
LabelValues: []string{method},
|
|
}
|
|
ms.SendMetric(h)
|
|
}
|
|
|
|
func IncDownloadCounter(ms MetricServer, repo string) {
|
|
dCounter := CounterValue{
|
|
Name: repoDownloads,
|
|
LabelNames: []string{"repo"},
|
|
LabelValues: []string{repo},
|
|
}
|
|
ms.SendMetric(dCounter)
|
|
}
|
|
|
|
func IncUploadCounter(ms MetricServer, repo string) {
|
|
uCounter := CounterValue{
|
|
Name: repoUploads,
|
|
LabelNames: []string{"repo"},
|
|
LabelValues: []string{repo},
|
|
}
|
|
ms.SendMetric(uCounter)
|
|
}
|
|
|
|
func SetStorageUsage(ms MetricServer, rootDir, repo string) {
|
|
dir := path.Join(rootDir, repo)
|
|
|
|
repoSize, err := getDirSize(dir)
|
|
if err != nil {
|
|
ms.(*metricServer).log.Error().Err(err).Msg("failed to set storage usage")
|
|
}
|
|
|
|
storage := GaugeValue{
|
|
Name: repoStorageBytes,
|
|
Value: float64(repoSize),
|
|
LabelNames: []string{"repo"},
|
|
LabelValues: []string{repo},
|
|
}
|
|
ms.ForceSendMetric(storage)
|
|
}
|
|
|
|
func SetServerInfo(ms MetricServer, lvs ...string) {
|
|
info := GaugeValue{
|
|
Name: serverInfo,
|
|
Value: 0,
|
|
LabelNames: []string{"commit", "binaryType", "goVersion", "version"},
|
|
LabelValues: lvs,
|
|
}
|
|
// This metric is set once at zot startup (set it regardless of metrics enabled)
|
|
ms.ForceSendMetric(info)
|
|
}
|
|
|
|
func ObserveStorageLockLatency(ms MetricServer, latency time.Duration, storageName, lockType string) {
|
|
h := HistogramValue{
|
|
Name: storageLockLatencySeconds,
|
|
Sum: latency.Seconds(), // convenient temporary store for Histogram latency value
|
|
LabelNames: []string{"storageName", "lockType"},
|
|
LabelValues: []string{storageName, lockType},
|
|
}
|
|
ms.SendMetric(h)
|
|
}
|
|
|
|
func GetMaxIdleScrapeInterval() time.Duration {
|
|
return metricsScrapeTimeout + metricsScrapeCheckInterval
|
|
}
|
|
|
|
func GetBuckets(metricName string) []float64 {
|
|
switch metricName {
|
|
case storageLockLatencySeconds:
|
|
return GetStorageLatencyBuckets()
|
|
default:
|
|
return GetDefaultBuckets()
|
|
}
|
|
}
|