0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2024-12-28 18:34:10 -05:00
forgejo/services/repository/contributors_graph.go
Gusted d60152587c [BUG] Store JSON in contributors commit cache
- The code that gets contributor stats tried to store an
`map[string]*ContributorData` type in the cache, this works for the
memory cache but not for other caches such as Redis.
- The cache implementation for Redis would convert this map via
`fmt.Sprintf` to an string, which would simply print the pointer and not
the value of the pointer. Storing pointers is a no-go as this will get
GC-ed eventually within a few minutes. Therefore store everything with
json, that does properly store the value of the pointers.
- Adds unit test that verifies JSON is being used.
- Resolves https://codeberg.org/forgejo/forgejo/issues/3158

(cherry picked from commit 4c8b67c4b2)
2024-04-11 12:06:19 +00:00

328 lines
9.7 KiB
Go

// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package repository
import (
"bufio"
"context"
"errors"
"fmt"
"os"
"strconv"
"strings"
"sync"
"time"
"code.gitea.io/gitea/models/avatars"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
api "code.gitea.io/gitea/modules/structs"
"gitea.com/go-chi/cache"
)
const (
contributorStatsCacheKey = "GetContributorStats/%s/%s"
contributorStatsCacheTimeout int64 = 60 * 10
)
var (
ErrAwaitGeneration = errors.New("generation took longer than ")
awaitGenerationTime = time.Second * 5
generateLock = sync.Map{}
)
type WeekData struct {
Week int64 `json:"week"` // Starting day of the week as Unix timestamp
Additions int `json:"additions"` // Number of additions in that week
Deletions int `json:"deletions"` // Number of deletions in that week
Commits int `json:"commits"` // Number of commits in that week
}
// ContributorData represents statistical git commit count data
type ContributorData struct {
Name string `json:"name"` // Display name of the contributor
Login string `json:"login"` // Login name of the contributor in case it exists
AvatarLink string `json:"avatar_link"`
HomeLink string `json:"home_link"`
TotalCommits int64 `json:"total_commits"`
Weeks map[int64]*WeekData `json:"weeks"`
}
// ExtendedCommitStats contains information for commit stats with author data
type ExtendedCommitStats struct {
Author *api.CommitUser `json:"author"`
Stats *api.CommitStats `json:"stats"`
}
const layout = time.DateOnly
func findLastSundayBeforeDate(dateStr string) (string, error) {
date, err := time.Parse(layout, dateStr)
if err != nil {
return "", err
}
weekday := date.Weekday()
daysToSubtract := int(weekday) - int(time.Sunday)
if daysToSubtract < 0 {
daysToSubtract += 7
}
lastSunday := date.AddDate(0, 0, -daysToSubtract)
return lastSunday.Format(layout), nil
}
// GetContributorStats returns contributors stats for git commits for given revision or default branch
func GetContributorStats(ctx context.Context, cache cache.Cache, repo *repo_model.Repository, revision string) (map[string]*ContributorData, error) {
// as GetContributorStats is resource intensive we cache the result
cacheKey := fmt.Sprintf(contributorStatsCacheKey, repo.FullName(), revision)
if !cache.IsExist(cacheKey) {
genReady := make(chan struct{})
// dont start multible async generations
_, run := generateLock.Load(cacheKey)
if run {
return nil, ErrAwaitGeneration
}
generateLock.Store(cacheKey, struct{}{})
// run generation async
go generateContributorStats(genReady, cache, cacheKey, repo, revision)
select {
case <-time.After(awaitGenerationTime):
return nil, ErrAwaitGeneration
case <-genReady:
// we got generation ready before timeout
break
}
}
// TODO: renew timeout of cache cache.UpdateTimeout(cacheKey, contributorStatsCacheTimeout)
switch v := cache.Get(cacheKey).(type) {
case error:
return nil, v
case string:
var cachedStats map[string]*ContributorData
return cachedStats, json.Unmarshal([]byte(v), &cachedStats)
default:
return nil, fmt.Errorf("unexpected type in cache detected")
}
}
// getExtendedCommitStats return the list of *ExtendedCommitStats for the given revision
func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int */) ([]*ExtendedCommitStats, error) {
baseCommit, err := repo.GetCommit(revision)
if err != nil {
return nil, err
}
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return nil, err
}
defer func() {
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
}()
gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as", "--reverse")
// AddOptionFormat("--max-count=%d", limit)
gitCmd.AddDynamicArguments(baseCommit.ID.String())
var extendedCommitStats []*ExtendedCommitStats
stderr := new(strings.Builder)
err = gitCmd.Run(&git.RunOpts{
Dir: repo.Path,
Stdout: stdoutWriter,
Stderr: stderr,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
scanner := bufio.NewScanner(stdoutReader)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line != "---" {
continue
}
scanner.Scan()
authorName := strings.TrimSpace(scanner.Text())
scanner.Scan()
authorEmail := strings.TrimSpace(scanner.Text())
scanner.Scan()
date := strings.TrimSpace(scanner.Text())
scanner.Scan()
stats := strings.TrimSpace(scanner.Text())
if authorName == "" || authorEmail == "" || date == "" || stats == "" {
// FIXME: find a better way to parse the output so that we will handle this properly
log.Warn("Something is wrong with git log output, skipping...")
log.Warn("authorName: %s, authorEmail: %s, date: %s, stats: %s", authorName, authorEmail, date, stats)
continue
}
// 1 file changed, 1 insertion(+), 1 deletion(-)
fields := strings.Split(stats, ",")
commitStats := api.CommitStats{}
for _, field := range fields[1:] {
parts := strings.Split(strings.TrimSpace(field), " ")
value, contributionType := parts[0], parts[1]
amount, _ := strconv.Atoi(value)
if strings.HasPrefix(contributionType, "insertion") {
commitStats.Additions = amount
} else {
commitStats.Deletions = amount
}
}
commitStats.Total = commitStats.Additions + commitStats.Deletions
scanner.Text() // empty line at the end
res := &ExtendedCommitStats{
Author: &api.CommitUser{
Identity: api.Identity{
Name: authorName,
Email: authorEmail,
},
Date: date,
},
Stats: &commitStats,
}
extendedCommitStats = append(extendedCommitStats, res)
}
_ = stdoutReader.Close()
return nil
},
})
if err != nil {
return nil, fmt.Errorf("Failed to get ContributorsCommitStats for repository.\nError: %w\nStderr: %s", err, stderr)
}
return extendedCommitStats, nil
}
func generateContributorStats(genDone chan struct{}, cache cache.Cache, cacheKey string, repo *repo_model.Repository, revision string) {
ctx := graceful.GetManager().HammerContext()
gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo)
if err != nil {
err := fmt.Errorf("OpenRepository: %w", err)
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
return
}
defer closer.Close()
if len(revision) == 0 {
revision = repo.DefaultBranch
}
extendedCommitStats, err := getExtendedCommitStats(gitRepo, revision)
if err != nil {
err := fmt.Errorf("ExtendedCommitStats: %w", err)
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
return
}
if len(extendedCommitStats) == 0 {
err := fmt.Errorf("no commit stats returned for revision '%s'", revision)
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
return
}
layout := time.DateOnly
unknownUserAvatarLink := user_model.NewGhostUser().AvatarLinkWithSize(ctx, 0)
contributorsCommitStats := make(map[string]*ContributorData)
contributorsCommitStats["total"] = &ContributorData{
Name: "Total",
Weeks: make(map[int64]*WeekData),
}
total := contributorsCommitStats["total"]
for _, v := range extendedCommitStats {
userEmail := v.Author.Email
if len(userEmail) == 0 {
continue
}
u, _ := user_model.GetUserByEmail(ctx, userEmail)
if u != nil {
// update userEmail with user's primary email address so
// that different mail addresses will linked to same account
userEmail = u.GetEmail()
}
// duplicated logic
if _, ok := contributorsCommitStats[userEmail]; !ok {
if u == nil {
avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0)
if avatarLink == "" {
avatarLink = unknownUserAvatarLink
}
contributorsCommitStats[userEmail] = &ContributorData{
Name: v.Author.Name,
AvatarLink: avatarLink,
Weeks: make(map[int64]*WeekData),
}
} else {
contributorsCommitStats[userEmail] = &ContributorData{
Name: u.DisplayName(),
Login: u.LowerName,
AvatarLink: u.AvatarLinkWithSize(ctx, 0),
HomeLink: u.HomeLink(),
Weeks: make(map[int64]*WeekData),
}
}
}
// Update user statistics
user := contributorsCommitStats[userEmail]
startingOfWeek, _ := findLastSundayBeforeDate(v.Author.Date)
val, _ := time.Parse(layout, startingOfWeek)
week := val.UnixMilli()
if user.Weeks[week] == nil {
user.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}
}
if total.Weeks[week] == nil {
total.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}
}
user.Weeks[week].Additions += v.Stats.Additions
user.Weeks[week].Deletions += v.Stats.Deletions
user.Weeks[week].Commits++
user.TotalCommits++
// Update overall statistics
total.Weeks[week].Additions += v.Stats.Additions
total.Weeks[week].Deletions += v.Stats.Deletions
total.Weeks[week].Commits++
total.TotalCommits++
}
data, err := json.Marshal(contributorsCommitStats)
if err != nil {
err := fmt.Errorf("couldn't marshal the data: %w", err)
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
return
}
// Store the data as an string, to make it uniform what data type is returned
// from caches.
_ = cache.Put(cacheKey, string(data), contributorStatsCacheTimeout)
generateLock.Delete(cacheKey)
if genDone != nil {
genDone <- struct{}{}
}
}