0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2025-01-18 12:22:36 -05:00
forgejo/services/mirror/mirror_push.go
wxiaoguang 6bc3079c00
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)

## Review without space diff

https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1

## Purpose of this PR

1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command

## The main idea of this PR

* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
    * Before: `AddArguments("-m").AddDynamicArguments(message)`
    * After: `AddOptionValues("-m", message)`
    * -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`

## FAQ

### Why these changes were not done in #21535 ?

#21535 is mainly a search&replace, it did its best to not change too
much logic.

Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.


### The naming of `AddOptionXxx`

According to git's manual, the `--xxx` part is called `option`.

### How can it guarantee that `internal.CmdArg` won't be not misused?

Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.

And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.

### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?

Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.

### Why there was a `CmdArgCheck` and why it's removed?

At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.


### Why many codes for `signArg == ""` is deleted?

Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00

255 lines
7 KiB
Go

// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package mirror
import (
"context"
"errors"
"fmt"
"io"
"regexp"
"strings"
"time"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
)
var stripExitStatus = regexp.MustCompile(`exit status \d+ - `)
// AddPushMirrorRemote registers the push mirror remote.
func AddPushMirrorRemote(ctx context.Context, m *repo_model.PushMirror, addr string) error {
addRemoteAndConfig := func(addr, path string) error {
cmd := git.NewCommand(ctx, "remote", "add", "--mirror=push").AddDynamicArguments(m.RemoteName, addr)
if strings.Contains(addr, "://") && strings.Contains(addr, "@") {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=push %s [repo_path: %s]", m.RemoteName, util.SanitizeCredentialURLs(addr), path))
} else {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=push %s [repo_path: %s]", m.RemoteName, addr, path))
}
if _, _, err := cmd.RunStdString(&git.RunOpts{Dir: path}); err != nil {
return err
}
if _, _, err := git.NewCommand(ctx, "config", "--add").AddDynamicArguments("remote."+m.RemoteName+".push", "+refs/heads/*:refs/heads/*").RunStdString(&git.RunOpts{Dir: path}); err != nil {
return err
}
if _, _, err := git.NewCommand(ctx, "config", "--add").AddDynamicArguments("remote."+m.RemoteName+".push", "+refs/tags/*:refs/tags/*").RunStdString(&git.RunOpts{Dir: path}); err != nil {
return err
}
return nil
}
if err := addRemoteAndConfig(addr, m.Repo.RepoPath()); err != nil {
return err
}
if m.Repo.HasWiki() {
wikiRemoteURL := repository.WikiRemoteURL(ctx, addr)
if len(wikiRemoteURL) > 0 {
if err := addRemoteAndConfig(wikiRemoteURL, m.Repo.WikiPath()); err != nil {
return err
}
}
}
return nil
}
// RemovePushMirrorRemote removes the push mirror remote.
func RemovePushMirrorRemote(ctx context.Context, m *repo_model.PushMirror) error {
cmd := git.NewCommand(ctx, "remote", "rm").AddDynamicArguments(m.RemoteName)
_ = m.GetRepository()
if _, _, err := cmd.RunStdString(&git.RunOpts{Dir: m.Repo.RepoPath()}); err != nil {
return err
}
if m.Repo.HasWiki() {
if _, _, err := cmd.RunStdString(&git.RunOpts{Dir: m.Repo.WikiPath()}); err != nil {
// The wiki remote may not exist
log.Warn("Wiki Remote[%d] could not be removed: %v", m.ID, err)
}
}
return nil
}
// SyncPushMirror starts the sync of the push mirror and schedules the next run.
func SyncPushMirror(ctx context.Context, mirrorID int64) bool {
log.Trace("SyncPushMirror [mirror: %d]", mirrorID)
defer func() {
err := recover()
if err == nil {
return
}
// There was a panic whilst syncPushMirror...
log.Error("PANIC whilst syncPushMirror[%d] Panic: %v\nStacktrace: %s", mirrorID, err, log.Stack(2))
}()
m, err := repo_model.GetPushMirror(ctx, repo_model.PushMirrorOptions{ID: mirrorID})
if err != nil {
log.Error("GetPushMirrorByID [%d]: %v", mirrorID, err)
return false
}
_ = m.GetRepository()
m.LastError = ""
ctx, _, finished := process.GetManager().AddContext(ctx, fmt.Sprintf("Syncing PushMirror %s/%s to %s", m.Repo.OwnerName, m.Repo.Name, m.RemoteName))
defer finished()
log.Trace("SyncPushMirror [mirror: %d][repo: %-v]: Running Sync", m.ID, m.Repo)
err = runPushSync(ctx, m)
if err != nil {
log.Error("SyncPushMirror [mirror: %d][repo: %-v]: %v", m.ID, m.Repo, err)
m.LastError = stripExitStatus.ReplaceAllLiteralString(err.Error(), "")
}
m.LastUpdateUnix = timeutil.TimeStampNow()
if err := repo_model.UpdatePushMirror(ctx, m); err != nil {
log.Error("UpdatePushMirror [%d]: %v", m.ID, err)
return false
}
log.Trace("SyncPushMirror [mirror: %d][repo: %-v]: Finished", m.ID, m.Repo)
return err == nil
}
func runPushSync(ctx context.Context, m *repo_model.PushMirror) error {
timeout := time.Duration(setting.Git.Timeout.Mirror) * time.Second
performPush := func(path string) error {
remoteURL, err := git.GetRemoteURL(ctx, path, m.RemoteName)
if err != nil {
log.Error("GetRemoteAddress(%s) Error %v", path, err)
return errors.New("Unexpected error")
}
if setting.LFS.StartServer {
log.Trace("SyncMirrors [repo: %-v]: syncing LFS objects...", m.Repo)
gitRepo, err := git.OpenRepository(ctx, path)
if err != nil {
log.Error("OpenRepository: %v", err)
return errors.New("Unexpected error")
}
defer gitRepo.Close()
endpoint := lfs.DetermineEndpoint(remoteURL.String(), "")
lfsClient := lfs.NewClient(endpoint, nil)
if err := pushAllLFSObjects(ctx, gitRepo, lfsClient); err != nil {
return util.SanitizeErrorCredentialURLs(err)
}
}
log.Trace("Pushing %s mirror[%d] remote %s", path, m.ID, m.RemoteName)
if err := git.Push(ctx, path, git.PushOptions{
Remote: m.RemoteName,
Force: true,
Mirror: true,
Timeout: timeout,
}); err != nil {
log.Error("Error pushing %s mirror[%d] remote %s: %v", path, m.ID, m.RemoteName, err)
return util.SanitizeErrorCredentialURLs(err)
}
return nil
}
err := performPush(m.Repo.RepoPath())
if err != nil {
return err
}
if m.Repo.HasWiki() {
wikiPath := m.Repo.WikiPath()
_, err := git.GetRemoteAddress(ctx, wikiPath, m.RemoteName)
if err == nil {
err := performPush(wikiPath)
if err != nil {
return err
}
} else {
log.Trace("Skipping wiki: No remote configured")
}
}
return nil
}
func pushAllLFSObjects(ctx context.Context, gitRepo *git.Repository, lfsClient lfs.Client) error {
contentStore := lfs.NewContentStore()
pointerChan := make(chan lfs.PointerBlob)
errChan := make(chan error, 1)
go lfs.SearchPointerBlobs(ctx, gitRepo, pointerChan, errChan)
uploadObjects := func(pointers []lfs.Pointer) error {
err := lfsClient.Upload(ctx, pointers, func(p lfs.Pointer, objectError error) (io.ReadCloser, error) {
if objectError != nil {
return nil, objectError
}
content, err := contentStore.Get(p)
if err != nil {
log.Error("Error reading LFS object %v: %v", p, err)
}
return content, err
})
if err != nil {
select {
case <-ctx.Done():
return nil
default:
}
}
return err
}
var batch []lfs.Pointer
for pointerBlob := range pointerChan {
exists, err := contentStore.Exists(pointerBlob.Pointer)
if err != nil {
log.Error("Error checking if LFS object %v exists: %v", pointerBlob.Pointer, err)
return err
}
if !exists {
log.Trace("Skipping missing LFS object %v", pointerBlob.Pointer)
continue
}
batch = append(batch, pointerBlob.Pointer)
if len(batch) >= lfsClient.BatchSize() {
if err := uploadObjects(batch); err != nil {
return err
}
batch = nil
}
}
if len(batch) > 0 {
if err := uploadObjects(batch); err != nil {
return err
}
}
err, has := <-errChan
if has {
log.Error("Error enumerating LFS objects for repository: %v", err)
return err
}
return nil
}