From 33f9fb815097f2c179f31892a940e5e0231bb435 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Thu, 4 Jul 2024 20:57:11 +0200
Subject: [PATCH] Fix slow patch checking with commits that add or remove many
 files (#31548)

Running git update-index for every individual file is slow, so add and
remove everything with a single git command.

When such a big commit lands in the default branch, it could cause PR
creation and patch checking for all open PRs to be slow, or time out
entirely. For example, a commit that removes 1383 files was measured to
take more than 60 seconds and timed out. With this change checking took
about a second.

This is related to #27967, though this will not help with commits that
change many lines in few files.

(cherry picked from commit b88e5fc72d99e9d4a0aa9c13f70e0a9e967fe057)
---
 modules/git/repo_index.go | 35 +++++++++++++++++++++++++++--------
 services/pull/patch.go    | 29 +++++++++++++++++++++--------
 2 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/modules/git/repo_index.go b/modules/git/repo_index.go
index 6aaab242c1..8390570098 100644
--- a/modules/git/repo_index.go
+++ b/modules/git/repo_index.go
@@ -104,11 +104,8 @@ func (repo *Repository) RemoveFilesFromIndex(filenames ...string) error {
 	buffer := new(bytes.Buffer)
 	for _, file := range filenames {
 		if file != "" {
-			buffer.WriteString("0 ")
-			buffer.WriteString(objectFormat.EmptyObjectID().String())
-			buffer.WriteByte('\t')
-			buffer.WriteString(file)
-			buffer.WriteByte('\000')
+			// using format: mode SP type SP sha1 TAB path
+			buffer.WriteString("0 blob " + objectFormat.EmptyObjectID().String() + "\t" + file + "\000")
 		}
 	}
 	return cmd.Run(&RunOpts{
@@ -119,11 +116,33 @@ func (repo *Repository) RemoveFilesFromIndex(filenames ...string) error {
 	})
 }
 
+type IndexObjectInfo struct {
+	Mode     string
+	Object   ObjectID
+	Filename string
+}
+
+// AddObjectsToIndex adds the provided object hashes to the index at the provided filenames
+func (repo *Repository) AddObjectsToIndex(objects ...IndexObjectInfo) error {
+	cmd := NewCommand(repo.Ctx, "update-index", "--add", "--replace", "-z", "--index-info")
+	stdout := new(bytes.Buffer)
+	stderr := new(bytes.Buffer)
+	buffer := new(bytes.Buffer)
+	for _, object := range objects {
+		// using format: mode SP type SP sha1 TAB path
+		buffer.WriteString(object.Mode + " blob " + object.Object.String() + "\t" + object.Filename + "\000")
+	}
+	return cmd.Run(&RunOpts{
+		Dir:    repo.Path,
+		Stdin:  bytes.NewReader(buffer.Bytes()),
+		Stdout: stdout,
+		Stderr: stderr,
+	})
+}
+
 // AddObjectToIndex adds the provided object hash to the index at the provided filename
 func (repo *Repository) AddObjectToIndex(mode string, object ObjectID, filename string) error {
-	cmd := NewCommand(repo.Ctx, "update-index", "--add", "--replace", "--cacheinfo").AddDynamicArguments(mode, object.String(), filename)
-	_, _, err := cmd.RunStdString(&RunOpts{Dir: repo.Path})
-	return err
+	return repo.AddObjectsToIndex(IndexObjectInfo{Mode: mode, Object: object, Filename: filename})
 }
 
 // WriteTree writes the current index as a tree to the object db and returns its hash
diff --git a/services/pull/patch.go b/services/pull/patch.go
index 12b79a0625..e90b4bdbbe 100644
--- a/services/pull/patch.go
+++ b/services/pull/patch.go
@@ -128,7 +128,7 @@ func (e *errMergeConflict) Error() string {
 	return fmt.Sprintf("conflict detected at: %s", e.filename)
 }
 
-func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, gitRepo *git.Repository) error {
+func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, filesToRemove *[]string, filesToAdd *[]git.IndexObjectInfo) error {
 	log.Trace("Attempt to merge:\n%v", file)
 
 	switch {
@@ -142,14 +142,13 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g
 		}
 
 		// Not a genuine conflict and we can simply remove the file from the index
-		return gitRepo.RemoveFilesFromIndex(file.stage1.path)
+		*filesToRemove = append(*filesToRemove, file.stage1.path)
+		return nil
 	case file.stage1 == nil && file.stage2 != nil && (file.stage3 == nil || file.stage2.SameAs(file.stage3)):
 		// 2. Added in ours but not in theirs or identical in both
 		//
 		// Not a genuine conflict just add to the index
-		if err := gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(file.stage2.sha), file.stage2.path); err != nil {
-			return err
-		}
+		*filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage2.mode, Object: git.MustIDFromString(file.stage2.sha), Filename: file.stage2.path})
 		return nil
 	case file.stage1 == nil && file.stage2 != nil && file.stage3 != nil && file.stage2.sha == file.stage3.sha && file.stage2.mode != file.stage3.mode:
 		// 3. Added in both with the same sha but the modes are different
@@ -160,7 +159,8 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g
 		// 4. Added in theirs but not ours:
 		//
 		// Not a genuine conflict just add to the index
-		return gitRepo.AddObjectToIndex(file.stage3.mode, git.MustIDFromString(file.stage3.sha), file.stage3.path)
+		*filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage3.mode, Object: git.MustIDFromString(file.stage3.sha), Filename: file.stage3.path})
+		return nil
 	case file.stage1 == nil:
 		// 5. Created by new in both
 		//
@@ -221,7 +221,8 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g
 			return err
 		}
 		hash = strings.TrimSpace(hash)
-		return gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(hash), file.stage2.path)
+		*filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage2.mode, Object: git.MustIDFromString(hash), Filename: file.stage2.path})
+		return nil
 	default:
 		if file.stage1 != nil {
 			return &errMergeConflict{file.stage1.path}
@@ -245,6 +246,9 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo
 		return false, nil, fmt.Errorf("unable to run read-tree -m! Error: %w", err)
 	}
 
+	var filesToRemove []string
+	var filesToAdd []git.IndexObjectInfo
+
 	// Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles
 	unmerged := make(chan *unmergedFile)
 	go unmergedFiles(ctx, gitPath, unmerged)
@@ -270,7 +274,7 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo
 		}
 
 		// OK now we have the unmerged file triplet attempt to merge it
-		if err := attemptMerge(ctx, file, gitPath, gitRepo); err != nil {
+		if err := attemptMerge(ctx, file, gitPath, &filesToRemove, &filesToAdd); err != nil {
 			if conflictErr, ok := err.(*errMergeConflict); ok {
 				log.Trace("Conflict: %s in %s", conflictErr.filename, description)
 				conflict = true
@@ -283,6 +287,15 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo
 			return false, nil, err
 		}
 	}
+
+	// Add and remove files in one command, as this is slow with many files otherwise
+	if err := gitRepo.RemoveFilesFromIndex(filesToRemove...); err != nil {
+		return false, nil, err
+	}
+	if err := gitRepo.AddObjectsToIndex(filesToAdd...); err != nil {
+		return false, nil, err
+	}
+
 	return conflict, conflictedFiles, nil
 }