Skip to content

Commit

Permalink
feat: Skip large diffs when generating context for the LLM
Browse files Browse the repository at this point in the history
This skips diffs larger than 1000 characters when generating input
for @WELCOME and @review. Large diffs are replaced with a message,
for example

    diff --git a/file2.txt b/file2.txt
    [Change of size 2078]

Fixes #2173
  • Loading branch information
dividedmind committed Jan 23, 2025
1 parent 355d931 commit abaf705
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 5 deletions.
24 changes: 24 additions & 0 deletions packages/cli/src/lib/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,27 @@ export async function getDiffLog(
cwd,
});
}

/**
 * Shrink a git patchset by eliding oversized per-file diffs.
 *
 * The input is cut into sections at every line that begins with `diff` or
 * `commit`. Sections that are not diffs (commit headers, leading text) pass
 * through untouched. For a diff section, everything after its first line is
 * treated as the body; when the body is longer than `maxDiffLength`, the
 * section is replaced by its first line followed by a `[Change of size N]`
 * placeholder, where N is the body length with trailing whitespace removed.
 *
 * @param patchset - Raw output of `git diff` / `git log -p`.
 * @param [maxDiffLength=1000] - Longest diff body (in characters) that is kept verbatim.
 * @returns The patchset with every oversized diff replaced by a placeholder.
 */
export function processPatchset(patchset: string, maxDiffLength = 1000): string {
  let output = '';

  // The lookahead keeps the `diff` / `commit` marker at the start of each section.
  for (const section of patchset.split(/^(?=diff|commit)/m)) {
    if (!section.startsWith('diff')) {
      output += section;
      continue;
    }

    // First line is the diff header; the rest (possibly empty) is the body.
    const newlineAt = section.indexOf('\n');
    const hasBody = newlineAt !== -1;
    const headerLine = hasBody ? section.slice(0, newlineAt) : section;
    const diffBody = hasBody ? section.slice(newlineAt + 1) : '';

    if (diffBody.length > maxDiffLength) {
      output += `${headerLine}\n[Change of size ${diffBody.trimEnd().length}]\n`;
    } else {
      output += section;
    }
  }

  return output;
}
8 changes: 6 additions & 2 deletions packages/cli/src/rpc/explain/review.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { parseOptions, REVIEW_DIFF_LOCATION, UserContext } from '@appland/navie';

import { getDiffLog, getWorkingDiff, processPatchset } from '../../lib/git';
import configuration from '../configuration';
import { getDiffLog, getWorkingDiff } from '../../lib/git';

/**
* This function is responsible for transforming user context to include diff content when the
Expand All @@ -27,7 +28,10 @@ export default async function handleReview(
{
type: 'code-snippet',
location: REVIEW_DIFF_LOCATION, // eslint-disable-line @typescript-eslint/no-unsafe-assignment
content: diffContent.filter(Boolean).join('\n\n'),
content: diffContent
.filter(Boolean)
.map((diff) => processPatchset(diff))
.join('\n\n'),
},
],
};
Expand Down
4 changes: 2 additions & 2 deletions packages/cli/src/rpc/navie/welcome-suggestion.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import configuration from '../configuration';
import { getDiffLog, getWorkingDiff } from '../../lib/git';
import INavie, { INavieProvider } from '../explain/navie/inavie';
import { UserContext } from '@appland/navie';
import isCustomWelcomeMessageEnabled from './isCustomWelcomeMessageEnabled';
import { NavieRpc } from '@appland/rpc';
import { getDiffLog, getWorkingDiff, processPatchset } from '../../lib/git';

interface WelcomeSuggestion {
activity: string;
Expand Down Expand Up @@ -40,7 +40,7 @@ async function getChangeDiffs(projectDirectories: string[]): Promise<string[]> {
);
return diffs
.filter((result): result is PromiseFulfilledResult<string> => result.status === 'fulfilled')
.map((result) => result.value);
.map((result) => processPatchset(result.value));
}

export async function getWelcomeSuggestion(
Expand Down
156 changes: 155 additions & 1 deletion packages/cli/tests/unit/lib/git.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { mkdir, writeFile } from 'fs/promises';
import { join } from 'path';
import tmp from 'tmp-promise';
import { findRepository } from '../../../src/lib/git';

import { findRepository, processPatchset } from '../../../src/lib/git';

describe('git.ts', () => {
test.each([
Expand Down Expand Up @@ -38,6 +39,159 @@ describe('git.ts', () => {
{ unsafeCleanup: true }
);
});

// Unit tests for processPatchset: diffs whose body exceeds the given limit are
// expected to be replaced by a "[Change of size N]" placeholder, while all
// other text is expected to be kept.
describe('processPatchset', () => {
// A single short diff with a limit of 1000: the changed line must still be present.
it('should handle patchset with small diffs correctly', () => {
const patchset = `diff --git a/file1.txt b/file1.txt
index 83db48f..f735c4e 100644
--- a/file1.txt
+++ b/file1.txt
@@ -1,3 +1,3 @@
-Hello World
+Hello AppMap
This is a test file.`;

const result = processPatchset(patchset, 1000);
expect(result).toContain('Hello AppMap');
});

// Pins the exact output: a leading non-diff line followed by three diffs, of
// which only the middle one (a 2000-character line plus its metadata lines)
// is over the limit and is expected to collapse to header + placeholder.
it('should have the correct output format', () => {
const largeDiff = 'a'.repeat(2000);
const patchset = `Here's some header.
diff --git a/file1.txt b/file1.txt
index 83db48f..f735c4e 100644
--- a/file1.txt
+++ b/file1.txt
@@ -1,3 +1,3 @@
-Hello World
+Hello AppMap
This is a test file.
diff --git a/file2.txt b/file2.txt
index 83db48f..f735c4e 100644
--- a/file2.txt
+++ b/file2.txt
@@ -1,3 +1,3 @@
${largeDiff}
diff --git a/file3.txt b/file3.txt
index 83db48f..f735c4e 100644
--- a/file3.txt
+++ b/file3.txt
@@ -1,3 +1,3 @@
-Hello World
+Hello AppMap
This is another test file.`;

const result = processPatchset(patchset, 1000);
expect(result).toMatchInlineSnapshot(`
"Here's some header.
diff --git a/file1.txt b/file1.txt
index 83db48f..f735c4e 100644
--- a/file1.txt
+++ b/file1.txt
@@ -1,3 +1,3 @@
-Hello World
+Hello AppMap
This is a test file.
diff --git a/file2.txt b/file2.txt
[Change of size 2078]
diff --git a/file3.txt b/file3.txt
index 83db48f..f735c4e 100644
--- a/file3.txt
+++ b/file3.txt
@@ -1,3 +1,3 @@
-Hello World
+Hello AppMap
This is another test file."
`);
});

// git-log-style input with two "commit" sections. The limit of 10 is small
// enough that both diffs are expected to be replaced, while the commit
// header lines are expected to be kept as they are.
it('should handle patchsets with multiple commit headers like from git log', () => {
const patchset = `commit 1
Author: John Doe <[email protected]>
Date: Mon Jan 1 12:00:00 2023 +0000
Initial commit
diff --git a/file1.txt b/file1.txt
index 83db48f..f735c4e 100644
--- a/file1.txt
+++ b/file1.txt
@@ -1,3 +1,3 @@
-Hello World
+Hello AppMap
This is a test file.
commit 2
Author: Jane Doe <[email protected]>
Date: Tue Jan 2 12:00:00 2023 +0000
Second commit
diff --git a/file2.txt b/file2.txt
index 83db48f..f735c4e 100644
--- a/file2.txt
+++ b/file2.txt
@@ -1,3 +1,3 @@
-Hello World
+Hello AppMap
This is another test file.`;

const result = processPatchset(patchset, 10);
expect(result).toMatchInlineSnapshot(`
"commit 1
Author: John Doe <[email protected]>
Date: Mon Jan 1 12:00:00 2023 +0000
Initial commit
diff --git a/file1.txt b/file1.txt
[Change of size 126]
commit 2
Author: Jane Doe <[email protected]>
Date: Tue Jan 2 12:00:00 2023 +0000
Second commit
diff --git a/file2.txt b/file2.txt
[Change of size 132]
"
`);
});

// A single diff whose body is over the 1000 limit: the placeholder must appear.
it('should handle patchset with large diffs correctly', () => {
const largeDiff = 'a'.repeat(2000);
const patchset = `diff --git a/file2.txt b/file2.txt
index 83db48f..f735c4e 100644
--- a/file2.txt
+++ b/file2.txt
@@ -1,3 +1,3 @@
${largeDiff}`;

const result = processPatchset(patchset, 1000);
expect(result).toContain('[Change of size 2078]');
});

// One oversized diff followed by one diff under the limit: the first is
// expected to be replaced and the 500-character content of the second kept.
it('should handle patchset with mixed diff sizes correctly', () => {
const largeDiff = 'a'.repeat(2000);
const smallDiff = 'b'.repeat(500);
const patchset = `diff --git a/file3.txt b/file3.txt
index 83db48f..f735c4e 100644
--- a/file3.txt
+++ b/file3.txt
@@ -1,3 +1,3 @@
${largeDiff}
diff --git a/file4.txt b/file4.txt
index 83db48f..f735c4e 100644
--- a/file4.txt
+++ b/file4.txt
@@ -1,3 +1,3 @@
${smallDiff}`;

const result = processPatchset(patchset, 1000);
expect(result).toContain('[Change of size 2078]');
expect(result).toContain(smallDiff);
});
});
});

function unindent(str: string) {
Expand Down

0 comments on commit abaf705

Please sign in to comment.