Skip to content

Commit

Permalink
feat: Refactor full-text search into its own package
Browse files Browse the repository at this point in the history
  • Loading branch information
kgilpin committed Nov 16, 2023
1 parent 00a66d3 commit bf66b0d
Show file tree
Hide file tree
Showing 4 changed files with 361 additions and 205 deletions.
266 changes: 61 additions & 205 deletions packages/cli/src/cmds/ask/ask.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ import OpenAI from 'openai';
import lunr from 'lunr';
import { ChatCompletionMessageParam } from 'openai/resources';
import { readFile } from 'fs/promises';
import { dirname, join } from 'path';
import { AppMapFilter, CodeObject, Event, Metadata, buildAppMap } from '@appland/models';
import { Action, NodeType, Specification, buildDiagram, nodeName } from '@appland/sequence-diagram';
import { Action, Specification, buildDiagram, nodeName } from '@appland/sequence-diagram';

import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory';
import { locateAppMapDir } from '../../lib/locateAppMapDir';
import { exists, processNamedFiles, verbose } from '../../utils';
import assert, { match } from 'assert';
import { exists, verbose } from '../../utils';
import FindAppMaps, { SearchResult as FindAppMapSearchResult } from '../../fulltext/FindAppMaps';
import FindEvents, { SearchResult as FindEventSearchResult } from '../../fulltext/FindEvents';

export const command = 'ask <question>';
export const describe =
Expand All @@ -20,6 +20,16 @@ export const builder = (args) => {
args.positional('question', {
describe: 'plain text question about the code base',
});
args.option('max-diagram-matches', {
describe: 'maximum number of diagram matches to return',
type: 'number',
default: 5,
});
args.option('max-code-object-matches', {
describe: 'maximum number of code objects matches to return for each diagram',
type: 'number',
default: 5,
});
args.option('directory', {
describe: 'program working directory',
type: 'string',
Expand Down Expand Up @@ -47,7 +57,7 @@ type SerializedCodeObject = {

type ActionInfo = {
elapsed?: number;
eventIds?: string;
eventId: number;
location?: string;
};

Expand Down Expand Up @@ -77,7 +87,7 @@ type EventInfo = {
type DiagramDetailsResult = {
summary: string;
metadata: Metadata;
keyEvents: EventInfo[];
keyEvents: FindEventSearchResult[];
};

const isCamelized = (str: string): boolean => {
Expand Down Expand Up @@ -105,88 +115,14 @@ const splitCamelized = (str: string): string => {
return result.join(' ');
};

/**
 * Full-text index over the diagrams found beneath an AppMap directory.
 *
 * Usage: construct with the directory, `await initialize()` to build the index,
 * assign `search`, then call `fetchDiagrams()`.
 */
class Ask {
  /** lunr index; undefined until `initialize()` has completed. */
  idx: lunr.Index | undefined;
  /** Query text used by `fetchDiagrams()`; must be set by the caller beforehand. */
  public search: string | undefined;

  constructor(public appmapDir: string) {}

  /**
   * Builds the lunr index. For every `metadata.json` found under `appmapDir`, the
   * sibling `classMap.json` is read and one document is indexed, keyed by the
   * directory that contains the metadata file.
   */
  async initialize(): Promise<void> {
    const { appmapDir } = this;

    const documents = new Array<Record<string, unknown>>();
    await processNamedFiles(appmapDir, 'metadata.json', async (metadataFile) => {
      const metadata = JSON.parse(await readFile(metadataFile, 'utf-8')) as Metadata;
      const diagramId = dirname(metadataFile);
      const classMap = JSON.parse(
        await readFile(join(diagramId, 'classMap.json'), 'utf-8')
      ) as SerializedCodeObject[];
      const queries = new Array<string>();
      const codeObjects = new Array<string>();
      const routes = new Array<string>();
      const externalRoutes = new Array<string>();

      // Walk the class map tree, bucketing each code object name by its type.
      const collectFunction = (co: SerializedCodeObject): void => {
        if (co.type === 'query') queries.push(co.name);
        else if (co.type === 'route') routes.push(co.name);
        else if (co.type === 'external-route') externalRoutes.push(co.name);
        else codeObjects.push(splitCamelized(co.name));

        co.children?.forEach((child) => {
          collectFunction(child);
        });
      };
      classMap.forEach((co) => collectFunction(co));

      documents.push({
        id: diagramId,
        name: metadata.name,
        source_location: metadata.source_location,
        code_objects: codeObjects.join(' '),
        queries: queries.join(' '),
        routes: routes.join(' '),
        external_routes: externalRoutes.join(' '),
      });
    });

    warn(`Indexing ${documents.length} diagrams`);

    this.idx = lunr(function () {
      this.ref('id');
      this.field('name');
      this.field('source_location');
      this.field('code_objects');
      this.field('queries');
      this.field('routes');
      this.field('external_routes');

      // The hyphen must be escaped: an unescaped "/-_" is the character range
      // U+002F..U+005F, which would also treat digits and several punctuation
      // characters as token separators.
      this.tokenizer.separator = /[\s/\-_:#.]+/;

      for (const doc of documents) this.add(doc);
    });
  }

  /**
   * Runs `search` against the index and returns the matching diagram ids in the
   * order lunr reports them.
   *
   * @param maxResults maximum number of matches to return (defaults to 5).
   * @returns one entry per match, carrying the lunr document ref as `diagramId`.
   * @throws AssertionError when the index has not been built or `search` is unset/empty.
   */
  async fetchDiagrams(maxResults = 5): Promise<SearchDiagramResult[]> {
    const { search } = this;
    assert(this.idx);
    assert(search);
    let matches = this.idx.search(search);
    warn(`Got ${matches.length} matches for search ${search}`);
    if (matches.length > maxResults) {
      warn(`Limiting to the top ${maxResults} matches`);
      matches = matches.slice(0, maxResults);
    }
    return matches.map((match) => ({ diagramId: match.ref }));
  }
}

export const handler = async (argv: any) => {
verbose(argv.verbose);
handleWorkingDirectory(argv.directory);
const { question, maxCodeObjectMatches, maxDiagramMatches } = argv;
const appmapDir = await locateAppMapDir(argv.appmapDir);

const ask = new Ask(appmapDir);
await ask.initialize();
const findAppMaps = new FindAppMaps(appmapDir);
await findAppMaps.initialize();

function showPlan(paramStr: string) {
let params: any;
Expand All @@ -199,132 +135,50 @@ export const handler = async (argv: any) => {
warn(`AI Plan: ${params.plan}`);
}

async function fetchDiagrams(paramStr: string): Promise<SearchDiagramResult[]> {
warn(`Fecching diagrams`);
return await ask.fetchDiagrams();
function fetchDiagrams(): FindAppMapSearchResult[] {
warn(`Fetching diagrams`);
return findAppMaps.search(question, { maxResults: maxDiagramMatches });
}

const diagramDetailsResults = new Array<FindEventSearchResult>();

async function getDiagramDetails(paramStr: string): Promise<DiagramDetailsResult[]> {
const params = JSON.parse(paramStr) as DiagramDetailsParam;
const { diagramIds } = params;
warn(`Getting details for diagram ${diagramIds}, retrieved by "${ask.search}"`);
warn(`Getting details for diagram ${diagramIds}, retrieved by "${question}"`);
const result = new Array<DiagramDetailsResult>();
for (const diagramId of diagramIds) {
warn(`Loading AppMap ${diagramId} and pruning to 1MB`);

const appmapFile = [diagramId, 'appmap.json'].join('.');
const prunedAppMap = buildAppMap()
.source(await readFile(appmapFile, 'utf-8'))
.prune(1 * 1000 * 1000)
.build();

warn(`Built AppMap with ${prunedAppMap.events.length} events.`);
warn(`Applying default AppMap filters.`);
const filter = new AppMapFilter();
if (prunedAppMap.metadata.language?.name !== 'java')
filter.declutter.hideExternalPaths.on = true;
filter.declutter.limitRootEvents.on = true;
const filteredAppMap = filter.filter(prunedAppMap, []);
warn(`Filtered AppMap has ${filteredAppMap.events.length} events.`);

const codeObjectsByFqid = new Map<string, CodeObject>();
const eventsById = filteredAppMap.events.reduce((map, event) => {
map.set(event.id, event);
return map;
}, new Map<number, Event>());
const specification = Specification.build(filteredAppMap, { loops: true });

warn(`Indexing AppMap`);
const index = new FindEvents(diagramId);
index.maxSize = 1024 * 1024;
await index.initialize();
const searchResults = index.search(question, { maxResults: maxCodeObjectMatches });
diagramDetailsResults.push(...searchResults);

const diagramText = new Array<string>();
const idx = lunr(function () {
this.ref('fqid');
this.field('name');
this.tokenizer.separator = /[\s/\-_:#.]+/;

const self = this;
const indexEvent = (event: Event, depth = 0) => {
// These will already be well-represented by the diagram summary.
if (depth > 0) {
const co = event.codeObject;
if (!codeObjectsByFqid.has(co.fqid)) {
codeObjectsByFqid.set(co.fqid, co);
let boost = 1;
if (co.location) boost *= 2;
self.add(
{
fqid: co.fqid,
name: splitCamelized(co.id),
},
{
boost,
}
);
}
}
event.children.forEach((child) => indexEvent(child, depth + 1));
};
filteredAppMap.rootEvents().forEach((event) => indexEvent(event));

const diagram = buildDiagram(appmapFile, filteredAppMap, specification);
const MAX_DEPTH = 0;
const collectAction = (action: Action, depth = 0) => {
if (depth <= MAX_DEPTH) {
const actionInfo: ActionInfo = {};
if (action.eventIds.length > 0) {
actionInfo.eventIds = action.eventIds.join(',');
const co = eventsById.get(action.eventIds[0])?.codeObject;
if (co) {
if (co.location) actionInfo.location = co.location;
} else {
warn(`No code object for event ${action.eventIds[0]}`);
}
}
const actionInfoStr = Object.keys(actionInfo)
.sort()
.map((key) => {
const value = actionInfo[key];
return `${key}=${value}`;
})
.join(',');
const indent = ' '.repeat(depth);
diagramText.push(
`${indent}${nodeName(action)}${actionInfoStr !== '' ? ` (${actionInfoStr})` : ''}`
);
}
if (action.children) {
action.children.forEach((child) => collectAction(child, depth + 1));
}
};
diagram.rootActions.forEach((action) => collectAction(action));
});

assert(ask.search);
let searchResult = idx.search(ask.search);
warn(`Matched ${searchResult.length} code objects in the diagram`);
if (searchResult.length > 5) {
warn(`Limiting to the top 5 matches`);
searchResult = searchResult.slice(0, 5);
for (const event of index.appmap.rootEvents()) {
const actionInfo: ActionInfo = { eventId: event.id };
if (event.elapsedTime) actionInfo.elapsed = event.elapsedTime;
if (event.codeObject.location) actionInfo.location = event.codeObject.location;
const actionInfoStr = Object.keys(actionInfo)
.sort()
.map((key) => {
const value = actionInfo[key];
return `${key}=${value}`;
})
.join(',');
diagramText.push(
`${event.codeObject.id}${actionInfoStr !== '' ? ` (${actionInfoStr})` : ''}`
);
}
const keyEvents = searchResult.map((match) => {
const co = codeObjectsByFqid.get(match.ref);
assert(co);
const result: EventInfo = {
name: co.id,
fqid: co.fqid,
elapsed: co.allEvents.reduce((sum, event) => sum + (event.elapsedTime || 0), 0),
};
if (co.location) result.sourceLocation = co.location;

return result;
});

const metadata = prunedAppMap.metadata;

const metadata = index.appmap.metadata;
delete metadata['git'];
delete (metadata as any)['client'];
// TODO: Do we want the AI to read the source code of the test case?
delete metadata['source_location'];
result.push({ metadata, summary: diagramText.join('\n'), keyEvents });
result.push({ metadata, summary: diagramText.join('\n'), keyEvents: searchResults });
}

return result;
Expand Down Expand Up @@ -379,20 +233,15 @@ export const handler = async (argv: any) => {
return result;
}

const question = argv.question;
ask.search = question;

const systemMessages: ChatCompletionMessageParam[] = [
'You are an assistant that answers questions about the design and architecture of code.',
'You answer these questions by accessing a knowledge base of sequence diagrams.',
'Each sequence diagram conists of a series of events, such as function calls, HTTP server requests, SQL queries, etc.',
'Before each function call, call "showPlan" function with a Markdown document that describes your strategy for answering the question.',
`After the first "showPlan", begin by calling the "fetchDiagrams" function to obtain the diagrams that are most relevant to the user's question.`,
'Next, call "showPlan", then call "getDiagramDetails" function get details about the events that occur with in the matching diagrams.',
`Evaluate which diagrams are most relevant to the user's problem.`,
`Begin by calling the "fetchDiagrams" function to obtain the diagrams that are most relevant to the user's question.`,
'Next, use the "getDiagramDetails" function get details about the events that occur with in the matching diagrams.',
'Enhance your answer by using "lookupSourceCode" function to get the source code for the most relevant functions.',
'Finally, respond with a Markdown document that summarizes the diagrams and answers the question.',
'Subsequent mentions of the function should use backticks but should not be links.',
'Never emit phrases like "note that the actual behavior may vary between different applications"',
].map((msg) => ({
content: msg,
Expand Down Expand Up @@ -428,7 +277,7 @@ export const handler = async (argv: any) => {
},
{
function: fetchDiagrams,
description: `List sequence diagrams that match a keyword. Each response includes a diagram id, plus information about the events (function calls, HTTP server requests, SQL queries, etc) within that diagram that match the search term.`,
description: `Obtain sequence diagrams that are relevant to the user's question. The response is a list of diagram ids.`,
parameters: {
type: 'object',
properties: {},
Expand Down Expand Up @@ -472,16 +321,16 @@ export const handler = async (argv: any) => {
});

runFunctions.on('functionCall', (data) => {
warn(JSON.stringify(data));
warn(JSON.stringify(data, null, 2));
});
runFunctions.on('finalFunctionCall', (data) => {
warn(JSON.stringify(data));
warn(JSON.stringify(data, null, 2));
});
runFunctions.on('functionCallResult', (data) => {
warn(JSON.stringify(data));
if (verbose()) warn(JSON.stringify(data));
});
runFunctions.on('finalFunctionCallResult', (data) => {
warn(JSON.stringify(data));
if (verbose()) warn(JSON.stringify(data));
});

const response = await runFunctions.finalContent();
Expand All @@ -490,4 +339,11 @@ export const handler = async (argv: any) => {
return;
}
console.log(response);
console.log('');
console.log('The best matching sequence diagram events are:');
console.log('');
diagramDetailsResults.sort((a, b) => b.score - a.score);
for (const event of diagramDetailsResults) {
console.log(` ${event.fqid} (${event.score})`);
}
};
Loading

0 comments on commit bf66b0d

Please sign in to comment.