Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Ask OpenAI #1464

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
"@types/jest": "^29.5.4",
"@types/jest-sinon": "^1.0.2",
"@types/jsdom": "^16.2.13",
"@types/lunr": "^2.3.7",
"@types/moo": "^0.5.5",
"@types/node": "^16",
"@types/semver": "^7.3.10",
"@types/sinon": "^10.0.2",
Expand Down Expand Up @@ -86,7 +88,6 @@
"@appland/sequence-diagram": "workspace:^1.7.0",
"@octokit/rest": "^20.0.1",
"@sidvind/better-ajv-errors": "^0.9.1",
"@types/moo": "^0.5.5",
"JSONStream": "^1.3.5",
"ajv": "^8.6.3",
"applicationinsights": "^2.1.4",
Expand All @@ -109,9 +110,11 @@
"inquirer": "^8.1.2",
"js-yaml": "^4.0.3",
"jsdom": "^16.6.0",
"lunr": "^2.3.9",
"minimatch": "^5.1.2",
"moo": "^0.5.1",
"open": "^8.2.1",
"openai": "^4.19.0",
"openapi-diff": "^0.23.6",
"openapi-types": "^12.1.3",
"ora": "^5.4.1",
Expand Down
2 changes: 2 additions & 0 deletions packages/cli/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const CompareCommand = require('./cmds/compare/compare');
const CompareReportCommand = require('./cmds/compare-report/compareReport');
const InventoryCommand = require('./cmds/inventory/inventory');
const InventoryReportCommand = require('./cmds/inventory-report/inventoryReport');
const Ask = require('./cmds/ask/ask');
import UploadCommand from './cmds/upload';
import { default as sqlErrorLog } from './lib/sqlErrorLog';

Expand Down Expand Up @@ -192,6 +193,7 @@ yargs(process.argv.slice(2))
.command(CompareReportCommand)
.command(InventoryCommand)
.command(InventoryReportCommand)
.command(Ask)
.option('verbose', {
alias: 'v',
type: 'boolean',
Expand Down
349 changes: 349 additions & 0 deletions packages/cli/src/cmds/ask/ask.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,349 @@
import { warn } from 'console';
import OpenAI from 'openai';
import lunr from 'lunr';
import { ChatCompletionMessageParam } from 'openai/resources';
import { readFile } from 'fs/promises';
import { AppMapFilter, CodeObject, Event, Metadata, buildAppMap } from '@appland/models';
import { Action, Specification, buildDiagram, nodeName } from '@appland/sequence-diagram';

import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory';
import { locateAppMapDir } from '../../lib/locateAppMapDir';
import { exists, verbose } from '../../utils';
import FindAppMaps, { SearchResult as FindAppMapSearchResult } from '../../fulltext/FindAppMaps';
import FindEvents, { SearchResult as FindEventSearchResult } from '../../fulltext/FindEvents';

/** yargs command signature: `ask` takes one required positional argument, the question. */
export const command = 'ask <question>';

/** One-line summary of the command, exported for yargs alongside `command`. */
export const describe = 'Ask a plain text question and get a filtered and configured AppMap as a response';

/**
 * Declares the positional argument and options accepted by the `ask` command.
 *
 * The parser is put in strict mode, so any option that is not declared here is
 * rejected. `appmap-dir` is declared because the command handler reads
 * `argv.appmapDir`; without the declaration that value could never be supplied.
 *
 * @param args the yargs builder instance for this command
 * @returns the value returned by `args.strict()`
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- yargs types are not imported by this file
export const builder = (args: any) => {
  args.positional('question', {
    describe: 'plain text question about the code base',
  });
  args.option('max-diagram-matches', {
    describe: 'maximum number of diagram matches to return',
    type: 'number',
    default: 5,
  });
  args.option('max-code-object-matches', {
    describe: 'maximum number of code objects matches to return for each diagram',
    type: 'number',
    default: 5,
  });
  args.option('directory', {
    describe: 'program working directory',
    type: 'string',
    alias: 'd',
  });
  // Exposed to the handler as `argv.appmapDir`; no default, so omitting it
  // leaves the handler's behavior exactly as before.
  args.option('appmap-dir', {
    describe: 'directory to recursively inspect for AppMaps',
    type: 'string',
  });
  return args.strict();
};

/**
 * Creates an OpenAI client authenticated with the key found in the
 * `OPENAI_API_KEY` environment variable.
 *
 * @returns a client constructed with that API key
 * @throws Error when `OPENAI_API_KEY` is unset or empty
 */
function buildOpenAI(): OpenAI {
  const apiKey = process.env.OPENAI_API_KEY;
  if (apiKey) return new OpenAI({ apiKey });

  throw new Error('OPENAI_API_KEY environment variable must be set');
}

/**
 * Recursive, tree-shaped description of a code object: each node carries a
 * name, a type, labels and a list of child nodes of the same shape.
 * NOTE(review): not referenced anywhere else in this file — confirm whether it
 * is still needed.
 */
type SerializedCodeObject = {
name: string;
type: string;
labels: string[];
children: SerializedCodeObject[];
static?: boolean;
sourceLocation?: string;
};

/**
 * Per-event attributes that `getDiagramDetails` renders as `key=value` pairs
 * next to each root event in the diagram summary.
 */
type ActionInfo = {
// Copied from the event's `elapsedTime` when that value is truthy.
elapsed?: number;
// Copied from the event's `id`; always present.
eventId: number;
// Copied from the event's code object `location` when that value is truthy.
location?: string;
};

/**
 * Holds a single diagram id.
 * NOTE(review): not referenced anywhere else in this file — confirm whether it
 * is still needed.
 */
type SearchDiagramResult = {
diagramId: string;
};

/**
 * Shape that the JSON argument string of `getDiagramDetails` is cast to after
 * `JSON.parse`.
 */
type DiagramDetailsParam = {
// NOTE(review): declared here but never read by `getDiagramDetails`, which
// searches with the user's original question instead.
search: string;
// Each id is passed to `new FindEvents(...)` to load and search one diagram.
diagramIds: string[];
};

/**
 * Shape that the JSON argument string of `lookupSourceCode` is cast to after
 * `JSON.parse`.
 */
type LookupSourceCodeParam = {
// Each entry is split on ':' into a file path and an optional line number.
locations: string[];
};

/**
 * Result of `lookupSourceCode`: source text keyed by the location string
 * exactly as it was requested. Locations that yield no text are omitted.
 */
type LookupSourceCodeResult = Record<string, string>;

/**
 * Descriptive fields for an event (name, optional fqid, source location,
 * elapsed time and a list of event ids).
 * NOTE(review): not referenced anywhere else in this file — confirm whether it
 * is still needed.
 */
type EventInfo = {
name: string;
fqid?: string;
sourceLocation?: string;
elapsed?: number;
eventIds?: number[];
};

/**
 * One entry of the array returned by `getDiagramDetails`, describing a single
 * diagram.
 */
type DiagramDetailsResult = {
// Newline-joined text with one line per root event of the AppMap.
summary: string;
// The AppMap's metadata after `git`, `client` and `source_location` were deleted.
metadata: Metadata;
// Results of searching this diagram's events with the user's question.
keyEvents: FindEventSearchResult[];
};

/**
 * Reports whether `str` looks like a camelCase / PascalCase identifier.
 *
 * A string qualifies when it is at least three characters long and, ignoring
 * its first character, contains an ASCII lowercase letter immediately followed
 * by an ASCII uppercase letter. Because the first character is ignored, a
 * string such as `xAxis` is NOT reported as camelized.
 *
 * @param str the identifier to inspect
 * @returns true when a lowercase-to-uppercase transition is found
 */
const isCamelized = (str: string): boolean => {
  if (str.length < 3) return false;

  return /[a-z][A-Z]/.test(str.slice(1));
};

/**
 * Splits a camelized identifier into space-separated words.
 *
 * Strings that `isCamelized` rejects are returned unchanged. Otherwise a word
 * break is inserted before an ASCII uppercase letter when either
 *  - the preceding character is not uppercase (`fooBar` -> `foo Bar`,
 *    `bar1Baz` -> `bar1 Baz`), or
 *  - the following character is lowercase, i.e. the capital starts a new word
 *    right after an acronym (`HTTPResponse` -> `HTTP Response`).
 *
 * Runs of capitals are therefore kept together instead of being broken into
 * single letters.
 *
 * @param str the identifier to split
 * @returns the words joined with single spaces, or `str` itself when it is not
 *   camelized
 */
const splitCamelized = (str: string): string => {
  if (!isCamelized(str)) return str;

  // charAt() yields '' past the end of the string, which fails both range checks.
  const isUpper = (c: string): boolean => c >= 'A' && c <= 'Z';
  const isLower = (c: string): boolean => c >= 'a' && c <= 'z';

  const words: string[] = [];
  let wordStart = 0;
  for (let i = 1; i < str.length; i++) {
    if (!isUpper(str.charAt(i))) continue;

    const followsNonCapital = !isUpper(str.charAt(i - 1));
    const startsWordAfterAcronym = isLower(str.charAt(i + 1));
    if (followsNonCapital || startsWordAfterAcronym) {
      words.push(str.slice(wordStart, i));
      wordStart = i;
    }
  }
  words.push(str.slice(wordStart));
  return words.join(' ');
};

/**
 * Splits a `<path>[:<line number>]` location into its parts.
 *
 * Only a trailing `:<digits>` is treated as the line number, so paths that
 * contain colons themselves (for example a Windows drive letter) stay intact.
 * A trailing colon with no digits is dropped and no line number is reported.
 */
function parseSourceLocation(location: string): { path: string; lineno?: number } {
  const parsed = /^(.*?)(?::(\d*))?$/.exec(location);
  if (!parsed) return { path: location };

  const path = parsed[1] ?? location;
  const digits = parsed[2];
  return digits ? { path, lineno: parseInt(digits, 10) } : { path };
}

/**
 * Returns the text of the first match of `pattern` in `content` whose line
 * span covers the 1-based line `lineno`, or undefined when no match does.
 */
function findEnclosingMatch(content: string, pattern: RegExp, lineno: number): string | undefined {
  const countNewlines = (text: string): number => {
    let count = 0;
    for (let i = 0; i < text.length; i++) {
      if (text.charCodeAt(i) === 10) count += 1;
    }
    return count;
  };

  // Scan with a private global copy so the caller's lastIndex is never touched.
  const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
  const scanner = new RegExp(pattern.source, flags);

  // `line` is always the 1-based line number of the character at `offset`.
  let line = 1;
  let offset = 0;
  let match: RegExpExecArray | null;
  while ((match = scanner.exec(content)) !== null) {
    const text = match[0];
    if (text.length === 0) {
      // Guard against an endless loop on zero-width matches.
      scanner.lastIndex += 1;
      continue;
    }

    line += countNewlines(content.slice(offset, match.index));
    offset = match.index;

    const startLine = line;
    // Matches are reported in file order; nothing later can cover the line.
    if (startLine > lineno) break;

    // A match that ends with a newline does not extend onto the next line.
    const endLine = startLine + countNewlines(text) - (text.endsWith('\n') ? 1 : 0);
    if (lineno <= endLine) return text;
  }
  return undefined;
}

/**
 * Handler of the `ask` command.
 *
 * Sends the user's question to the OpenAI chat completion API together with
 * four callable functions (`showPlan`, `fetchDiagrams`, `getDiagramDetails`,
 * `lookupSourceCode`) that let the model search the local AppMaps and read
 * source files. Progress is written with `warn`; the final answer and the
 * matching events, ordered by descending score, are written with
 * `console.log`.
 *
 * @param argv parsed command line arguments; reads `verbose`, `directory`,
 *   `appmapDir`, `question`, `maxCodeObjectMatches` and `maxDiagramMatches`
 * @throws Error when the `OPENAI_API_KEY` environment variable is not set
 */
export const handler = async (argv: any) => {
  verbose(argv.verbose);
  handleWorkingDirectory(argv.directory);
  const { question, maxCodeObjectMatches, maxDiagramMatches } = argv;
  const appmapDir = await locateAppMapDir(argv.appmapDir);

  const findAppMaps = new FindAppMaps(appmapDir);
  await findAppMaps.initialize();

  /** Prints the plan supplied by the model; a malformed argument is reported and ignored. */
  function showPlan(paramStr: string) {
    let params: any;
    try {
      params = JSON.parse(paramStr) as { plan: string };
    } catch (e) {
      warn(`Failed to parse plan: ${paramStr}: ${e}`);
      return;
    }
    warn(`AI Plan: ${params.plan}`);
  }

  /** Searches the AppMap index with the user's question. */
  function fetchDiagrams(): FindAppMapSearchResult[] {
    warn(`Fetching diagrams`);
    return findAppMaps.search(question, { maxResults: maxDiagramMatches });
  }

  // Event matches from every getDiagramDetails call, printed once the model has answered.
  const diagramDetailsResults = new Array<FindEventSearchResult>();

  /**
   * Loads each requested diagram, searches its events with the user's question
   * and returns a text summary, trimmed metadata and the matching events.
   */
  async function getDiagramDetails(paramStr: string): Promise<DiagramDetailsResult[]> {
    const params = JSON.parse(paramStr) as DiagramDetailsParam;
    const { diagramIds } = params;
    warn(`Getting details for diagram ${diagramIds}, retrieved by "${question}"`);
    const result = new Array<DiagramDetailsResult>();
    for (const diagramId of diagramIds) {
      warn(`Loading AppMap ${diagramId} and pruning to 1MB`);

      const index = new FindEvents(diagramId);
      index.maxSize = 1024 * 1024;
      await index.initialize();
      const searchResults = index.search(question, { maxResults: maxCodeObjectMatches });
      diagramDetailsResults.push(...searchResults);

      // One summary line per root event: "<code object id> (key=value,...)".
      const diagramText = new Array<string>();
      for (const event of index.appmap.rootEvents()) {
        const actionInfo: ActionInfo = { eventId: event.id };
        if (event.elapsedTime) actionInfo.elapsed = event.elapsedTime;
        if (event.codeObject.location) actionInfo.location = event.codeObject.location;
        const actionInfoStr = (Object.keys(actionInfo) as (keyof ActionInfo)[])
          .sort()
          .map((key) => {
            const value = actionInfo[key];
            return `${key}=${value}`;
          })
          .join(',');
        diagramText.push(
          `${event.codeObject.id}${actionInfoStr !== '' ? ` (${actionInfoStr})` : ''}`
        );
      }

      const metadata = index.appmap.metadata;
      delete metadata['git'];
      delete (metadata as any)['client'];
      // TODO: Do we want the AI to read the source code of the test case?
      delete metadata['source_location'];
      result.push({ metadata, summary: diagramText.join('\n'), keyEvents: searchResults });
    }

    return result;
  }

  /**
   * Reads source code for each requested `<path>[:<line number>]` location.
   *
   * Without a line number the whole file is returned. With a line number, the
   * function definition (as recognized by the per-extension pattern) whose
   * lines cover that line is returned. Locations whose file does not exist,
   * whose extension has no pattern, or whose line is not inside any recognized
   * definition are left out of the result.
   *
   * NOTE(review): the paths come from the model and are read without any
   * restriction; consider limiting them to the project directory.
   */
  async function lookupSourceCode(
    locationStr: string
  ): Promise<LookupSourceCodeResult | undefined> {
    const params = JSON.parse(locationStr) as LookupSourceCodeParam;

    // NOTE(review): the '.py' pattern ends in a lazy group, so it matches the
    // `def` line only; the '.rb' pattern needs an `end` in column 0.
    const languageRegexMap: Record<string, RegExp> = {
      '.rb': new RegExp(`def\\s+\\w+.*?\\n(.*?\\n)*?^end\\b`, 'gm'),
      '.java': new RegExp(
        `(?:public|private|protected)?\\s+(?:static\\s+)?(?:final\\s+)?(?:synchronized\\s+)?(?:abstract\\s+)?(?:native\\s+)?(?:strictfp\\s+)?(?:transient\\s+)?(?:volatile\\s+)?(?:\\w+\\s+)*\\w+\\s+\\w+\\s*\\([^)]*\\)\\s*(?:throws\\s+\\w+(?:,\\s*\\w+)*)?\\s*\\{(?:[^{}]*\\{[^{}]*\\})*[^{}]*\\}`,
        'gm'
      ),
      '.py': new RegExp(`def\\s+\\w+.*?:\\n(.*?\\n)*?`, 'gm'),
      '.js': new RegExp(
        `(?:async\\s+)?function\\s+\\w+\\s*\\([^)]*\\)\\s*\\{(?:[^{}]*\\{[^{}]*\\})*[^{}]*\\}`,
        'gm'
      ),
    };

    const result: LookupSourceCodeResult = {};
    for (const location of params.locations) {
      const { path, lineno } = parseSourceLocation(location);
      if (!(await exists(path))) continue;

      const fileContent = await readFile(path, 'utf-8');
      let functionContent: string | undefined;
      if (lineno === undefined) {
        functionContent = fileContent;
      } else {
        const extension = path.substring(path.lastIndexOf('.'));
        const regex = languageRegexMap[extension];
        // Pick the definition that actually contains the requested line
        // rather than the first definition in the file.
        if (regex) functionContent = findEnclosingMatch(fileContent, regex, lineno);
      }
      if (functionContent) result[location] = functionContent;
    }
    return result;
  }

  const systemMessages: ChatCompletionMessageParam[] = [
    'You are an assistant that answers questions about the design and architecture of code.',
    'You answer these questions by accessing a knowledge base of sequence diagrams.',
    'Each sequence diagram conists of a series of events, such as function calls, HTTP server requests, SQL queries, etc.',
    'Before each function call, call "showPlan" function with a Markdown document that describes your strategy for answering the question.',
    `Begin by calling the "fetchDiagrams" function to obtain the diagrams that are most relevant to the user's question.`,
    'Next, use the "getDiagramDetails" function get details about the events that occur with in the matching diagrams.',
    'Enhance your answer by using "lookupSourceCode" function to get the source code for the most relevant functions.',
    'Finally, respond with a Markdown document that summarizes the diagrams and answers the question.',
    'Never emit phrases like "note that the actual behavior may vary between different applications"',
  ].map((msg) => ({
    content: msg,
    role: 'system',
  }));

  const userMessage: ChatCompletionMessageParam = {
    content: question,
    role: 'user',
  };

  const messages = [...systemMessages, userMessage];

  const openai = buildOpenAI();
  const runFunctions = openai.beta.chat.completions.runFunctions({
    model: 'gpt-4',
    messages,
    function_call: 'auto',
    functions: [
      {
        function: showPlan,
        description: 'Print the plan for answering the question',
        parameters: {
          type: 'object',
          properties: {
            plan: {
              type: 'string',
              description: 'The plan in Markdown format',
            },
          },
          required: ['plan'],
        },
      },
      {
        function: fetchDiagrams,
        description: `Obtain sequence diagrams that are relevant to the user's question. The response is a list of diagram ids.`,
        parameters: {
          type: 'object',
          properties: {},
        },
      },
      {
        function: getDiagramDetails,
        description: `Get details about diagrams, including their name, code language, frameworks, source location, exceptions raised.`,
        parameters: {
          type: 'object',
          properties: {
            diagramIds: {
              type: 'array',
              description: 'Array of diagram ids',
              items: {
                type: 'string',
              },
            },
          },
          // Only properties that are declared above and actually read by
          // getDiagramDetails may be required.
          required: ['diagramIds'],
        },
      },
      {
        function: lookupSourceCode,
        description: `Get the source code for a specific function.`,
        parameters: {
          type: 'object',
          properties: {
            locations: {
              type: 'array',
              description: `An array of source code locations in the format <path>[:<line number>]. Line number can be omitted if it's not known.`,
              items: {
                type: 'string',
              },
            },
          },
          required: ['locations'],
        },
      },
    ],
  });

  // Function calls are always echoed; their results only in verbose mode.
  runFunctions.on('functionCall', (data) => {
    warn(JSON.stringify(data, null, 2));
  });
  runFunctions.on('finalFunctionCall', (data) => {
    warn(JSON.stringify(data, null, 2));
  });
  runFunctions.on('functionCallResult', (data) => {
    if (verbose()) warn(JSON.stringify(data));
  });
  runFunctions.on('finalFunctionCallResult', (data) => {
    if (verbose()) warn(JSON.stringify(data));
  });

  const response = await runFunctions.finalContent();
  if (!response) {
    warn(`No response from OpenAI`);
    return;
  }
  console.log(response);
  console.log('');
  console.log('The best matching sequence diagram events are:');
  console.log('');
  diagramDetailsResults.sort((a, b) => b.score - a.score);
  for (const event of diagramDetailsResults) {
    console.log(`  ${event.fqid} (${event.score})`);
  }
};
Loading