diff --git a/packages/cli/src/cmds/search/search.ts b/packages/cli/src/cmds/search/search.ts index db0e4106bf..0cf5d1f634 100644 --- a/packages/cli/src/cmds/search/search.ts +++ b/packages/cli/src/cmds/search/search.ts @@ -16,7 +16,7 @@ import { } from '../../fulltext/FindEvents'; import { openInBrowser } from '../open/openers'; import { buildAppMapIndex, search } from '../../rpc/explain/index/appmap-index'; -import buildIndexInTempDir from '../../rpc/explain/build-index-in-temp-dir'; +import buildIndexInTempDir from '../../rpc/explain/index/build-index-in-temp-dir'; export const command = 'search '; export const describe = diff --git a/packages/cli/src/cmds/search/searchSingleAppMap.ts b/packages/cli/src/cmds/search/searchSingleAppMap.ts index 21358a5a25..bdf10832f6 100644 --- a/packages/cli/src/cmds/search/searchSingleAppMap.ts +++ b/packages/cli/src/cmds/search/searchSingleAppMap.ts @@ -12,6 +12,7 @@ export default async function searchSingleAppMap( query: string, options: SearchOptions = {} ): Promise { + // eslint-disable-next-line no-param-reassign if (appmap.endsWith('.appmap.json')) appmap = appmap.slice(0, -'.appmap.json'.length); const findEvents = new FindEvents(appmap); diff --git a/packages/cli/src/fulltext/ref.ts b/packages/cli/src/fulltext/ref.ts deleted file mode 100644 index 6e73384cb9..0000000000 --- a/packages/cli/src/fulltext/ref.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { join } from 'path'; - -export function packRef(directory: string, appmapId: string): string { - return JSON.stringify({ directory, appmapId }); -} - -export function refToAppMapDir(ref: string): string { - const { directory, appmapId } = unpackRef(ref); - return join(directory, appmapId); -} - -export function unpackRef(ref: string): { directory: string; appmapId: string } { - return JSON.parse(ref); -} diff --git a/packages/cli/src/rpc/explain/EventCollector.ts b/packages/cli/src/rpc/explain/EventCollector.ts deleted file mode 100644 index f5c3410118..0000000000 --- a/packages/cli/src/rpc/explain/EventCollector.ts +++ /dev/null @@ -1,78 +0,0 @@ -import { isAbsolute, join } from 'path'; -import { ContextV2 } from '@appland/navie'; -import { SearchRpc } from '@appland/rpc'; -import { SearchResponse as AppMapSearchResponse } from './index/appmap-match'; -import FindEvents, { - SearchResponse as EventSearchResponse, - SearchOptions as EventsSearchOptions, - SearchOptions, -} from '../../fulltext/FindEvents'; -import buildContext from './buildContext'; -import { textSearchResultToRpcSearchResult } from './textSearchResultToRpcSearchResult'; - -export default class EventCollector { - appmapIndexes = new Map(); - - constructor(private query: string, private appmapSearchResponse: AppMapSearchResponse) {} - - async collectEvents( - maxEvents: number, - excludePatterns?: RegExp[], - includePatterns?: RegExp[], - includeTypes?: ContextV2.ContextItemType[] - ): Promise<{ - results: SearchRpc.SearchResult[]; - context: ContextV2.ContextResponse; - contextSize: number; - }> { - const results = new Array(); - - for (const result of this.appmapSearchResponse.results) { - let { appmap } = result; - if (!isAbsolute(appmap)) appmap = join(result.directory, appmap); - - const options: EventsSearchOptions = { - maxResults: maxEvents, - }; - if (includePatterns) options.includePatterns = includePatterns; - if (excludePatterns) options.excludePatterns = excludePatterns; - - const eventsSearchResponse = await this.findEvents(appmap, options); - results.push({ - appmap: appmap, - directory: result.directory, - events: eventsSearchResponse.results.map(textSearchResultToRpcSearchResult), - score: result.score, - }); - } - - const isIncludedType = (item: ContextV2.ContextItem) => { - if (includeTypes && !includeTypes.some((type) => type === item.type)) return false; - - return true; - }; - - const context = (await buildContext(results)).filter(isIncludedType); - - const contextSize = context.reduce((acc, item) => acc + item.content.length, 0); - - return { results, context, contextSize }; - } - - async appmapIndex(appmap: string): Promise { - let index = this.appmapIndexes.get(appmap); - if (!index) { - index = new FindEvents(appmap); - await index.initialize(); - this.appmapIndexes.set(appmap, index); - } - return index; - } - - async findEvents(appmap: string, options: SearchOptions): Promise { - if (appmap.endsWith('.appmap.json')) appmap = appmap.slice(0, -'.appmap.json'.length); - - const index = await this.appmapIndex(appmap); - return index.search(this.query, options); - } -} diff --git a/packages/cli/src/rpc/explain/LocationContextCollector.ts b/packages/cli/src/rpc/explain/LocationContextCollector.ts deleted file mode 100644 index 56b69b24eb..0000000000 --- a/packages/cli/src/rpc/explain/LocationContextCollector.ts +++ /dev/null @@ -1,93 +0,0 @@ -import { readFile } from 'fs/promises'; -import { warn } from 'console'; -import { isAbsolute, join } from 'path'; -import { ContextV2 } from '@appland/navie'; -import { SearchRpc } from '@appland/rpc'; -import Location from './location'; -import { exists, isFile, verbose } from '../../utils'; - -/** - * LocationContextCollector is responsible for collecting context information from specified locations - * within source directories. It reads the contents of files at these locations and extracts code snippets - * to build a context response. - * - * Primary effects: - * - Iterates over provided locations and determines if they are absolute or relative paths. - * - For each location, constructs the full path and checks if the file exists and is a valid file. - * - Reads the contents of the file and extracts a code snippet based on the location. - * - Builds a context response containing the extracted code snippets and their respective locations. - * - Returns the context response along with a search response. - */ -export default class LocationContextCollector { - constructor(private sourceDirectories: string[], private locations: Location[]) {} - - async collectContext(): Promise<{ - searchResponse: SearchRpc.SearchResponse; - context: ContextV2.ContextResponse; - }> { - const result: { searchResponse: SearchRpc.SearchResponse; context: ContextV2.ContextResponse } = - { searchResponse: { results: [], numResults: 0 }, context: [] }; - - const candidateLocations = new Array<{ location: Location; directory?: string }>(); - for (const location of this.locations) { - const { path } = location; - if (isAbsolute(path)) { - const directory = this.sourceDirectories.find((dir) => path.startsWith(dir)); - candidateLocations.push({ location, directory }); - } else { - for (const sourceDirectory of this.sourceDirectories) { - candidateLocations.push({ location, directory: sourceDirectory }); - } - } - } - - if (verbose()) - warn( - `[location-context] Candidate locations: ${candidateLocations - .map((loc) => loc.location.toString()) - .join(', ')}` - ); - - for (const { location, directory } of candidateLocations) { - let pathTokens: string[] = []; - - if (isAbsolute(location.path)) pathTokens = [location.path]; - else if (directory) pathTokens = [directory, location.path].filter(Boolean); - - const path = join(...pathTokens); - if (!(await exists(path))) { - if (verbose()) warn(`[location-context] Skipping non-existent location: ${path}`); - continue; - } - if (!(await isFile(path))) { - if (verbose()) warn(`[location-context] Skipping non-file location: ${path}`); - continue; - } - - let contents: string | undefined; - try { - contents = await readFile(path, 'utf8'); - } catch (e) { - warn(`[location-context] Failed to read file: ${path}`); - continue; - } - - if (verbose()) - warn( - `[location-context] Extracting snippet for location: ${location.toString()} (${ - contents.length - } bytes)` - ); - - const snippet = location.snippet(contents); - result.context.push({ - type: ContextV2.ContextItemType.CodeSnippet, - content: snippet, - location: location.toString(), - directory, - }); - } - - return result; - } -} diff --git a/packages/cli/src/rpc/explain/SearchContextCollector.ts b/packages/cli/src/rpc/explain/SearchContextCollector.ts deleted file mode 100644 index 30d4d2613f..0000000000 --- a/packages/cli/src/rpc/explain/SearchContextCollector.ts +++ /dev/null @@ -1,165 +0,0 @@ -import { log } from 'console'; -import sqlite3 from 'better-sqlite3'; - -import { ContextV2, applyContext } from '@appland/navie'; -import { SearchRpc } from '@appland/rpc'; -import { FileIndex, FileSearchResult } from '@appland/search'; - -import { SearchResponse as AppMapSearchResponse } from './index/appmap-match'; -import { DEFAULT_MAX_DIAGRAMS } from '../search/search'; -import EventCollector from './EventCollector'; -import indexFiles from './index-files'; -import indexSnippets from './index-snippets'; -import collectSnippets from './collect-snippets'; -import buildIndexInTempDir from './build-index-in-temp-dir'; -import { buildAppMapIndex, search } from './index/appmap-index'; - -export default class SearchContextCollector { - public excludePatterns: RegExp[] | undefined; - public includePatterns: RegExp[] | undefined; - public includeTypes: ContextV2.ContextItemType[] | undefined; - - constructor( - private appmapDirectories: string[], - private sourceDirectories: string[], - private appmaps: string[] | undefined, - private vectorTerms: string[], - private charLimit: number - ) {} - - async collectContext(): Promise<{ - searchResponse: SearchRpc.SearchResponse; - context: ContextV2.ContextResponse; - }> { - let appmapSearchResponse: AppMapSearchResponse; - if (this.appmaps) { - const results = this.appmaps - .map((appmap) => { - const directory = this.appmapDirectories.find((dir) => appmap.startsWith(dir)); - if (!directory) return undefined; - - return { - appmap, - directory, - score: 1, - }; - }) - .filter(Boolean) as SearchRpc.SearchResult[]; - appmapSearchResponse = { - type: 'appmap', - stats: { - max: 1, - mean: 1, - median: 1, - stddev: 0, - }, - results, - numResults: this.appmaps.length, - }; - } else { - const appmapIndex = await buildIndexInTempDir('appmaps', async (indexFile) => { - const db = new sqlite3(indexFile); - const fileIndex = new FileIndex(db); - await buildAppMapIndex(fileIndex, this.appmapDirectories); - return fileIndex; - }); - const selectedAppMaps = await search( - appmapIndex.index, - this.vectorTerms.join(' OR '), - DEFAULT_MAX_DIAGRAMS - ); - appmapIndex.close(); - - appmapSearchResponse = { - results: selectedAppMaps.results, - numResults: selectedAppMaps.results.length, - stats: selectedAppMaps.stats, - type: 'appmap', - }; - - log(`[search-context] Matched ${selectedAppMaps.results.length} AppMaps.`); - } - - const fileIndex = await buildIndexInTempDir('files', async (indexFile) => { - const db = new sqlite3(indexFile); - return await indexFiles( - db, - this.sourceDirectories, - this.includePatterns, - this.excludePatterns - ); - }); - let fileSearchResults: FileSearchResult[]; - try { - fileSearchResults = fileIndex.index.search(this.vectorTerms.join(' OR ')); - } finally { - fileIndex.close(); - } - - const snippetIndex = await buildIndexInTempDir('snippets', async (indexFile) => { - const db = new sqlite3(indexFile); - return await indexSnippets(db, fileSearchResults); - }); - - let contextCandidate: { - results: SearchRpc.SearchResult[]; - context: ContextV2.ContextResponse; - contextSize: number; - }; - try { - const eventsCollector = new EventCollector(this.vectorTerms.join(' '), appmapSearchResponse); - - let charCount = 0; - let maxEventsPerDiagram = 5; - log(`[search-context] Requested char limit: ${this.charLimit}`); - for (;;) { - log(`[search-context] Collecting context with ${maxEventsPerDiagram} events per diagram.`); - - contextCandidate = await eventsCollector.collectEvents( - maxEventsPerDiagram, - this.excludePatterns, - this.includePatterns, - this.includeTypes - ); - - const codeSnippetCount = contextCandidate.context.filter( - (item) => item.type === ContextV2.ContextItemType.CodeSnippet - ).length; - - const charLimit = codeSnippetCount === 0 ? this.charLimit : this.charLimit / 4; - const sourceContext = collectSnippets( - snippetIndex.index, - this.vectorTerms.join(' OR '), - charLimit - ); - contextCandidate.context = contextCandidate.context.concat(sourceContext); - - const appliedContext = applyContext(contextCandidate.context, this.charLimit); - const appliedContextSize = appliedContext.reduce( - (acc, item) => acc + item.content.length, - 0 - ); - contextCandidate.context = appliedContext; - contextCandidate.contextSize = appliedContextSize; - log(`[search-context] Collected an estimated ${appliedContextSize} characters.`); - - if (appliedContextSize === charCount || appliedContextSize > this.charLimit) { - break; - } - charCount = appliedContextSize; - maxEventsPerDiagram = Math.ceil(maxEventsPerDiagram * 1.5); - log(`[search-context] Increasing max events per diagram to ${maxEventsPerDiagram}.`); - } - } finally { - snippetIndex.close(); - } - - return { - searchResponse: { - results: contextCandidate.results, - numResults: appmapSearchResponse.numResults, - }, - context: contextCandidate.context, - }; - } -} diff --git a/packages/cli/src/rpc/explain/appmap-location.ts b/packages/cli/src/rpc/explain/appmap-location.ts index 68e6f4f572..b21f30c8bb 100644 --- a/packages/cli/src/rpc/explain/appmap-location.ts +++ b/packages/cli/src/rpc/explain/appmap-location.ts @@ -1,4 +1,4 @@ -import { SearchRpc } from '@appland/rpc'; +import { SearchRpc } from "@appland/rpc"; export default function appmapLocation(appmap: string, event?: SearchRpc.EventMatch): string { const appmapFile = [appmap, 'appmap.json'].join('.'); diff --git a/packages/cli/src/rpc/explain/buildContext.ts b/packages/cli/src/rpc/explain/buildContext.ts deleted file mode 100644 index 96c8d5c217..0000000000 --- a/packages/cli/src/rpc/explain/buildContext.ts +++ /dev/null @@ -1,87 +0,0 @@ -import { SearchRpc } from '@appland/rpc'; - -import lookupSourceCode from './lookupSourceCode'; -import { warn } from 'console'; -import { ContextV2 } from '@appland/navie'; -import buildSequenceDiagram from './build-sequence-diagram'; - -/** - * Processes search results to build sequence diagrams, code snippets, and code object sets. This is the format - * expected by the Navie AI. - * - * Given a list of search results, `buildContext` asynchronously: - * - * - Generates sequence diagrams for each result using event data and a filtered appmap, - * formatting the output as PlantUML and storing it in an array. The filtered sequence diagram - * includes only the code objects associated with the events in the search result, and their near neighbors. - * - * - Collects and de-duplicates code snippets tied to specific events' locations, storing them in a map with the location as the key. - * - * - Gathers a set of unique code objects identified by their fully qualified identifiers (fqid) from the events. - * These code objects are most commonly SQL queries and HTTP requests (client and server), since code snipptes are stored separately. - * The term "data requests" is being phased in to replace "codeObjects". - */ -export default async function buildContext( - searchResults: SearchRpc.SearchResult[] -): Promise { - const sequenceDiagrams = new Array(); - const codeSnippets = new Array(); - const dataRequests = new Array(); - - const codeSnippetLocations = new Set(); - const dataRequestContent = new Set(); - - const appmapLocation = (appmap: string, event?: SearchRpc.EventMatch) => { - const appmapFile = [appmap, 'appmap.json'].join('.'); - const tokens = [appmapFile]; - if (event?.eventIds.length) tokens.push(String(event.eventIds[0])); - return tokens.join(':'); - }; - - const examinedLocations = new Set(); - for (const result of searchResults) { - try { - const diagram = await buildSequenceDiagram(result); - sequenceDiagrams.push(diagram); - } catch (e) { - warn(`Failed to build sequence diagram for ${result.appmap}`); - warn(e); - } - for (const event of result.events) { - if (!event.location) { - if (!dataRequestContent.has(event.fqid)) { - dataRequestContent.add(event.fqid); - dataRequests.push({ - directory: result.directory, - location: appmapLocation(result.appmap, event), - type: ContextV2.ContextItemType.DataRequest, - content: event.fqid, - score: event.score, - }); - } - continue; - } - - if (examinedLocations.has(event.location)) continue; - - examinedLocations.add(event.location); - - if (codeSnippetLocations.has(event.location)) continue; - - codeSnippetLocations.add(event.location); - - const snippets = await lookupSourceCode(result.directory, event.location); - if (snippets) { - codeSnippets.push({ - directory: result.directory, - type: ContextV2.ContextItemType.CodeSnippet, - location: event.location, - content: snippets.join('\n'), - score: event.score, - }); - } - } - } - - return [...sequenceDiagrams, ...codeSnippets, ...dataRequests]; -} diff --git a/packages/cli/src/rpc/explain/collect-context.ts b/packages/cli/src/rpc/explain/collect-context.ts new file mode 100644 index 0000000000..a322dab934 --- /dev/null +++ b/packages/cli/src/rpc/explain/collect-context.ts @@ -0,0 +1,136 @@ +import { ContextV2 } from '@appland/navie'; +import { SearchRpc } from '@appland/rpc'; +import { queryKeywords } from '@appland/search'; + +import { SearchResult as EventSearchResult } from '../../fulltext/FindEvents'; +import Location from './location'; +import { warn } from 'console'; +import collectLocationContext from './collect-location-context'; +import collectSearchContext from './collect-search-context'; + +export const buildExclusionPattern = (dirName: string): RegExp => { + const dirNamePattern = dirName.replace('.', '\\.'); + return new RegExp(`(^|[/\\\\])${dirNamePattern}([/\\\\]|$)`); +}; + +const EXCLUDE_DIRS = ['.appmap', '.navie', '.yarn', 'venv', '.venv', 'node_modules', 'vendor']; + +export function textSearchResultToRpcSearchResult( + eventResult: EventSearchResult +): SearchRpc.EventMatch { + const result: SearchRpc.EventMatch = { + fqid: eventResult.fqid, + score: eventResult.score, + eventIds: eventResult.eventIds, + }; + if (eventResult.location) result.location = eventResult.location; + if (eventResult.elapsed) result.elapsed = eventResult.elapsed; + return result; +} + +export const CHARS_PER_SNIPPET = 50; + +export type ContextRequest = { + appmaps?: string[]; + excludePatterns?: RegExp[]; + includePatterns?: RegExp[]; + includeTypes?: ContextV2.ContextItemType[]; + locations?: Location[]; +}; + +export function buildContextRequest( + appmapDirectories: string[], + sourceDirectories: string[], + appmaps: string[] | undefined, + searchTerms: string[], + charLimit: number, + filters: ContextV2.ContextFilters +): { vectorTerms: string[]; request: ContextRequest } { + const vectorTerms = searchTerms + .map((term) => queryKeywords(term)) + .flat() + .map((t) => t.trim()) + .filter(Boolean); + + const request: ContextRequest = {}; + + const contextParameters: Record = { + sourceDirectories: sourceDirectories.join(', '), + charLimit, + }; + if (appmapDirectories.length > 0) + contextParameters.appmapDirectories = appmapDirectories.join(', '); + if (vectorTerms.length > 0) contextParameters.keywords = vectorTerms.join(', '); + if (appmaps && appmaps.length > 0) contextParameters.appmaps = appmaps.join(', '); + if (filters.recent) contextParameters.recent = filters.recent; + if (filters.locations) contextParameters.locations = filters.locations.join(', '); + if (filters.itemTypes) contextParameters.itemTypes = filters.itemTypes.join(', '); + if (filters.labels && filters.labels.length > 0) + contextParameters.labels = filters.labels + .map((label) => `${label.name}(${label.weight})`) + .join(', '); + if (filters.exclude) contextParameters.exclude = filters.exclude.join(', '); + if (filters.include) contextParameters.include = filters.include.join(', '); + + const contextDebugString = Object.entries(contextParameters) + .map(([key, value]) => `${key}: ${value}`) + .join(', '); + warn(`Collecting context with parameters: ${contextDebugString}`); + + if (appmaps) request.appmaps = appmaps; + + const excludePatterns: RegExp[] = []; + if (filters?.exclude) + excludePatterns.push(...filters.exclude.map((pattern) => new RegExp(pattern))); + if (filters?.include) + request.includePatterns = filters.include.map((pattern) => new RegExp(pattern)); + if (filters?.itemTypes) request.includeTypes = filters.itemTypes.map((type) => type); + if (filters?.locations) { + request.locations = filters.locations + .map((location) => Location.parse(location)) + .filter(Boolean) as Location[]; + warn(`Parsed locations: ${request.locations.map((loc) => loc.toString()).join(', ')}`); + } + + const appendIfNotExists = (patterns: RegExp[], pattern: RegExp): RegExp[] => { + if (!patterns.find((p) => p.source === pattern.source)) patterns.push(pattern); + return patterns; + }; + + for (const dir of EXCLUDE_DIRS) appendIfNotExists(excludePatterns, buildExclusionPattern(dir)); + + request.excludePatterns = excludePatterns; + + return { vectorTerms, request }; +} + +export default async function collectContext( + appmapDirectories: string[], + sourceDirectories: string[], + charLimit: number, + vectorTerms: string[], + request: ContextRequest +): Promise<{ searchResponse: SearchRpc.SearchResponse; context: ContextV2.ContextResponse }> { + let searchResponse: SearchRpc.SearchResponse = { results: [], numResults: 0 }; + const context: ContextV2.ContextResponse = []; + + if (request.locations && request.locations.length > 0) { + const locationResult = await collectLocationContext(sourceDirectories, request.locations); + context.push(...locationResult); + } + + if (vectorTerms.length > 0 && charLimit > 0) { + const searchResult = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit, + request + ); + + searchResponse = searchResult.searchResponse; + context.push(...searchResult.context); + } + + return { searchResponse, context }; +} diff --git a/packages/cli/src/rpc/explain/collect-location-context.ts b/packages/cli/src/rpc/explain/collect-location-context.ts new file mode 100644 index 0000000000..31ee7616ab --- /dev/null +++ b/packages/cli/src/rpc/explain/collect-location-context.ts @@ -0,0 +1,92 @@ +import { readFile } from 'fs/promises'; +import { warn } from 'console'; +import { isAbsolute, join } from 'path'; +import { ContextV2 } from '@appland/navie'; +import Location from './location'; +import { exists, isFile, verbose } from '../../utils'; + +export type LocationContextRequest = { + sourceDirectories: string[]; + locations: Location[]; +}; + +/** + * Collect context information from specified locations + * within source directories. It reads the contents of files at these locations and extracts code snippets + * to build a context response. + * + * Primary effects: + * - Iterates over provided locations and determines if they are absolute or relative paths. + * - For each location, constructs the full path and checks if the file exists and is a valid file. + * - Reads the contents of the file and extracts a code snippet based on the location. + * - Builds a context response containing the extracted code snippets and their respective locations. + * - Returns the context response along with a search response. + */ +export default async function collectLocationContext( + sourceDirectories: string[], + locations: Location[] +): Promise { + const result: ContextV2.ContextResponse = []; + + const candidateLocations = new Array<{ location: Location; directory?: string }>(); + for (const location of locations) { + const { path } = location; + if (isAbsolute(path)) { + const directory = sourceDirectories.find((dir) => path.startsWith(dir)); + candidateLocations.push({ location, directory }); + } else { + for (const sourceDirectory of sourceDirectories) { + candidateLocations.push({ location, directory: sourceDirectory }); + } + } + } + + if (verbose()) + warn( + `[location-context] Candidate locations: ${candidateLocations + .map((loc) => loc.location.toString()) + .join(', ')}` + ); + + for (const { location, directory } of candidateLocations) { + let pathTokens: string[] = []; + + if (isAbsolute(location.path)) pathTokens = [location.path]; + else if (directory) pathTokens = [directory, location.path].filter(Boolean); + + const path = join(...pathTokens); + if (!(await exists(path))) { + if (verbose()) warn(`[location-context] Skipping non-existent location: ${path}`); + continue; + } + if (!(await isFile(path))) { + if (verbose()) warn(`[location-context] Skipping non-file location: ${path}`); + continue; + } + + let contents: string | undefined; + try { + contents = await readFile(path, 'utf8'); + } catch (e) { + warn(`[location-context] Failed to read file: ${path}`); + continue; + } + + if (verbose()) + warn( + `[location-context] Extracting snippet for location: ${location.toString()} (${ + contents.length + } bytes)` + ); + + const snippet = location.snippet(contents); + result.push({ + type: ContextV2.ContextItemType.CodeSnippet, + content: snippet, + location: location.toString(), + directory, + }); + } + + return result; +} diff --git a/packages/cli/src/rpc/explain/collect-search-context.ts b/packages/cli/src/rpc/explain/collect-search-context.ts new file mode 100644 index 0000000000..60bc4015f2 --- /dev/null +++ b/packages/cli/src/rpc/explain/collect-search-context.ts @@ -0,0 +1,155 @@ +import { log } from 'console'; + +import { ContextV2, applyContext } from '@appland/navie'; +import { SearchRpc } from '@appland/rpc'; + +import { DEFAULT_MAX_DIAGRAMS } from '../search/search'; +import { SearchResponse as AppMapSearchResponse } from './index/appmap-match'; +import { searchAppMapFiles } from './index/appmap-file-index'; +import { searchProjectFiles } from './index/project-file-index'; +import { + buildProjectFileSnippetIndex, + snippetContextItem, +} from './index/project-file-snippet-index'; + +type ContextCandidate = { + results: SearchRpc.SearchResult[]; + context: ContextV2.ContextResponse; + contextSize: number; +}; + +export type SearchContextRequest = { + appmaps?: string[]; + excludePatterns?: RegExp[]; + includePatterns?: RegExp[]; + includeTypes?: ContextV2.ContextItemType[]; +}; + +export default async function collectSearchContext( + appmapDirectories: string[], + sourceDirectories: string[], + vectorTerms: string[], + charLimit: number, + request: SearchContextRequest = {} +): Promise<{ + searchResponse: SearchRpc.SearchResponse; + context: ContextV2.ContextResponse; +}> { + let appmapSearchResponse: AppMapSearchResponse; + if (request.appmaps) { + const results = request.appmaps + .map((appmap) => { + const directory = appmapDirectories.find((dir) => appmap.startsWith(dir)); + if (!directory) return undefined; + + return { + appmap, + directory, + score: 1, + }; + }) + .filter(Boolean) as SearchRpc.SearchResult[]; + appmapSearchResponse = { + type: 'appmap', + stats: { + max: 1, + mean: 1, + median: 1, + stddev: 0, + }, + results, + numResults: results.length, + }; + } else { + const selectedAppMaps = await searchAppMapFiles( + appmapDirectories, + vectorTerms, + DEFAULT_MAX_DIAGRAMS + ); + + appmapSearchResponse = { + results: selectedAppMaps.results, + numResults: selectedAppMaps.results.length, + stats: selectedAppMaps.stats, + type: 'appmap', + }; + + log(`[search-context] Matched ${selectedAppMaps.results.length} AppMaps.`); + } + + const fileSearchResults = await searchProjectFiles( + sourceDirectories, + request.includePatterns, + request.excludePatterns, + vectorTerms + ); + + const snippetIndex = await buildProjectFileSnippetIndex( + fileSearchResults, + appmapSearchResponse.results + ); + let contextCandidate: ContextCandidate; + try { + let charCount = 0; + let maxSnippets = 50; + log(`[search-context] Requested char limit: ${charLimit}`); + for (;;) { + log(`[search-context] Collecting context with ${maxSnippets} events per diagram.`); + + // Collect all events from AppMaps and use them to build the sequence diagram + // The unsolved part here is getting event ids from code snippets that are associated with + // AppMap events, because this association is not yet implemented. + + // const codeSnippets = new Array(); + // TODO: Apply this.includeTypes + + const snippetSearchResults = snippetIndex.index.searchSnippets( + vectorTerms.join(' OR '), + maxSnippets + ); + const context: ContextV2.ContextItem[] = []; + for (const result of snippetSearchResults) { + const contextItem = snippetContextItem(result); + if (contextItem) context.push(contextItem); + } + + const appmapSearchResults: SearchRpc.SearchResult[] = appmapSearchResponse.results.map( + (result) => ({ + appmap: result.appmap, + directory: result.directory, + score: result.score, + events: [], + }) + ); + + contextCandidate = { + results: appmapSearchResults, + context, + contextSize: snippetSearchResults.reduce((acc, result) => acc + result.content.length, 0), + }; + + const appliedContext = applyContext(contextCandidate.context, charLimit); + const appliedContextSize = appliedContext.reduce((acc, item) => acc + item.content.length, 0); + contextCandidate.context = appliedContext; + contextCandidate.contextSize = appliedContextSize; + log(`[search-context] Collected an estimated ${appliedContextSize} characters.`); + + if (appliedContextSize === charCount || appliedContextSize > charLimit) { + break; + } + charCount = appliedContextSize; + maxSnippets = Math.ceil(maxSnippets * 1.5); + log(`[search-context] Increasing max events per diagram to ${maxSnippets}.`); + } + } finally { + snippetIndex.close(); + } + + return { + searchResponse: { + results: contextCandidate.results, + numResults: appmapSearchResponse.numResults, + }, + context: contextCandidate.context, + }; +} diff --git a/packages/cli/src/rpc/explain/collect-snippets.ts b/packages/cli/src/rpc/explain/collect-snippets.ts deleted file mode 100644 index 1f963952f8..0000000000 --- a/packages/cli/src/rpc/explain/collect-snippets.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { ContextV2 } from '@appland/navie'; -import { parseFileChunkSnippetId, SnippetIndex, SnippetSearchResult } from '@appland/search'; -import { CHARS_PER_SNIPPET } from './collectContext'; - -export default function collectSnippets( - snippetIndex: SnippetIndex, - query: string, - charLimit: number -): ContextV2.ContextResponse { - const snippets = snippetIndex.searchSnippets(query, Math.round(charLimit / CHARS_PER_SNIPPET)); - - const buildLocation = (result: SnippetSearchResult) => { - const snippetId = parseFileChunkSnippetId(result.snippetId); - const { filePath, startLine } = snippetId; - return [filePath, startLine].filter(Boolean).join(':'); - }; - - return snippets.map((snippet) => ({ - directory: snippet.directory, - type: ContextV2.ContextItemType.CodeSnippet, - content: snippet.content, - location: buildLocation(snippet), - })); -} diff --git a/packages/cli/src/rpc/explain/collectContext.ts b/packages/cli/src/rpc/explain/collectContext.ts deleted file mode 100644 index 682f67faaf..0000000000 --- a/packages/cli/src/rpc/explain/collectContext.ts +++ /dev/null @@ -1,159 +0,0 @@ -import { ContextV2 } from '@appland/navie'; -import { SearchRpc } from '@appland/rpc'; -import { queryKeywords } from '@appland/search'; - -import Location from './location'; -import SearchContextCollector from './SearchContextCollector'; -import LocationContextCollector from './LocationContextCollector'; -import { warn } from 'console'; - -export const buildExclusionPattern = (dirName: string): RegExp => { - const dirNamePattern = dirName.replace('.', '\\.'); - return new RegExp(`(^|[/\\\\])${dirNamePattern}([/\\\\]|$)`); -}; - -const EXCLUDE_DIRS = ['.appmap', '.navie', '.yarn', 'venv', '.venv', 'node_modules', 'vendor']; - -export const CHARS_PER_SNIPPET = 50; - -export class ContextCollector { - public appmaps: string[] | undefined; - public excludePatterns: RegExp[] | undefined; - public includePatterns: RegExp[] | undefined; - public includeTypes: ContextV2.ContextItemType[] | undefined; - public locations: Location[] | undefined; - - query: string; - vectorTerms: string[]; - - constructor( - private appmapDirectories: string[], - private sourceDirectories: string[], - vectorTerms: string[], - private charLimit: number - ) { - this.vectorTerms = vectorTerms.map((term) => term.trim()).filter(Boolean); - this.query = vectorTerms.join(' '); - } - - async collectContext(): Promise<{ - searchResponse: SearchRpc.SearchResponse; - context: ContextV2.ContextResponse; - }> { - const result: { searchResponse: SearchRpc.SearchResponse; context: ContextV2.ContextResponse } = - { searchResponse: { results: [], numResults: 0 }, context: [] }; - const mergeSearchResults = (searchResult: { - searchResponse: SearchRpc.SearchResponse; - context: ContextV2.ContextResponse; - }) => { - result.searchResponse.results = result.searchResponse.results.concat( - searchResult.searchResponse.results - ); - result.searchResponse.numResults += searchResult.searchResponse.numResults; - result.context = result.context.concat(searchResult.context); - }; - - if (this.locations && this.locations.length > 0) { - const locationContextCollector = new LocationContextCollector( - this.sourceDirectories, - this.locations - ); - const locationResult = await locationContextCollector.collectContext(); - mergeSearchResults(locationResult); - } - - if (this.vectorTerms.length > 0 && this.charLimit > 0) { - const searchContextCollector = new SearchContextCollector( - this.appmapDirectories, - this.sourceDirectories, - this.appmaps, - this.vectorTerms, - this.charLimit - ); - if (this.includePatterns) searchContextCollector.includePatterns = this.includePatterns; - if (this.excludePatterns) searchContextCollector.excludePatterns = this.excludePatterns; - if (this.includeTypes) searchContextCollector.includeTypes = this.includeTypes; - - const searchResult = await searchContextCollector.collectContext(); - mergeSearchResults(searchResult); - } - - return result; - } -} - -export default async function collectContext( - appmapDirectories: string[], - sourceDirectories: string[], - appmaps: string[] | undefined, - searchTerms: string[], - charLimit: number, - filters: ContextV2.ContextFilters -): Promise<{ - searchResponse: SearchRpc.SearchResponse; - context: ContextV2.ContextResponse; -}> { - const keywords = searchTerms.map((term) => queryKeywords(term)).flat(); - - // recent?: boolean; - // locations?: string[]; - // itemTypes?: ContextItemType[]; - // labels?: ContextLabel[]; - // exclude?: string[]; - // include?: string[]; - - const contextParameters: Record = { - sourceDirectories: sourceDirectories.join(', '), - charLimit, - }; - if (appmapDirectories.length > 0) - contextParameters.appmapDirectories = appmapDirectories.join(', '); - if (keywords.length > 0) contextParameters.keywords = keywords.join(', '); - if (appmaps && appmaps.length > 0) contextParameters.appmaps = appmaps.join(', '); - if (filters.recent) contextParameters.recent = filters.recent; - if (filters.locations) contextParameters.locations = filters.locations.join(', '); - if (filters.itemTypes) contextParameters.itemTypes = filters.itemTypes.join(', '); - if (filters.labels && filters.labels.length > 0) - contextParameters.labels = filters.labels - .map((label) => `${label.name}(${label.weight})`) - .join(', '); - if (filters.exclude) contextParameters.exclude = filters.exclude.join(', '); - if (filters.include) contextParameters.include = filters.include.join(', '); - - const contextDebugString = Object.entries(contextParameters) - .map(([key, value]) => `${key}: ${value}`) - .join(', '); - warn(`Collecting context with parameters: ${contextDebugString}`); - - const contextCollector = new ContextCollector( - appmapDirectories, - sourceDirectories, - keywords, - charLimit - ); - if (appmaps) contextCollector.appmaps = appmaps; - - const excludePatterns: RegExp[] = []; - if (filters?.exclude) - excludePatterns.push(...filters.exclude.map((pattern) => new RegExp(pattern))); - if (filters?.include) - contextCollector.includePatterns = filters.include.map((pattern) => new RegExp(pattern)); - if (filters?.itemTypes) contextCollector.includeTypes = filters.itemTypes.map((type) => type); - if (filters?.locations) { - contextCollector.locations = filters.locations - .map((location) => Location.parse(location)) - .filter(Boolean) as Location[]; - warn(`Parsed locations: ${contextCollector.locations.map((loc) => loc.toString()).join(', ')}`); - } - - const appendIfNotExists = (patterns: RegExp[], pattern: RegExp): RegExp[] => { - if (!patterns.find((p) => p.source === pattern.source)) patterns.push(pattern); - return patterns; - }; - - for (const dir of EXCLUDE_DIRS) appendIfNotExists(excludePatterns, buildExclusionPattern(dir)); - - contextCollector.excludePatterns = excludePatterns; - - return await contextCollector.collectContext(); -} diff --git a/packages/cli/src/rpc/explain/explain.ts b/packages/cli/src/rpc/explain/explain.ts index 62f8ce710f..0c9f10b6c8 100644 --- a/packages/cli/src/rpc/explain/explain.ts +++ b/packages/cli/src/rpc/explain/explain.ts @@ -9,7 +9,7 @@ import { ContextV2, Help, ProjectInfo, UserContext } from '@appland/navie'; import { ExplainRpc } from '@appland/rpc'; import { warn } from 'console'; import EventEmitter from 'events'; -import { basename, join } from 'path'; +import { basename } from 'path'; import { LRUCache } from 'lru-cache'; import detectAIEnvVar from '../../cmds/index/aiEnvVar'; @@ -18,13 +18,12 @@ import collectProjectInfos from '../../cmds/navie/projectInfo'; import configuration, { AppMapDirectory } from '../configuration'; import { getLLMConfiguration } from '../llmConfiguration'; import { RpcError, RpcHandler } from '../rpc'; -import collectContext from './collectContext'; +import collectContext, { buildContextRequest } from './collect-context'; import { initializeHistory } from './navie/historyHelper'; import { ThreadAccessError } from './navie/ihistory'; import INavie, { INavieProvider } from './navie/inavie'; import reportFetchError from './navie/report-fetch-error'; import Thread from './navie/thread'; -import handleReview from './review'; const searchStatusByUserMessageId = new Map(); @@ -149,7 +148,7 @@ export class Explain extends EventEmitter { // The meaning of tokenCount is "try and get at least this many tokens" const charLimit = tokenCount * 3; - const searchResult = await collectContext( + const contextRequest = buildContextRequest( this.appmapDirectories.map((dir) => dir.directory), this.projectDirectories, this.appmaps, @@ -158,6 +157,15 @@ export class Explain extends EventEmitter { data ); + const searchResult = await collectContext( + this.appmapDirectories.map((dir) => dir.directory), + this.projectDirectories, + charLimit, + contextRequest.vectorTerms, + contextRequest.request + ); + + // TODO: Append this result rather than over-writing, to allow Navie to request context more than once. this.status.searchResponse = searchResult.searchResponse; this.status.contextResponse = searchResult.context; @@ -373,15 +381,6 @@ const explainHandler: ( }); } } - - const { applied, userContext: newUserContext } = await handleReview( - options.question, - userContext - ); - if (applied) { - userContext = newUserContext; - } - return await explain( navieProvider, options.question, diff --git a/packages/cli/src/rpc/explain/index-files.ts b/packages/cli/src/rpc/explain/index-files.ts index 56e6914755..c904faf021 100644 --- a/packages/cli/src/rpc/explain/index-files.ts +++ b/packages/cli/src/rpc/explain/index-files.ts @@ -5,13 +5,41 @@ import { buildFileIndex, FileIndex, fileTokens, + FilterFn, + isBinaryFile, + isDataFile, + isLargeFile, listProjectFiles, readFileSafe, } from '@appland/search'; -import fileFilter from './fileFilter'; +import { fileNameMatchesFilterPatterns } from './index/filter-patterns'; const debug = makeDebug('appmap:rpc:explain:index-files'); +function fileFilter( + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | undefined +): FilterFn { + return async (path: string) => { + debug('Filtering: %s', path); + if (isBinaryFile(path)) { + debug('Skipping binary file: %s', path); + return false; + } + + const includeFile = fileNameMatchesFilterPatterns(path, includePatterns, excludePatterns); + if (!includeFile) return false; + + const isData = isDataFile(path); + if (isData && (await isLargeFile(path))) { + debug('Skipping large data file: %s', path); + return false; + } + + return true; + }; +} + export default async function indexFiles( db: sqlite3.Database, directories: string[], diff --git a/packages/cli/src/rpc/explain/index-snippets.ts b/packages/cli/src/rpc/explain/index-snippets.ts deleted file mode 100644 index ceb2d58f01..0000000000 --- a/packages/cli/src/rpc/explain/index-snippets.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { - buildSnippetIndex, - FileSearchResult, - fileTokens, - langchainSplitter, - readFileSafe, - SnippetIndex, -} from '@appland/search'; -import sqlite3 from 'better-sqlite3'; - -export default async function indexSnippets( - db: sqlite3.Database, - fileSearchResults: FileSearchResult[] -): Promise { - const splitter = langchainSplitter; - - const snippetIndex = new SnippetIndex(db); - await buildSnippetIndex(snippetIndex, fileSearchResults, readFileSafe, splitter, fileTokens); - - return snippetIndex; -} diff --git a/packages/cli/src/rpc/explain/index/appmap-file-index.ts b/packages/cli/src/rpc/explain/index/appmap-file-index.ts new file mode 100644 index 0000000000..e49dc74fa6 --- /dev/null +++ b/packages/cli/src/rpc/explain/index/appmap-file-index.ts @@ -0,0 +1,31 @@ +import sqlite3 from 'better-sqlite3'; + +import { FileIndex } from '@appland/search'; + +import buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir'; +import { buildAppMapIndex, search } from './appmap-index'; +import { SearchResponse } from './appmap-match'; + +export async function buildAppMapFileIndex( + appmapDirectories: string[] +): Promise> { + return await buildIndexInTempDir('appmaps', async (indexFile) => { + const db = new sqlite3(indexFile); + const fileIndex = new FileIndex(db); + await buildAppMapIndex(fileIndex, appmapDirectories); + return fileIndex; + }); +} + +export async function searchAppMapFiles( + appmapDirectories: string[], + vectorTerms: string[], + maxDiagrams: number +): Promise { + const index = await buildAppMapFileIndex(appmapDirectories); + try { + return await search(index.index, vectorTerms.join(' OR '), maxDiagrams); + } finally { + index.close(); + } +} diff --git a/packages/cli/src/rpc/explain/index/appmap-index.ts b/packages/cli/src/rpc/explain/index/appmap-index.ts index 6dd598eaa8..a29221bd75 100644 --- a/packages/cli/src/rpc/explain/index/appmap-index.ts +++ b/packages/cli/src/rpc/explain/index/appmap-index.ts @@ -16,10 +16,10 @@ import { } from './appmap-match'; import loadAppMapConfig from '../../../lib/loadAppMapConfig'; -type ClassMapEntry = { +export type ClassMapEntry = { name: string; type: string; - labels: string[]; + labels?: string[]; children: ClassMapEntry[]; static?: boolean; sourceLocation?: string; @@ -48,6 +48,29 @@ export async function listAppMaps(directory: string): Promise { return appmapFiles.map(relativeToPath); } +export async function readIndexFile( + appmapName: string, + indexName: string +): Promise { + const indexFile = join(appmapName, [indexName, '.json'].join('')); + let indexStr: string; + try { + indexStr = await readFile(indexFile, 'utf-8'); + } catch (e) { + if (isNativeError(e) && !isNodeError(e, 'ENOENT')) { + warn(`Error reading metadata file ${indexFile}: ${e.message}`); + } + return undefined; + } + + try { + return JSON.parse(indexStr) as T; + } catch (e) { + const errorMessage = isNativeError(e) ? e.message : String(e); + warn(`Error parsing metadata file ${indexFile}: ${errorMessage}`); + } +} + /** * Read all content for an AppMap. For efficiency, utilizes the AppMap index files, rather * than reading the entire AppMap file directly. @@ -55,36 +78,16 @@ export async function listAppMaps(directory: string): Promise { export async function readAppMapContent(appmapFile: string): Promise { const appmapName = appmapFile.replace(/\.appmap\.json$/, ''); - async function readIndexFile(name: string): Promise { - const indexFile = join(appmapName, [name, '.json'].join('')); - let indexStr: string; - try { - indexStr = await readFile(indexFile, 'utf-8'); - } catch (e) { - if (isNativeError(e) && !isNodeError(e, 'ENOENT')) { - warn(`Error reading metadata file ${indexFile}: ${e.message}`); - } - return undefined; - } - - try { - return JSON.parse(indexStr) as T; - } catch (e) { - const errorMessage = isNativeError(e) ? e.message : String(e); - warn(`Error parsing metadata file ${indexFile}: ${errorMessage}`); - } - } - const appmapWords = new Array(); - const metadata = await readIndexFile('metadata'); + const metadata = await readIndexFile(appmapName, 'metadata'); if (metadata) { appmapWords.push(metadata.name); if (metadata.labels) appmapWords.push(...metadata.labels); if (metadata.exception) appmapWords.push(metadata.exception.message); } - const classMap = (await readIndexFile('classMap')) ?? []; + const classMap = (await readIndexFile(appmapName, 'classMap')) ?? []; const queries = new Array(); const codeObjects = new Array(); @@ -119,7 +122,7 @@ export async function readAppMapContent(appmapFile: string): Promise { classMap.forEach((co) => collectClassMapEntry(co)); appmapWords.push(...queries, ...codeObjects, ...routes, ...externalRoutes); - const parameters = (await readIndexFile('canonical.parameters')) ?? []; + const parameters = (await readIndexFile(appmapName, 'canonical.parameters')) ?? []; appmapWords.push(...parameters); appmapWords.push(...types); diff --git a/packages/cli/src/rpc/explain/build-index-in-temp-dir.ts b/packages/cli/src/rpc/explain/index/build-index-in-temp-dir.ts similarity index 96% rename from packages/cli/src/rpc/explain/build-index-in-temp-dir.ts rename to packages/cli/src/rpc/explain/index/build-index-in-temp-dir.ts index 8eea3e76b1..590283dbf7 100644 --- a/packages/cli/src/rpc/explain/build-index-in-temp-dir.ts +++ b/packages/cli/src/rpc/explain/index/build-index-in-temp-dir.ts @@ -10,7 +10,7 @@ export interface Closeable { close(): void; } -type CloseableIndex = { +export type CloseableIndex = { index: T; close: () => void; }; diff --git a/packages/cli/src/rpc/explain/index/index-events.ts b/packages/cli/src/rpc/explain/index/index-events.ts new file mode 100644 index 0000000000..76ae35ecb9 --- /dev/null +++ b/packages/cli/src/rpc/explain/index/index-events.ts @@ -0,0 +1,90 @@ +import { queryKeywords, SnippetId, SnippetIndex } from '@appland/search'; +import { warn } from 'console'; +import crypto from 'crypto'; + +import { SearchResult } from './appmap-match'; +import { ClassMapEntry, readIndexFile } from './appmap-index'; + +function hexDigest(input: string): string { + const hash = crypto.createHash('sha256'); + hash.update(input); + return hash.digest('hex'); +} + +async function indexAppMapEvents( + snippetIndex: SnippetIndex, + directory: string, + appmapFile: string +): Promise { + const appmapName = appmapFile.endsWith('.appmap.json') + ? appmapFile.slice(0, -'.appmap.json'.length) + : appmapFile; + const classMap = await readIndexFile(appmapName, 'classMap'); + if (!classMap) { + warn(`[index-events] No class map found for ${appmapName}`); + return; + } + + const indexCodeObject = (type: string, id: string, content: string, ...tags: string[]) => { + const words = [content, ...tags]; + const wordList = queryKeywords(words); + + const snippetId: SnippetId = { + type, + id, + }; + + // TODO: Include event id in the snippet id? + snippetIndex.indexSnippet(snippetId, directory, '', wordList.join(' '), content); + }; + + const boostCodeObject = (location: string) => { + const snippetId: SnippetId = { + type: 'code-snippet', + id: location, + }; + snippetIndex.boostSnippet(snippetId, 2); + }; + + const indexClassMapEntry = (cme: ClassMapEntry) => { + let id: string | undefined; + let tags: string[] = []; + if (cme.type === 'query') { + id = hexDigest(cme.name); + // TODO: We really want an event id for this code object. + // TODO: Include an index file that maps fqids to event ids? + // sequence.json does have the fqid -> event id mapping, but it's not + // in the index by default. + // TODO: Can we just link over to the appmap by fqid? + // Yes it can definitely be done. + tags = ['sql', 'query', 'database']; + } else if (cme.type === 'route') { + id = cme.name; + tags = ['route', 'request', 'server', 'http']; + } else if (cme.type === 'external-route') { + id = cme.name; + tags = ['route', 'request', 'client', 'http']; + } + + if (id) indexCodeObject(cme.type, id, cme.name, ...tags); + + if (cme.sourceLocation) { + // TODO: Which event ids should this be associated with? + boostCodeObject(cme.sourceLocation); + } + + cme.children?.forEach((child) => { + indexClassMapEntry(child); + }); + }; + classMap.forEach((co) => indexClassMapEntry(co)); +} + +export default async function indexEvents( + snippetIndex: SnippetIndex, + appmapSearchResults: SearchResult[] +): Promise { + for (const { directory, appmap } of appmapSearchResults) { + await indexAppMapEvents(snippetIndex, directory, appmap); + } +} diff --git a/packages/cli/src/rpc/explain/index/project-file-index.ts b/packages/cli/src/rpc/explain/index/project-file-index.ts new file mode 100644 index 0000000000..b68d4589e1 --- /dev/null +++ b/packages/cli/src/rpc/explain/index/project-file-index.ts @@ -0,0 +1,83 @@ +import sqlite3 from 'better-sqlite3'; +import makeDebug from 'debug'; + +import { + buildFileIndex, + FileIndex, + FileSearchResult, + fileTokens, + FilterFn, + isBinaryFile, + isDataFile, + isLargeFile, + listProjectFiles, + readFileSafe, +} from '@appland/search'; +import { fileNameMatchesFilterPatterns } from './filter-patterns'; + +import buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir'; + +const debug = makeDebug('appmap:index:project-files'); + +function fileFilter( + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | undefined +): FilterFn { + return async (path: string) => { + debug('Filtering: %s', path); + if (isBinaryFile(path)) { + debug('Skipping binary file: %s', path); + return false; + } + + const includeFile = fileNameMatchesFilterPatterns(path, includePatterns, excludePatterns); + if (!includeFile) return false; + + const isData = isDataFile(path); + if (isData && (await isLargeFile(path))) { + debug('Skipping large data file: %s', path); + return false; + } + + return true; + }; +} + +async function indexFiles( + db: sqlite3.Database, + directories: string[], + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | undefined +): Promise { + const fileIndex = new FileIndex(db); + + const filter = fileFilter(includePatterns, excludePatterns); + await buildFileIndex(fileIndex, directories, listProjectFiles, filter, readFileSafe, fileTokens); + + return fileIndex; +} + +export async function buildProjectFileIndex( + sourceDirectories: string[], + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | undefined +): Promise> { + return await buildIndexInTempDir('files', async (indexFile) => { + const db = new sqlite3(indexFile); + return await indexFiles(db, sourceDirectories, includePatterns, excludePatterns); + }); +} + +export async function searchProjectFiles( + sourceDirectories: string[], + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | undefined, + vectorTerms: string[] +): Promise { + const index = await buildProjectFileIndex(sourceDirectories, includePatterns, excludePatterns); + try { + return index.index.search(vectorTerms.join(' OR ')); + } finally { + index.close(); + } +} diff --git a/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts b/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts new file mode 100644 index 0000000000..a15c547c25 --- /dev/null +++ b/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts @@ -0,0 +1,87 @@ +import sqlite3 from 'better-sqlite3'; +import { warn } from 'console'; + +import { ContextV2 } from '@appland/navie'; +import { + buildSnippetIndex, + FileSearchResult, + fileTokens, + langchainSplitter, + readFileSafe, + SnippetIndex, + SnippetSearchResult, +} from '@appland/search'; + +import buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir'; +import indexEvents from './index-events'; +import { SearchResult } from './appmap-match'; +import appmapLocation from '../appmap-location'; + +export function snippetContextItem( + snippet: SnippetSearchResult +): ContextV2.ContextItem | ContextV2.FileContextItem | undefined { + const { snippetId, directory, score, content } = snippet; + + const { type: snippetIdType, id: snippetIdValue } = snippetId; + + let location: string | undefined; + if (snippetIdType === 'code-snippet') location = snippetIdValue; + + const eventIds: number[] = []; + + switch (snippetId.type) { + case 'query': + case 'route': + case 'external-route': + // TODO: Collect event ids from these. + return { + type: ContextV2.ContextItemType.DataRequest, + content, + directory, + score, + // TODO: Add location + // location: appmapLocation(result.appmap, eventId), + }; + case 'code-snippet': + // TODO: Collect event ids from these. + return { + type: ContextV2.ContextItemType.CodeSnippet, + content, + directory, + score, + location, + }; + default: + warn(`[search-context] Unknown snippet type: ${snippetId.type}`); + + // TODO: Collect all matching events, then build a sequence diagram + // case 'event': + // return await buildSequenceDiagram(snippet); + // default: + // codeSnippets.push(snippet); + } +} + +export async function buildProjectFileSnippetIndex( + fileSearchResults: FileSearchResult[], + appmapSearchResults: SearchResult[] +): Promise> { + const indexSnippets = async ( + db: sqlite3.Database, + fileSearchResults: FileSearchResult[] + ): Promise => { + const splitter = langchainSplitter; + + const snippetIndex = new SnippetIndex(db); + await buildSnippetIndex(snippetIndex, fileSearchResults, readFileSafe, splitter, fileTokens); + + return snippetIndex; + }; + + return buildIndexInTempDir('snippets', async (indexFile) => { + const db = new sqlite3(indexFile); + const snippetIndex = await indexSnippets(db, fileSearchResults); + await indexEvents(snippetIndex, appmapSearchResults); + return snippetIndex; + }); +} diff --git a/packages/cli/src/rpc/explain/lookupSourceCode.ts b/packages/cli/src/rpc/explain/lookupSourceCode.ts deleted file mode 100644 index 16b600b80a..0000000000 --- a/packages/cli/src/rpc/explain/lookupSourceCode.ts +++ /dev/null @@ -1,105 +0,0 @@ -import chalk from 'chalk'; -import { warn } from 'console'; -import { readFile } from 'fs/promises'; -import { glob } from 'glob'; - -import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; -import { exists, verbose } from '../../utils'; -import { promisify } from 'util'; -import parseLocation from './parseLocation'; - -export const LANGUAGE_BY_FILE_EXTENSION: Record = - { - '.js': 'js', - '.ts': 'js', - '.jsx': 'js', - '.tsx': 'js', - '.java': 'java', - '.py': 'python', - '.rb': 'ruby', - '.php': 'php', - }; - -// TODO: Look up different types of files -const scannedExtensions = new Set(); -const FILE_NAMES = new Set(); - -// TODO: Return source code up to the next location in the class map. -// TODO: Reverse-strip comment that follow the function. -export default async function lookupSourceCode( - directory: string, - location: string -): Promise { - const parsedLocation = parseLocation(location); - if (!parsedLocation) return; - - const [requestedFileName, lineNo] = parsedLocation; - - if (verbose()) warn(chalk.gray(`Looking up source code for ${location}`)); - - const extension = requestedFileName.slice(requestedFileName.lastIndexOf('.')); - - if (!scannedExtensions.has(extension)) { - scannedExtensions.add(extension); - // dot: true is present to include the .tox directory for Python - const fileNames = await promisify(glob)(`${directory}/**/*${extension}`, { - dot: true, - ignore: [ - '**/node_modules/**', - '**/vendor/**', - 'tmp/**', - '**/build/**', - '**/dist/**', - '**/target/**', - '**/.git/**', - ], - }); - if (verbose()) - warn(chalk.gray(`Found ${fileNames.length} files with extension "${extension}"`)); - for (const fileName of fileNames) { - FILE_NAMES.add(fileName); - } - } - - const candidates = Array.from(FILE_NAMES).filter((candidate) => - candidate.endsWith(requestedFileName) - ); - if (candidates.length === 0) { - warn(chalk.gray(`File not found in the workspace: ${requestedFileName}`)); - return; - } - candidates.sort((a, b) => a.length - b.length); - - const fileName = candidates[0]; - if (!(await exists(fileName))) { - warn(chalk.gray(`File ${fileName} does not exist`)); - return; - } - - const fileContent = await readFile(fileName, 'utf-8'); - if (!lineNo) return [fileContent]; - - if (lineNo <= 0) return [fileContent]; - - const fileExtension = fileName.slice(fileName.lastIndexOf('.')); - const language = LANGUAGE_BY_FILE_EXTENSION[fileExtension]; - let splitter: RecursiveCharacterTextSplitter; - if (language) { - splitter = RecursiveCharacterTextSplitter.fromLanguage(language, { - chunkOverlap: 0, - chunkSize: 500, - }); - } else { - splitter = new RecursiveCharacterTextSplitter({ - chunkOverlap: 0, - chunkSize: 250, - }); - } - - const chunks = await splitter.createDocuments([fileContent]); - const matches = chunks.filter( - (chunk) => chunk.metadata.loc.lines.from <= lineNo && chunk.metadata.loc.lines.to >= lineNo - ); - if (verbose()) warn(chalk.gray(`Obtained ${matches.length} source code chunks for ${location}`)); - return matches.map((match) => match.pageContent); -} diff --git a/packages/cli/src/rpc/explain/parseLocation.ts b/packages/cli/src/rpc/explain/parseLocation.ts deleted file mode 100644 index d237cdbf36..0000000000 --- a/packages/cli/src/rpc/explain/parseLocation.ts +++ /dev/null @@ -1,16 +0,0 @@ -import chalk from 'chalk'; -import { warn } from 'console'; - -export default function parseLocation(location: string): [string, number | undefined] | undefined { - if (!location.includes(':')) return [location, undefined]; - - const locationTest = /([^:]+):(\d+)$/.exec(location); - if (!locationTest) { - warn(chalk.gray(`Invalid location format: ${location}. Skipping file lookup.`)); - return; - } - - const [requestedFileName, lineNoStr] = locationTest.slice(1); - const lineNoReturned = lineNoStr ? parseInt(lineNoStr, 10) : undefined; - return [requestedFileName, lineNoReturned]; -} diff --git a/packages/cli/src/rpc/explain/review.ts b/packages/cli/src/rpc/explain/review.ts deleted file mode 100644 index 3b326ed96a..0000000000 --- a/packages/cli/src/rpc/explain/review.ts +++ /dev/null @@ -1,55 +0,0 @@ -import { parseOptions, REVIEW_DIFF_LOCATION, UserContext } from '@appland/navie'; -import configuration from '../configuration'; -import { execFile } from 'node:child_process'; - -const exec = (command: string, args: string[], options?: { cwd?: string }) => - new Promise((resolve, reject) => { - const child = execFile(command, args, { ...(options ?? {}) }); - - let stdout = ''; - child.stdout?.setEncoding('utf8'); - child.stdout?.on('data', (data: string) => { - stdout += data.toString(); - }); - - let stderr = ''; - child.stderr?.setEncoding('utf8'); - child.stderr?.on('data', (data: string) => { - stderr += data.toString(); - }); - - child.on('close', (code) => { - if (code === 0) resolve(stdout); - else reject(new Error(stderr)); - }); - }); - -/** - * This function is responsible for transforming user context to include diff content when the - * user has requested a review. In the event that the user has not requested a review, the function - * will not return any user context and `applied` will be set to false. - */ -export default async function handleReview( - question: string, - userContext?: UserContext.Context -): Promise<{ applied: boolean; userContext?: UserContext.Context }> { - const [mode] = question.split(/\s+/g); - if (mode !== '@review') return { applied: false }; - - const result = parseOptions(question); - const base = result.options.stringValue('base', 'main'); - const cwd = result.options.stringValue('project', configuration().projectDirectories[0]); - return { - applied: true, - userContext: [ - ...(typeof userContext === 'string' - ? [{ content: userContext, type: 'code-selection' } as UserContext.CodeSelectionItem] - : userContext ?? []), - { - type: 'code-snippet', - location: REVIEW_DIFF_LOCATION, // eslint-disable-line @typescript-eslint/no-unsafe-assignment - content: await exec('git', ['log', '-p', '--full-diff', `${base}..HEAD`], { cwd }), - }, - ], - }; -} diff --git a/packages/cli/src/rpc/explain/textSearchResultToRpcSearchResult.ts b/packages/cli/src/rpc/explain/textSearchResultToRpcSearchResult.ts index 50625bc010..f3d2e96625 100644 --- a/packages/cli/src/rpc/explain/textSearchResultToRpcSearchResult.ts +++ b/packages/cli/src/rpc/explain/textSearchResultToRpcSearchResult.ts @@ -13,4 +13,3 @@ export function textSearchResultToRpcSearchResult( if (eventResult.elapsed) result.elapsed = eventResult.elapsed; return result; } - diff --git a/packages/cli/src/rpc/navie/metadata.ts b/packages/cli/src/rpc/navie/metadata.ts index b2ace64477..d1577a82f9 100644 --- a/packages/cli/src/rpc/navie/metadata.ts +++ b/packages/cli/src/rpc/navie/metadata.ts @@ -39,18 +39,6 @@ export function navieMetadataV1(): RpcHandler< { name: '@test', description: 'Write tests for your code.', - referenceUrl: 'https://appmap.io/docs/navie-reference/navie-commands.html#test', - }, - { - name: '@search', - description: 'Search your codebase with Navie.', - referenceUrl: 'https://appmap.io/docs/navie-reference/navie-commands.html#review', - }, - { - name: '@review', - description: - 'Navie will provide feedback on the changes in your current branch. Use /base= to specify the base reference.', - referenceUrl: 'https://appmap.io/docs/navie-reference/navie-commands.html#search', }, { name: '@help', diff --git a/packages/cli/src/rpc/search/search.ts b/packages/cli/src/rpc/search/search.ts index 0232b0c440..dc37063271 100644 --- a/packages/cli/src/rpc/search/search.ts +++ b/packages/cli/src/rpc/search/search.ts @@ -8,10 +8,10 @@ import { SearchResponse } from '../explain/index/appmap-match'; import { search as searchAppMaps } from '../explain/index/appmap-index'; import searchSingleAppMap from '../../cmds/search/searchSingleAppMap'; import configuration, { AppMapDirectory } from '../configuration'; -import buildIndexInTempDir from '../explain/build-index-in-temp-dir'; +import buildIndexInTempDir from '../explain/index/build-index-in-temp-dir'; import { buildAppMapIndex } from '../explain/index/appmap-index'; -export const DEFAULT_MAX_DIAGRAMS = 10; +export const DEFAULT_MAX_DIAGRAMS = 3; export const DEFAULT_MAX_EVENTS_PER_DIAGRAM = 100; export const DEFAULT_MAX_FILES = 10; diff --git a/packages/cli/tests/unit/readAppMapContent.spec.ts b/packages/cli/tests/unit/readAppMapContent.spec.ts new file mode 100644 index 0000000000..4a64da2508 --- /dev/null +++ b/packages/cli/tests/unit/readAppMapContent.spec.ts @@ -0,0 +1,42 @@ +import { vol } from 'memfs'; +import { readAppMapContent } from '../../src/rpc/explain/index/appmap-index'; +import { Metadata } from '@appland/models'; + +jest.mock('fs/promises', () => require('memfs').promises); + +describe('readAppMapContent', () => { + beforeEach(() => { + vol.reset(); + }); + + it('reads appmap content from index files', async () => { + const appmapName = '/appmaps/testAppMap'; + const metadata: Metadata = { + name: 'Test AppMap', + labels: ['test', 'appmap'], + exception: { class: 'Exception', message: 'Test exception' }, + client: { name: 'Test client', version: '1.0.0', url: 'http://test.com' }, + recorder: { name: 'Test recorder' }, + }; + const classMap = [ + { name: 'query1', type: 'query', labels: [], children: [] }, + { name: 'route1', type: 'route', labels: [], children: [] }, + ]; + + vol.fromJSON({ + [`${appmapName}/metadata.json`]: JSON.stringify(metadata), + [`${appmapName}/classMap.json`]: JSON.stringify(classMap), + [`${appmapName}/canonical.parameters.json`]: JSON.stringify(['param1', 'param2']), + }); + + const content = await readAppMapContent(`${appmapName}.appmap.json`); + expect(content).toContain('Test AppMap'); + expect(content).toContain('test'); + expect(content).toContain('appmap'); + expect(content).toContain('Test exception'); + expect(content).toContain('query1'); + expect(content).toContain('route1'); + expect(content).toContain('param1'); + expect(content).toContain('param2'); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts b/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts deleted file mode 100644 index ee2a1dcaf6..0000000000 --- a/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts +++ /dev/null @@ -1,98 +0,0 @@ -import { ContextCollector } from '../../../../src/rpc/explain/collectContext'; -import * as SearchContextCollector from '../../../../src/rpc/explain/SearchContextCollector'; -import * as LocationContextCollector from '../../../../src/rpc/explain/LocationContextCollector'; -import * as navie from '@appland/navie'; -import Location from '../../../../src/rpc/explain/location'; - -jest.mock('@appland/navie'); -jest.mock('../../../../src/rpc/explain/SearchContextCollector'); -jest.mock('../../../../src/rpc/explain/LocationContextCollector'); - -describe('ContextCollector', () => { - const charLimit = 5000; - - beforeEach(() => { - jest.mocked(navie.applyContext).mockImplementation((context) => context); - }); - afterEach(() => jest.restoreAllMocks()); - - describe('vector term search', () => { - describe('with empty vector terms', () => { - it('returns an empty context', async () => { - const emptyVectorTerms = ['', ' ']; - - const contextCollector = new ContextCollector( - ['example'], - ['src'], - emptyVectorTerms, - charLimit - ); - const result = await contextCollector.collectContext(); - expect(result).toStrictEqual({ - searchResponse: { - results: [], - numResults: 0, - }, - context: [], - }); - - expect(SearchContextCollector.default).not.toHaveBeenCalled(); - expect(LocationContextCollector.default).not.toHaveBeenCalled(); - }); - }); - }); - - describe('with non-empty vector terms', () => { - it('invokes SearchContextCollector', async () => { - const vectorTerms = ['login', 'user']; - const contextCollector = new ContextCollector(['example'], ['src'], vectorTerms, charLimit); - - const searchConstructorSpy = jest.spyOn(SearchContextCollector, 'default'); - searchConstructorSpy.mockImplementation( - () => - ({ - collectContext: jest.fn().mockResolvedValue({ - searchResponse: { - results: [], - numResults: 0, - }, - context: [], - }), - } as unknown as SearchContextCollector.default) - ); - - await contextCollector.collectContext(); - expect(searchConstructorSpy).toHaveBeenCalledWith( - ['example'], - ['src'], - undefined, - vectorTerms, - charLimit - ); - }); - }); - describe('with locations specified', () => { - it('invokes LocationContextCollector', async () => { - const locations = ['file1.py']; - const contextCollector = new ContextCollector(['example'], ['src'], [], 0); - contextCollector.locations = locations.map((l) => Location.parse(l)) as Location[]; - - const locationConstructorSpy = jest.spyOn(LocationContextCollector, 'default'); - locationConstructorSpy.mockImplementation( - () => - ({ - collectContext: jest.fn().mockResolvedValue({ - searchResponse: { - results: [], - numResults: 0, - }, - context: [], - }), - } as unknown as LocationContextCollector.default) - ); - - await contextCollector.collectContext(); - expect(locationConstructorSpy).toHaveBeenCalledWith(['src'], contextCollector.locations); - }); - }); -}); diff --git a/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts b/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts deleted file mode 100644 index 98ccad0661..0000000000 --- a/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts +++ /dev/null @@ -1,119 +0,0 @@ -import { SearchRpc } from '@appland/rpc'; -import { join } from 'path'; - -import { textSearchResultToRpcSearchResult } from '../../../../src/rpc/explain/textSearchResultToRpcSearchResult'; -import buildContext from '../../../../src/rpc/explain/buildContext'; -import { SearchResponse as AppMapSearchResponse } from '../../../../src/rpc/explain/index/appmap-match'; -import FindEvents, { - SearchResponse as EventSearchResponse, -} from '../../../../src/fulltext/FindEvents'; -import EventCollector from '../../../../src/rpc/explain/EventCollector'; - -jest.mock('../../../../src/fulltext/FindEvents'); -jest.mock('../../../../src/rpc/explain/buildContext'); - -describe('EventCollector', () => { - const mockFindEventsResponses: EventSearchResponse[] = [ - { - type: 'event', - numResults: 1, - results: [ - { - appmap: 'appMapId1', - fqid: 'testFqid1', - score: 1, - eventIds: [1, 2, 3], - }, - ], - }, - { - type: 'event', - numResults: 1, - results: [ - { - appmap: 'appMapId2', - fqid: 'testFqid2', - score: 1, - eventIds: [2, 3, 4], - }, - ], - }, - ]; - - const oneSearchResponse: AppMapSearchResponse = { - type: 'appmap', - numResults: 1, - stats: { max: 1, mean: 1, median: 1, stddev: 0 }, - results: [{ appmap: 'appMapId1', directory: 'a', score: 1 }], - }; - - const multiSearchResponse: AppMapSearchResponse = { - type: 'appmap', - numResults: 2, // Indicating two appmaps are present - stats: { max: 1, mean: 1, median: 1, stddev: 0 }, - results: [ - { appmap: 'appMapId1', directory: 'a', score: 1 }, - { appmap: 'appMapId2', directory: 'b', score: 1 }, - ], - }; - - beforeEach(() => { - jest.mocked(FindEvents).prototype.initialize.mockResolvedValue(); - let mockFindEventsResponsesCopy = [...mockFindEventsResponses]; - jest - .mocked(FindEvents) - .prototype.search.mockImplementation(() => mockFindEventsResponsesCopy.shift()!); - jest.mocked(buildContext).mockResolvedValue([]); - }); - afterEach(() => jest.resetAllMocks()); - - it('correctly initializes and indexes app maps', async () => { - const collector = new EventCollector('query', oneSearchResponse); - await collector.collectEvents(10); - - const appmap = join('a', 'appMapId1'); - expect(FindEvents).toHaveBeenCalledWith(appmap); - expect(FindEvents.prototype.initialize).toHaveBeenCalled(); - expect(collector.appmapIndexes.has(appmap)).toBe(true); - }); - - it('collects events based on provided maxEvents', async () => { - const maxEvents = 10; - const collector = new EventCollector('query', oneSearchResponse); - const collectedData = await collector.collectEvents(maxEvents); - - expect(FindEvents.prototype.search).toHaveBeenCalledWith('query', { maxResults: maxEvents }); - expect(buildContext).toHaveBeenCalled(); - expect(collectedData.results[0].events).toEqual( - mockFindEventsResponses[0].results.map(textSearchResultToRpcSearchResult) - ); - }); - - it('collects events from multiple appmaps', async () => { - const maxEvents = 10; - const collector = new EventCollector('query', multiSearchResponse); - const collectedData = await collector.collectEvents(maxEvents); - - // Assume the findEvents method provides merged results from multiple appmaps - const expectedResponse: SearchRpc.SearchResponse = { - numResults: 2, - results: [ - { - appmap: join('a', 'appMapId1'), - directory: 'a', - score: 1, - events: mockFindEventsResponses[0].results.map(textSearchResultToRpcSearchResult), - }, - { - appmap: join('b', 'appMapId2'), - directory: 'b', - score: 1, - events: mockFindEventsResponses[1].results.map(textSearchResultToRpcSearchResult), - }, - ], - }; - - expect(FindEvents.prototype.search).toHaveBeenCalledTimes(multiSearchResponse.numResults); - expect(collectedData.results).toEqual(expectedResponse.results); - }); -}); diff --git a/packages/cli/tests/unit/rpc/explain/LocationContextCollector.spec.ts b/packages/cli/tests/unit/rpc/explain/LocationContextCollector.spec.ts deleted file mode 100644 index c818f4c1bf..0000000000 --- a/packages/cli/tests/unit/rpc/explain/LocationContextCollector.spec.ts +++ /dev/null @@ -1,90 +0,0 @@ -import * as fs from 'fs/promises'; -import * as utils from '../../../../src/utils'; - -import Location from '../../../../src/rpc/explain/location'; -import LocationContextCollector from '../../../../src/rpc/explain/LocationContextCollector'; - -jest.mock('fs/promises'); -// eslint-disable-next-line @typescript-eslint/no-unsafe-return -jest.mock('../../../../src/utils', () => ({ - ...jest.requireActual('../../../../src/utils'), - exists: jest.fn(), - isFile: jest.fn(), -})); - -describe('LocationContextCollector', () => { - const sourceDirectories = ['/src', '/lib']; - const locations: Location[] = [ - { path: 'file1.js', snippet: (contents: string) => contents.slice(0, 10) }, - { path: '/src/file2.js', snippet: (contents: string) => contents.slice(0, 10) }, - { path: '/other/file3.js', snippet: (contents: string) => contents.slice(0, 10) }, - ]; - - let collector: LocationContextCollector; - - beforeEach(() => (collector = new LocationContextCollector(sourceDirectories, locations))); - beforeEach(() => jest.resetAllMocks()); - - it('initializes correctly', () => { - expect(collector).toBeDefined(); - }); - - it('handles empty locations', async () => { - collector = new LocationContextCollector(sourceDirectories, []); - const result = await collector.collectContext(); - expect(result.context).toEqual([]); - expect(result.searchResponse.numResults).toBe(0); - }); - - it('handles valid locations', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(true); - jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); - - const result = await collector.collectContext(); - expect(result.context.length).toBe(4); - expect(result.context[0].content).toBe('file conte'); - expect(result.context[1].content).toBe('file conte'); - expect(result.context[2].content).toBe('file conte'); - expect(result.context[3].content).toBe('file conte'); - - expect(utils.exists).toHaveBeenCalledTimes(4); - expect(utils.exists).toHaveBeenCalledWith('/src/file1.js'); - expect(utils.exists).toHaveBeenCalledWith('/lib/file1.js'); - expect(utils.exists).toHaveBeenCalledWith('/src/file2.js'); - expect(utils.exists).toHaveBeenCalledWith('/other/file3.js'); - }); - - it('handles non-file locations', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(false); - - const result = await collector.collectContext(); - expect(result.context).toEqual([]); - }); - - it('handles non-existent files', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(false); - - const result = await collector.collectContext(); - expect(result.context).toEqual([]); - }); - - it('handles file reading errors', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(true); - jest.spyOn(fs, 'readFile').mockRejectedValue(new Error('Read error')); - - const result = await collector.collectContext(); - expect(result.context).toEqual([]); - }); - - it('extracts snippets correctly', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(true); - jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); - - const result = await collector.collectContext(); - expect(result.context[0].content).toBe('file conte'); - }); -}); diff --git a/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts b/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts new file mode 100644 index 0000000000..79f3e3cfc0 --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts @@ -0,0 +1,129 @@ +import * as collectSearchContext from '../../../../src/rpc/explain/collect-search-context'; +import * as collectLocationContext from '../../../../src/rpc/explain/collect-location-context'; +import collectContext, { + buildContextRequest, + ContextRequest, +} from '../../../../src/rpc/explain/collect-context'; +import Location from '../../../../src/rpc/explain/location'; + +jest.mock('../../../../src/rpc/explain/collect-search-context'); +jest.mock('../../../../src/rpc/explain/collect-location-context'); +jest.mock('@appland/navie'); + +describe('collect-context', () => { + afterEach(() => jest.resetAllMocks()); + afterEach(() => jest.restoreAllMocks()); + + describe('buildContextRequest', () => { + it('builds a context request', () => { + const request = buildContextRequest( + ['appmap-dir'], + ['src'], + ['appmap-a', 'appmap-b'], + ['login', 'the', 'user'], + 5000, + {} + ); + expect(request).toEqual({ + vectorTerms: ['login', 'user'], + request: { + appmaps: ['appmap-a', 'appmap-b'], + excludePatterns: [ + /(^|[/\\])\.appmap([/\\]|$)/, + /(^|[/\\])\.navie([/\\]|$)/, + /(^|[/\\])\.yarn([/\\]|$)/, + /(^|[/\\])venv([/\\]|$)/, + /(^|[/\\])\.venv([/\\]|$)/, + /(^|[/\\])node_modules([/\\]|$)/, + /(^|[/\\])vendor([/\\]|$)/, + ], + }, + }); + }); + }); + + describe('collectContext', () => { + const charLimit = 5000; + + describe('with empty vector terms', () => { + it('returns an empty context', async () => { + const emptyVectorTerms = []; + + const result = await collectContext( + ['appmap-dir'], + ['src'], + charLimit, + emptyVectorTerms, + {} + ); + expect(result).toStrictEqual({ + searchResponse: { + results: [], + numResults: 0, + }, + context: [], + }); + expect(collectLocationContext.default).not.toHaveBeenCalled(); + }); + }); + + describe('with vector terms', () => { + const appmapDirectories = ['dir1', 'dir2']; + const sourceDirectories = ['src1', 'src2']; + const vectorTerms = ['term1', 'term2']; + + it('should process vector terms and char limit correctly', async () => { + (collectSearchContext.default as jest.Mock).mockResolvedValue({ + searchResponse: { results: [], numResults: 2 }, + context: ['context1', 'context2'], + }); + + const request = { locations: [] }; + const result = await collectContext( + appmapDirectories, + sourceDirectories, + charLimit, + vectorTerms, + request + ); + + expect(collectSearchContext.default).toHaveBeenCalledWith( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit, + request + ); + expect(collectLocationContext.default).not.toHaveBeenCalled(); + + expect(result.searchResponse.numResults).toBe(2); + expect(result.context).toEqual(['context1', 'context2']); + }); + }); + + describe('with locations specified', () => { + it('should process locations and char limit correctly', async () => { + (collectLocationContext.default as jest.Mock).mockResolvedValue(['context1', 'context2']); + + const request: ContextRequest = { + locations: [Location.parse('location1')!, Location.parse('location2')!], + }; + const result = await collectContext( + ['dir1', 'dir2'], + ['src1', 'src2'], + charLimit, + [], + request + ); + + expect(collectSearchContext.default).not.toHaveBeenCalled(); + expect(collectLocationContext.default).toHaveBeenCalledWith( + ['src1', 'src2'], + request.locations + ); + expect(result.searchResponse.numResults).toBe(0); + expect(result.context).toEqual(['context1', 'context2']); + }); + }); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts b/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts new file mode 100644 index 0000000000..0466fc5f75 --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts @@ -0,0 +1,88 @@ +import * as fs from 'fs/promises'; +import * as utils from '../../../../src/utils'; + +import Location from '../../../../src/rpc/explain/location'; +import collectLocationContext from '../../../../src/rpc/explain/collect-location-context'; + +jest.mock('fs/promises'); +// eslint-disable-next-line @typescript-eslint/no-unsafe-return +jest.mock('../../../../src/utils', () => ({ + ...jest.requireActual('../../../../src/utils'), + exists: jest.fn(), + isFile: jest.fn(), +})); + +describe('collectLocationContext', () => { + const sourceDirectories = ['/src', '/lib']; + + beforeEach(() => jest.resetAllMocks()); + + describe('with empty locations', () => { + it('handles empty locations', async () => { + const result = await collectLocationContext(sourceDirectories, []); + expect(result).toEqual([]); + }); + }); + + describe('with valid locations', () => { + const locations: Location[] = [ + { path: 'file1.js', snippet: (contents: string) => contents.slice(0, 10) }, + { path: '/src/file2.js', snippet: (contents: string) => contents.slice(0, 10) }, + { path: '/other/file3.js', snippet: (contents: string) => contents.slice(0, 10) }, + ]; + + const collect = async () => collectLocationContext(sourceDirectories, locations); + + it('handles valid locations', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(true); + jest.spyOn(utils, 'isFile').mockResolvedValue(true); + jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); + + const result = await collect(); + expect(result.length).toBe(4); + expect(result[0].content).toBe('file conte'); + expect(result[1].content).toBe('file conte'); + expect(result[2].content).toBe('file conte'); + expect(result[3].content).toBe('file conte'); + + expect(utils.exists).toHaveBeenCalledTimes(4); + expect(utils.exists).toHaveBeenCalledWith('/src/file1.js'); + expect(utils.exists).toHaveBeenCalledWith('/lib/file1.js'); + expect(utils.exists).toHaveBeenCalledWith('/src/file2.js'); + expect(utils.exists).toHaveBeenCalledWith('/other/file3.js'); + }); + + it('handles non-file locations', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(true); + jest.spyOn(utils, 'isFile').mockResolvedValue(false); + + const result = await collect(); + expect(result).toEqual([]); + }); + + it('handles non-existent files', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(false); + + const result = await collect(); + expect(result).toEqual([]); + }); + + it('handles file reading errors', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(true); + jest.spyOn(utils, 'isFile').mockResolvedValue(true); + jest.spyOn(fs, 'readFile').mockRejectedValue(new Error('Read error')); + + const result = await collect(); + expect(result).toEqual([]); + }); + + it('extracts snippets correctly', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(true); + jest.spyOn(utils, 'isFile').mockResolvedValue(true); + jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); + + const result = await collect(); + expect(result[0].content).toBe('file conte'); + }); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/collect-search-context.spec.ts b/packages/cli/tests/unit/rpc/explain/collect-search-context.spec.ts new file mode 100644 index 0000000000..723082716a --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/collect-search-context.spec.ts @@ -0,0 +1,151 @@ +/* eslint-disable @typescript-eslint/no-unsafe-return */ +import collectSearchContext from '../../../../src/rpc/explain/collect-search-context'; +import * as AppMapFileIndex from '../../../../src/rpc/explain/index/appmap-file-index'; +import * as ProjectFileIndex from '../../../../src/rpc/explain/index/project-file-index'; +import * as ProjectFileSnippetIndex from '../../../../src/rpc/explain/index/project-file-snippet-index'; +import { SnippetSearchResult } from '@appland/search'; + +jest.mock('../../../../src/rpc/explain/index/appmap-file-index.ts', () => ({ + ...jest.requireActual('../../../../src/rpc/explain/index/appmap-file-index.ts'), + searchAppMapFiles: jest.fn(), +})); + +jest.mock('../../../../src/rpc/explain/index/project-file-index.ts', () => ({ + ...jest.requireActual('../../../../src/rpc/explain/index/project-file-index.ts'), + searchProjectFiles: jest.fn(), +})); + +jest.mock('../../../../src/rpc/explain/index/project-file-snippet-index.ts', () => ({ + ...jest.requireActual('../../../../src/rpc/explain/index/project-file-snippet-index.ts'), + buildProjectFileSnippetIndex: jest.fn().mockResolvedValue({ + index: { + searchSnippets: jest.fn().mockReturnValue([]), + }, + close: jest.fn(), + }), +})); + +describe('collectSearchContext', () => { + const appmapDirectories = ['dir1', 'dir2']; + const sourceDirectories = ['src1', 'src2']; + const vectorTerms = ['term1', 'term2']; + const charLimit = 1000; + + it('should emit appmaps provided in the request', async () => { + const request = { appmaps: ['dir1/appmap1', 'dir2/appmap2'] }; + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit, + request + ); + + expect(result.searchResponse.numResults).toBe(request.appmaps.length); + expect(result.context).toEqual([]); + }); + + it('should search appmap files when appmaps are not provided', async () => { + (AppMapFileIndex.searchAppMapFiles as jest.Mock).mockResolvedValue({ + results: [{ appmap: 'appmap1', directory: 'dir1', score: 1 }], + stats: {}, + }); + + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit + ); + + expect(AppMapFileIndex.searchAppMapFiles as jest.Mock).toHaveBeenCalledWith( + appmapDirectories, + vectorTerms, + expect.any(Number) + ); + expect(result.searchResponse.numResults).toBe(1); + }); + + it('should process and handle data returned from search functions', async () => { + (AppMapFileIndex.searchAppMapFiles as jest.Mock).mockResolvedValue({ + results: [{ appmap: 'appmap1', directory: 'dir1', score: 1 }], + stats: {}, + }); + (ProjectFileIndex.searchProjectFiles as jest.Mock).mockResolvedValue([ + { file: 'file1', content: 'content1' }, + ]); + + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit + ); + + expect(result.searchResponse.numResults).toBe(1); + expect(result.context).toEqual([]); + }); + + it('should search project files and build snippet index', async () => { + (ProjectFileIndex.searchProjectFiles as jest.Mock).mockResolvedValue([ + { file: 'file1', content: 'content1' }, + ]); + + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit + ); + + expect(ProjectFileIndex.searchProjectFiles as jest.Mock).toHaveBeenCalledWith( + sourceDirectories, + undefined, + undefined, + vectorTerms + ); + expect(ProjectFileSnippetIndex.buildProjectFileSnippetIndex as jest.Mock).toHaveBeenCalled(); + expect(result.context).toEqual([]); + }); + + it('should continue gathering context to meet the char limit', async () => { + const item1: SnippetSearchResult = { + snippetId: { type: 'code-snippet', id: 'path1.py' }, + directory: 'src', + score: 1, + content: 'short', + }; + const item2: SnippetSearchResult = { + snippetId: { type: 'code-snippet', id: 'path2.py' }, + directory: 'src', + score: 0.9, + content: 'longer content to try and meet the char limit', + }; + const mockSearchSnippets = jest + .fn() + .mockReturnValueOnce([item1]) + .mockReturnValue([item1, item2]); + + (ProjectFileSnippetIndex.buildProjectFileSnippetIndex as jest.Mock).mockResolvedValue({ + index: { + searchSnippets: mockSearchSnippets, + }, + close: jest.fn(), + }); + + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit + ); + + expect(mockSearchSnippets).toHaveBeenCalledTimes(3); + expect(result.context).toEqual( + expect.arrayContaining([ + expect.objectContaining({ type: 'code-snippet', location: 'path1.py' }), + expect.objectContaining({ type: 'code-snippet', location: 'path2.py' }), + ]) + ); + }); +}); diff --git a/packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts b/packages/cli/tests/unit/rpc/explain/index/appmap-index.readAppMapContent.spec.ts similarity index 96% rename from packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts rename to packages/cli/tests/unit/rpc/explain/index/appmap-index.readAppMapContent.spec.ts index a3c1e35360..b51c47be4b 100644 --- a/packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/index/appmap-index.readAppMapContent.spec.ts @@ -1,5 +1,5 @@ import { vol } from 'memfs'; -import { readAppMapContent } from '../../../src/rpc/explain/index/appmap-index'; +import { readAppMapContent } from '../../../../../src/rpc/explain/index/appmap-index'; import { Metadata } from '@appland/models'; jest.mock('fs/promises', () => require('memfs').promises); diff --git a/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts b/packages/cli/tests/unit/rpc/explain/index/appmap-index.search.spec.ts similarity index 93% rename from packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts rename to packages/cli/tests/unit/rpc/explain/index/appmap-index.search.spec.ts index 202618958e..6f0b7e26e5 100644 --- a/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/index/appmap-index.search.spec.ts @@ -1,13 +1,13 @@ -import * as utils from '../../../src/utils'; -import UpToDate, { AppMapIndex } from '../../../src/lib/UpToDate'; +import * as utils from '../../../../../src/utils'; +import UpToDate from '../../../../../src/lib/UpToDate'; import { PathLike } from 'fs'; import { join } from 'path'; import { FileIndex, FileSearchResult } from '@appland/search'; -import { search } from '../../../src/rpc/explain/index/appmap-index'; -import { SearchStats } from '../../../src/rpc/explain/index/appmap-match'; +import { search } from '../../../../../src/rpc/explain/index/appmap-index'; +import { SearchStats } from '../../../../../src/rpc/explain/index/appmap-match'; -jest.mock('../../../src/utils'); -jest.mock('../../../src/lib/UpToDate'); +jest.mock('../../../../../src/utils'); +jest.mock('../../../../../src/lib/UpToDate'); describe('AppMapIndex', () => { let mockAppmapIndex: FileIndex; diff --git a/packages/cli/tests/unit/rpc/explain/index/appmap-index.spec.ts b/packages/cli/tests/unit/rpc/explain/index/appmap-index.spec.ts new file mode 100644 index 0000000000..b51c47be4b --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/index/appmap-index.spec.ts @@ -0,0 +1,90 @@ +import { vol } from 'memfs'; +import { readAppMapContent } from '../../../../../src/rpc/explain/index/appmap-index'; +import { Metadata } from '@appland/models'; + +jest.mock('fs/promises', () => require('memfs').promises); + +describe('readAppMapContent', () => { + beforeEach(() => vol.reset()); + afterEach(() => vol.reset()); + + it('reads appmap content from index files', async () => { + const appmapName = '/appmaps/testAppMap'; + const metadata: Metadata = { + name: 'Test AppMap', + labels: ['test', 'appmap'], + exception: { class: 'Exception', message: 'Test exception' }, + client: { name: 'Test client', version: '1.0.0', url: 'http://test.com' }, + recorder: { name: 'Test recorder' }, + }; + const classMap = [ + { + name: 'package1', + type: 'package', + labels: [], + children: [ + { + name: 'class1', + type: 'class', + labels: [], + children: [ + { + name: 'function1', + type: 'function', + labels: [], + children: [], + }, + ], + }, + { name: 'class2', type: 'class', labels: [], children: [] }, + ], + }, + { name: 'query1', type: 'query', labels: [], children: [] }, + { name: 'route1', type: 'route', labels: [], children: [] }, + ]; + + vol.fromJSON({ + [`${appmapName}/metadata.json`]: JSON.stringify(metadata), + [`${appmapName}/classMap.json`]: JSON.stringify(classMap), + [`${appmapName}/canonical.parameters.json`]: JSON.stringify(['param1', 'param2']), + }); + + const content = await readAppMapContent(`${appmapName}.appmap.json`); + expect(content).toContain('Test AppMap'); + expect(content).toContain('test'); + expect(content).toContain('appmap'); + expect(content).toContain('Test exception'); + expect(content).toContain('query1'); + expect(content).toContain('route1'); + expect(content).toContain('function1'); + expect(content).toContain('param1'); + expect(content).toContain('param2'); + expect(content).toContain('route'); + expect(content).toContain('sql'); + expect(content).toContain('database'); + + expect(content.split(' ')).toEqual([ + 'Test', + 'AppMap', + 'test', + 'appmap', + 'Test', + 'exception', + 'query1', + 'package1', + 'class1', + 'function1', + 'class2', + 'route1', + 'param1', + 'param2', + 'sql', + 'query', + 'database', + 'route', + 'request', + 'server', + 'http', + ]); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts b/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts new file mode 100644 index 0000000000..b8cdf6bf5f --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts @@ -0,0 +1,93 @@ +/* eslint-disable @typescript-eslint/no-unsafe-member-access */ +/* eslint-disable @typescript-eslint/no-explicit-any */ +/* eslint-disable @typescript-eslint/no-unsafe-return */ +import { SnippetIndex } from '@appland/search'; +import sqlite3 from 'better-sqlite3'; + +import indexEvents from '../../../../../src/rpc/explain/index/index-events'; +import { SearchResult } from '../../../../../src/rpc/explain/index/appmap-match'; +import * as AppMapIndex from '../../../../../src/rpc/explain/index/appmap-index'; + +jest.mock('../../../../../src/rpc/explain/index/appmap-index', () => ({ + ...jest.requireActual('../../../../../src/rpc/explain/index/appmap-index'), + readIndexFile: jest.fn(), +})); + +describe('index-events', () => { + describe('indexAppMapEvents', () => { + let db: sqlite3.Database; + let snippetIndex: SnippetIndex; + + beforeEach(() => (db = new sqlite3(':memory:'))); + beforeEach(() => (snippetIndex = new SnippetIndex(db))); + afterEach(() => db.close()); + + it('should index events', async () => { + const searchResults: SearchResult[] = [ + { + directory: 'tmp/appmap', + appmap: 'appmap1', + score: 1, + }, + ]; + + const classMap: AppMapIndex.ClassMapEntry[] = [ + { + type: 'package', + name: 'package1', + children: [ + { + type: 'class', + name: 'class1', + children: [ + { + type: 'function', + name: 'method1', + sourceLocation: 'path/to/file1:10', + children: [], + }, + ], + }, + ], + }, + { + type: 'query', + name: 'SELECT * FROM table1', + children: [], + }, + { + type: 'route', + name: '/api/endpoint', + children: [], + }, + { + type: 'external-route', + name: 'GET https://example.com/api/endpoint', + children: [], + }, + ]; + + (AppMapIndex.readIndexFile as jest.Mock).mockResolvedValue(classMap); + + await indexEvents(snippetIndex, searchResults); + + const rows = db.prepare('SELECT * FROM snippet_content ORDER BY snippet_id').all(); + expect(rows.map((r) => (r as any).snippet_id)).toEqual([ + 'external-route:GET https://example.com/api/endpoint', + 'query:c78f4ded2dcc9714feb709a35c86af4727eef18d0eb90fe89c6b13b66977b7b1', + 'route:/api/endpoint', + ]); + + expect(rows.map((r) => (r as any).file_words)).toEqual([ + 'get https example com api endpoint route request client http', + 'select table1 sql query database', + 'api endpoint route request server http', + ]); + + const boostRows = db.prepare('SELECT * FROM snippet_boost ORDER BY snippet_id').all(); + expect(boostRows.map((r) => (r as any).snippet_id)).toEqual([ + 'code-snippet:path/to/file1:10', + ]); + }); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts b/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts new file mode 100644 index 0000000000..d834af99eb --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts @@ -0,0 +1,242 @@ +/* eslint-disable @typescript-eslint/no-unsafe-return */ +import { join } from 'path'; + +import * as search from '@appland/search'; + +import { + buildProjectFileSnippetIndex, + snippetContextItem, +} from '../../../../../src/rpc/explain/index/project-file-snippet-index'; +import * as AppMapIndex from '../../../../../src/rpc/explain/index/appmap-index'; +import { CloseableIndex } from '../../../../../src/rpc/explain/index/build-index-in-temp-dir'; +import { SearchResult } from '../../../../../src/rpc/explain/index/appmap-match'; + +jest.mock('@appland/search', () => ({ + ...jest.requireActual('@appland/search'), + readFileSafe: jest.fn(), +})); + +jest.mock('../../../../../src/rpc/explain/index/appmap-index', () => ({ + ...jest.requireActual('../../../../../src/rpc/explain/index/appmap-index'), + readIndexFile: jest.fn(), +})); + +describe('project-file-snippet-index', () => { + beforeEach(() => jest.restoreAllMocks()); + beforeEach(() => jest.resetAllMocks()); + + describe('snippetContextItem', () => { + describe('query', () => { + it('should return a snippet context item', () => { + const snippet = { + snippetId: { type: 'query', id: 'the-query' }, + directory: 'a', + score: 1, + content: 'content', + }; + const result = snippetContextItem(snippet); + expect(result).toEqual({ + type: 'data-request', + content: 'content', + directory: 'a', + score: 1, + }); + }); + }); + describe('route', () => { + it('should return a snippet context item', () => { + const snippet = { + snippetId: { type: 'route', id: 'the-route' }, + directory: 'a', + score: 1, + content: 'content', + }; + const result = snippetContextItem(snippet); + expect(result).toEqual({ + type: 'data-request', + content: 'content', + directory: 'a', + score: 1, + }); + }); + }); + describe('external-route', () => { + it('should return a snippet context item', () => { + const snippet = { + snippetId: { type: 'external-route', id: 'the-route' }, + directory: 'a', + score: 1, + content: 'content', + }; + const result = snippetContextItem(snippet); + expect(result).toEqual({ + type: 'data-request', + content: 'content', + directory: 'a', + score: 1, + }); + }); + }); + describe('code-snippet', () => { + it('should return a snippet context item', () => { + const snippet = { + snippetId: { type: 'code-snippet', id: 'path/to/item.py:1-3' }, + directory: 'a', + score: 1, + content: 'content', + }; + const result = snippetContextItem(snippet); + expect(result).toEqual({ + type: 'code-snippet', + content: 'content', + directory: 'a', + score: 1, + location: 'path/to/item.py:1-3', + }); + }); + }); + }); + + describe('buildProjectFileSnippetIndex', () => { + let index: CloseableIndex; + + afterEach(() => index?.close()); + + it('should build a snippet index', async () => { + (search.readFileSafe as jest.Mock).mockImplementation((path: string) => { + if (path === 'a/path/to/item.py') return Promise.resolve('def item():\n return 42\n'); + + if (path === 'b/path/to/another.py') + return Promise.resolve('def another():\n return 21\n'); + + throw new Error(`Unexpected path: ${path}`); + }); + + const fileSearchResults: search.FileSearchResult[] = [ + { + directory: 'a', + filePath: 'path/to/item.py', + score: 1.0, + }, + { + directory: 'b', + filePath: 'path/to/another.py', + score: 1.0, + }, + ]; + const appmapSearchResults = []; + + index = await buildProjectFileSnippetIndex(fileSearchResults, appmapSearchResults); + + expect(index).toBeDefined(); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledTimes(2); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledWith(join('a', 'path/to/item.py')); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledWith( + join('b', 'path/to/another.py') + ); + + const result = index.index.searchSnippets('item', 10); + expect(result).toHaveLength(1); + expect(result[0].content).toEqual('def item():\n return 42'); + }); + + describe('indexing AppMap data requests', () => { + it('indexes a query', async () => { + const classMap: AppMapIndex.ClassMapEntry[] = [ + { + type: 'query', + name: 'SELECT * FROM table1', + children: [], + }, + ]; + (AppMapIndex.readIndexFile as jest.Mock).mockResolvedValue(classMap); + + const fileSearchResults: search.FileSearchResult[] = []; + const appmapSearchResults: SearchResult[] = [ + { + appmap: 'path/to/appmap_1.appmap.json', + directory: 'dir1', + score: 1.0, + }, + ]; + + index = await buildProjectFileSnippetIndex(fileSearchResults, appmapSearchResults); + + expect(AppMapIndex.readIndexFile as jest.Mock).toHaveBeenCalledTimes(1); + expect(AppMapIndex.readIndexFile as jest.Mock).toHaveBeenCalledWith( + 'path/to/appmap_1', + 'classMap' + ); + + const result = index.index.searchSnippets('table1', 10); + expect(result).toHaveLength(1); + expect(result[0].content).toEqual('SELECT * FROM table1'); + }); + + it('boosts a code snippet', async () => { + const classMap: AppMapIndex.ClassMapEntry[] = [ + { + type: 'package', + name: 'package1', + children: [ + { + type: 'function', + name: 'func1', + sourceLocation: 'path/to/func1.py:1', + children: [], + }, + ], + }, + ]; + + (AppMapIndex.readIndexFile as jest.Mock).mockResolvedValue(classMap); + (search.readFileSafe as jest.Mock).mockImplementation((path: string) => { + if (path === 'path/to/func1.py') return Promise.resolve('def myfunc():\n return 42\n'); + + if (path === 'path/to/func2.py') return Promise.resolve('def myfunc():\n return 21\n'); + + throw new Error(`Unexpected path: ${path}`); + }); + + const fileSearchResults: search.FileSearchResult[] = [ + { + directory: 'path/to', + filePath: 'func1.py', + score: 1.0, + }, + { + directory: 'path/to', + filePath: 'func2.py', + score: 1.0, + }, + ]; + const appmapSearchResults: SearchResult[] = [ + { + appmap: 'path/to/appmap_1.appmap.json', + directory: 'dir1', + score: 1.0, + }, + ]; + + index = await buildProjectFileSnippetIndex(fileSearchResults, appmapSearchResults); + + expect(AppMapIndex.readIndexFile as jest.Mock).toHaveBeenCalledTimes(1); + expect(AppMapIndex.readIndexFile as jest.Mock).toHaveBeenCalledWith( + 'path/to/appmap_1', + 'classMap' + ); + + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledTimes(2); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledWith(join('path/to/func1.py')); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledWith(join('path/to/func2.py')); + + const result = index.index.searchSnippets('myfunc', 10); + expect(result).toHaveLength(2); + expect(result[0].snippetId).toEqual({ type: 'code-snippet', id: 'path/to/func1.py:1' }); + expect(result[1].snippetId).toEqual({ type: 'code-snippet', id: 'path/to/func2.py:1' }); + // Row 0 should have approximately twice the score of row 1 + expect(result[1].score * 2).toBeCloseTo(result[0].score); + }); + }); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/pattern.spec.ts b/packages/cli/tests/unit/rpc/explain/pattern.spec.ts index e725818969..53694ef18f 100644 --- a/packages/cli/tests/unit/rpc/explain/pattern.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/pattern.spec.ts @@ -1,4 +1,4 @@ -import { buildExclusionPattern } from '../../../../src/rpc/explain/collectContext'; +import { buildExclusionPattern } from '../../../../src/rpc/explain/collect-context'; describe('Regex patterns', () => { const EXCLUDE_DOT_APPMAP_DIR = buildExclusionPattern('.appmap'); diff --git a/packages/cli/tests/unit/rpc/explain/review.spec.ts b/packages/cli/tests/unit/rpc/explain/review.spec.ts deleted file mode 100644 index e0d6ec83ee..0000000000 --- a/packages/cli/tests/unit/rpc/explain/review.spec.ts +++ /dev/null @@ -1,83 +0,0 @@ -import { EventEmitter } from 'stream'; - -const eventEmitter = new EventEmitter(); -const childProcess = { - stdout: { - setEncoding: jest.fn(), - on: jest.fn().mockImplementation((event: string, callback: (data: string) => void) => { - return eventEmitter.on(event, callback); - }), - }, - stderr: { - setEncoding: jest.fn(), - on: jest.fn().mockImplementation((event: string, callback: (data: string) => void) => { - return eventEmitter.on(event, callback); - }), - }, - on: jest.fn().mockImplementation((event: string, callback: (data: string) => void) => { - return eventEmitter.on(event, callback); - }), -}; - -jest.mock('node:child_process', () => ({ - execFile: jest.fn().mockReturnValue(childProcess), -})); - -import handleReview from '../../../../src/rpc/explain/review'; - -describe('handleReview', () => { - const diffContent = 'git diff here'; - - afterAll(() => { - jest.resetAllMocks(); - }); - - const emitResult = (output: string, exitCode = 0) => { - eventEmitter.emit('data', output); - eventEmitter.emit('close', exitCode); - }; - - it('does nothing if the command is not "review"', async () => { - const result = await handleReview('hello'); - expect(result).toStrictEqual({ applied: false }); - }); - - it('converts string user context to a context array', async () => { - const result = handleReview('@review', 'print("hello")'); - emitResult(diffContent); - await expect(result).resolves.toStrictEqual({ - applied: true, - userContext: [ - { type: 'code-selection', content: 'print("hello")' }, - { type: 'code-snippet', location: 'git diff', content: diffContent }, - ], - }); - }); - - it('returns an expected user context when it was initially undefined', async () => { - const result = handleReview('@review'); - emitResult(diffContent); - await expect(result).resolves.toStrictEqual({ - applied: true, - userContext: [{ type: 'code-snippet', location: 'git diff', content: diffContent }], - }); - }); - - it('returns an expected user context when it was initially a context array', async () => { - const result = handleReview('@review', [{ type: 'code-selection', content: 'print("hello")' }]); - emitResult(diffContent); - await expect(result).resolves.toStrictEqual({ - applied: true, - userContext: [ - { type: 'code-selection', content: 'print("hello")' }, - { type: 'code-snippet', location: 'git diff', content: diffContent }, - ], - }); - }); - - it('raises an error if the command fails', async () => { - const result = handleReview('@review'); - emitResult('git diff here', 1); - await expect(result).rejects.toThrowError('git diff here'); - }); -}); diff --git a/packages/search/src/build-snippet-index.ts b/packages/search/src/build-snippet-index.ts index 7e86ef65e0..1887c8554e 100644 --- a/packages/search/src/build-snippet-index.ts +++ b/packages/search/src/build-snippet-index.ts @@ -1,7 +1,7 @@ import { isAbsolute, join } from 'path'; import { Tokenizer } from './build-file-index'; import { ContentReader } from './ioutil'; -import SnippetIndex, { fileChunkSnippetId } from './snippet-index'; +import SnippetIndex, { SnippetId } from './snippet-index'; import { Splitter } from './splitter'; export type File = { @@ -27,7 +27,11 @@ async function indexFile(context: Context, file: File) { chunks.forEach((chunk) => { const { content, startLine } = chunk; - const snippetId = fileChunkSnippetId(filePath, startLine); + const id = [filePath, startLine].filter(Boolean).join(':'); + const snippetId: SnippetId = { + type: 'code-snippet', + id, + }; context.snippetIndex.indexSnippet( snippetId, file.directory, diff --git a/packages/search/src/index.ts b/packages/search/src/index.ts index 219eac1a5a..7fcab03761 100644 --- a/packages/search/src/index.ts +++ b/packages/search/src/index.ts @@ -2,15 +2,7 @@ export { ContentReader, readFileSafe } from './ioutil'; export { Splitter, langchainSplitter } from './splitter'; export { ListFn, FilterFn, Tokenizer, default as buildFileIndex } from './build-file-index'; export { File, default as buildSnippetIndex } from './build-snippet-index'; -export { - default as SnippetIndex, - SnippetSearchResult, - SnippetId, - encodeSnippetId, - parseSnippetId, - fileChunkSnippetId, - parseFileChunkSnippetId, -} from './snippet-index'; +export { default as SnippetIndex, SnippetSearchResult, SnippetId } from './snippet-index'; export { default as FileIndex, FileSearchResult } from './file-index'; export { default as listProjectFiles } from './project-files'; export { isBinaryFile, isDataFile, isLargeFile } from './file-type'; diff --git a/packages/search/src/snippet-index.ts b/packages/search/src/snippet-index.ts index 7f2eda3e23..83e8ac50b3 100644 --- a/packages/search/src/snippet-index.ts +++ b/packages/search/src/snippet-index.ts @@ -42,38 +42,11 @@ ORDER BY score DESC LIMIT ?`; -export enum SnippetType { - FileChunk = 'file-chunk', -} - export type SnippetId = { type: string; id: string; }; -export function fileChunkSnippetId(filePath: string, startLine?: number): SnippetId { - return { - type: 'file-chunk', - id: [filePath, startLine].filter(Boolean).join(':'), - }; -} - -export function parseFileChunkSnippetId(snippetId: SnippetId): { - filePath: string; - startLine?: number; -} { - const type = snippetId.type; - assert(type === SnippetType.FileChunk); - const parts = snippetId.id.split(':'); - const filePath = parts.shift(); - assert(filePath); - const startLine = parts.shift(); - return { - filePath: filePath, - startLine: startLine ? parseInt(startLine, 10) : undefined, - }; -} - export function encodeSnippetId(snippetId: SnippetId): string { return [snippetId.type, snippetId.id].join(':'); } diff --git a/packages/search/test/query-keywords.spec.ts b/packages/search/test/query-keywords.spec.ts index 53cac635f9..1f129edc0e 100644 --- a/packages/search/test/query-keywords.spec.ts +++ b/packages/search/test/query-keywords.spec.ts @@ -77,7 +77,6 @@ describe('queryKeywords', () => { 'http', 'response', 'httpresponse', - 'code', 'responsecode', ]); });