Skip to content

Commit

Permalink
feat: Full text search of AppMaps and Code Objects
Browse files Browse the repository at this point in the history
  • Loading branch information
kgilpin committed Nov 17, 2023
1 parent 3d07dfb commit c58e618
Show file tree
Hide file tree
Showing 8 changed files with 340 additions and 2 deletions.
4 changes: 3 additions & 1 deletion packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
"@types/jest": "^29.5.4",
"@types/jest-sinon": "^1.0.2",
"@types/jsdom": "^16.2.13",
"@types/lunr": "^2.3.7",
"@types/moo": "^0.5.5",
"@types/node": "^16",
"@types/semver": "^7.3.10",
"@types/sinon": "^10.0.2",
Expand Down Expand Up @@ -86,7 +88,6 @@
"@appland/sequence-diagram": "workspace:^1.7.0",
"@octokit/rest": "^20.0.1",
"@sidvind/better-ajv-errors": "^0.9.1",
"@types/moo": "^0.5.5",
"JSONStream": "^1.3.5",
"ajv": "^8.6.3",
"applicationinsights": "^2.1.4",
Expand All @@ -109,6 +110,7 @@
"inquirer": "^8.1.2",
"js-yaml": "^4.0.3",
"jsdom": "^16.6.0",
"lunr": "^2.3.9",
"minimatch": "^5.1.2",
"moo": "^0.5.1",
"open": "^8.2.1",
Expand Down
1 change: 1 addition & 0 deletions packages/cli/src/fingerprint/canonicalize.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const algorithms = {
packageDependencies: require('./canonicalize/packageDependencies'),
sqlNormalized: require('./canonicalize/sqlNormalized'),
sqlTables: require('./canonicalize/sqlTables'),
parameters: require('./canonicalize/parameters'),
};
/* eslint-enable global-require */

Expand Down
15 changes: 15 additions & 0 deletions packages/cli/src/fingerprint/canonicalize/parameters.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/* eslint-disable class-methods-use-this */
const Unique = require('./unique');
const { collectParameters } = require('../../fulltext/collectParameters');

class Canonicalize extends Unique {
functionCall(event) {
return collectParameters(event);
}

httpServerRequest(event) {
return collectParameters(event);
}
}

module.exports = (appmap) => new Canonicalize(appmap).execute();
6 changes: 5 additions & 1 deletion packages/cli/src/fingerprint/fingerprinter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ const renameFile = promisify(gracefulFs.rename);
/**
* CHANGELOG
*
* * # 1.4.0
*
* * Include parameter names in the index.
*
* * # 1.3.0
*
* * Include exceptions in the index.
Expand Down Expand Up @@ -49,7 +53,7 @@ const renameFile = promisify(gracefulFs.rename);
* * Fix handling of parent assignment in normalization.
* * sql can contain the analysis (action, tables, columns), and/or the normalized query string.
*/
export const VERSION = '1.2.0';
export const VERSION = '1.4.0';

const MAX_APPMAP_SIZE = 50 * 1024 * 1024;

Expand Down
134 changes: 134 additions & 0 deletions packages/cli/src/fulltext/FindAppMaps.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { readFile } from 'fs/promises';
import { exists, processNamedFiles, verbose } from '../utils';
import { Metadata } from '@appland/models';
import { dirname, join } from 'path';
import { warn } from 'console';
import lunr from 'lunr';
import assert from 'assert';

/**
 * Reports whether `str` looks camel-cased: it has at least three characters and,
 * ignoring its first character, contains a lowercase ASCII letter immediately
 * followed by an uppercase ASCII letter.
 */
const isCamelized = (str: string): boolean => {
  if (str.length < 3) return false;

  // The first character is deliberately excluded from the test, so e.g. "aBc" is
  // not treated as camelized.
  return /[a-z][A-Z]/.test(str.slice(1));
};

/**
 * Splits a camel-cased identifier into space-separated words so that a
 * whitespace-aware tokenizer can index each word on its own.
 *
 * Strings that are not camelized (see `isCamelized`) are returned unchanged.
 * Otherwise a single space is inserted before every uppercase ASCII letter that
 * is not the first character; note that this also separates the letters of an
 * embedded acronym ("getHTTPResponse" becomes "get H T T P Response").
 *
 * @param str identifier to split
 * @returns the identifier with words separated by single spaces
 */
export const splitCamelized = (str: string): string => {
  if (!isCamelized(str)) return str;

  // `(?!^)` keeps a leading uppercase letter from producing a leading space.
  return str.replace(/(?!^)[A-Z]/g, ' $&');
};

/**
 * Shape that the entries of an index directory's `classMap.json` are cast to.
 * Entries form a tree through `children`.
 */
type SerializedCodeObject = {
name: string;
// Compared against 'query', 'route' and 'external-route' when indexing; any
// other value is indexed as a generic code object.
type: string;
// Not read by the indexing code in this file.
labels: string[];
children: SerializedCodeObject[];
// Not read by the indexing code in this file.
static?: boolean;
// Not read by the indexing code in this file.
sourceLocation?: string;
};

/** Options accepted by `FindAppMaps.search`. */
export type SearchOptions = {
// Upper bound on the number of matches returned; when omitted (or 0) all
// matches are returned.
maxResults?: number;
};

/** One match returned by `FindAppMaps.search`. */
export type SearchResult = {
// The lunr document ref, which is the AppMap's index directory path.
appmap: string;
// Relevance score reported by lunr for this match.
score: number;
};

/**
 * Full-text search over the AppMap index directories found beneath `appmapDir`.
 *
 * Call `initialize()` once to build the lunr index, then call `search()` as
 * often as needed.
 */
export default class FindAppMaps {
  /** The lunr index; undefined until `initialize()` has completed. */
  idx: lunr.Index | undefined;

  /**
   * @param appmapDir directory that is scanned for `metadata.json` files
   */
  constructor(public appmapDir: string) {}

  /**
   * Builds the lunr index with one document per `metadata.json` file reported by
   * `processNamedFiles`. Each document is keyed by the directory containing that
   * file and carries the AppMap name, source location, code object names,
   * queries, routes, external routes and (when `canonical.parameters.json`
   * exists) parameter names.
   *
   * Rejects if `metadata.json` or `classMap.json` cannot be read or parsed.
   */
  async initialize() {
    const { appmapDir } = this;

    const documents = new Array<Record<string, unknown>>();
    await processNamedFiles(appmapDir, 'metadata.json', async (metadataFile) => {
      const metadata = JSON.parse(await readFile(metadataFile, 'utf-8')) as Metadata;
      const indexDir = dirname(metadataFile);
      const classMap = JSON.parse(
        await readFile(join(indexDir, 'classMap.json'), 'utf-8')
      ) as SerializedCodeObject[];
      const queries = new Array<string>();
      const codeObjects = new Array<string>();
      const routes = new Array<string>();
      const externalRoutes = new Array<string>();

      // Walk the class map tree, sorting each code object name into its field.
      const collectFunction = (co: SerializedCodeObject) => {
        if (co.type === 'query') queries.push(co.name);
        else if (co.type === 'route') routes.push(co.name);
        else if (co.type === 'external-route') externalRoutes.push(co.name);
        else codeObjects.push(splitCamelized(co.name));

        co.children?.forEach((child) => {
          collectFunction(child);
        });
      };
      classMap.forEach((co) => collectFunction(co));

      // The parameters file is optional; it is absent from indexes that were
      // built without it.
      const parameters = new Array<string>();
      const parametersFile = join(indexDir, 'canonical.parameters.json');
      if (await exists(parametersFile)) {
        const canonicalParameters = JSON.parse(
          await readFile(parametersFile, 'utf-8')
        ) as string[];
        canonicalParameters.forEach((cp) => {
          parameters.push(splitCamelized(cp));
        });
      }

      documents.push({
        id: indexDir,
        name: metadata.name,
        source_location: metadata.source_location,
        code_objects: codeObjects.join(' '),
        queries: queries.join(' '),
        routes: routes.join(' '),
        external_routes: externalRoutes.join(' '),
        // Joined into one string like the other fields: lunr turns each element
        // of an array into a single token without applying the separator, which
        // would leave the words produced by splitCamelized unsearchable.
        parameters: parameters.join(' '),
      });
    });

    if (verbose()) warn(`Indexing ${documents.length} diagrams`);

    this.idx = lunr(function () {
      this.ref('id');
      this.field('name');
      this.field('source_location');
      this.field('code_objects');
      this.field('queries');
      this.field('routes');
      this.field('external_routes');
      this.field('parameters');

      // The hyphen must be escaped: an unescaped "/-_" is a character range
      // (U+002F through U+005F) that swallows digits and several punctuation
      // characters while leaving "-" itself out of the separator set.
      this.tokenizer.separator = /[\s/\-_:#.]+/;

      for (const doc of documents) this.add(doc);
    });
  }

  /**
   * Runs a lunr query against the index.
   *
   * @param search lunr query string
   * @param options optional limit on the number of results
   * @returns matches in the order lunr reports them, truncated to
   *   `options.maxResults` when that is set
   * @throws AssertionError if `initialize()` has not completed
   */
  search(search: string, options: SearchOptions = {}): SearchResult[] {
    assert(this.idx);
    let matches = this.idx.search(search);
    if (verbose()) warn(`Got ${matches.length} matches for search ${search}`);
    if (options.maxResults && matches.length > options.maxResults) {
      if (verbose()) warn(`Limiting to the top ${options.maxResults} matches`);
      matches = matches.slice(0, options.maxResults);
    }
    return matches.map((match) => ({ appmap: match.ref, score: match.score }));
  }
}
135 changes: 135 additions & 0 deletions packages/cli/src/fulltext/FindEvents.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import { AppMap, AppMapFilter, Event, buildAppMap } from '@appland/models';
import { warn } from 'console';
import { readFile } from 'fs/promises';
import { verbose } from '../utils';
import lunr from 'lunr';
import { splitCamelized } from './FindAppMaps';
import { collectParameters } from './collectParameters';
import assert from 'assert';

/** Per-code-object entry that is added to the lunr index, keyed by `fqid`. */
type IndexItem = {
// Fully qualified id of the code object; used as the lunr document ref.
fqid: string;
// The code object's id with camel-cased words separated by spaces.
name: string;
location?: string;
// Parameter names collected from every event recorded for this code object,
// without duplicates.
parameters: string[];
// Ids of the events recorded for this code object, in traversal order.
eventIds: number[];
// Sum of the elapsed times of those events; unset when none reported one.
elapsed?: number;
};

/** Options accepted by `FindEvents.search`. */
export type SearchOptions = {
// Upper bound on the number of matches returned; when omitted (or 0) all
// matches are returned.
maxResults?: number;
};

/** One match returned by `FindEvents.search`. */
export type SearchResult = {
// The lunr document ref, which is the matched code object's fqid.
fqid: string;
// Copied from the index item only when it has a location.
location?: string;
// Relevance score reported by lunr for this match.
score: number;
// Ids of the events recorded for the matched code object.
eventIds: number[];
// Accumulated elapsed time of those events, when available.
elapsed?: number;
};

/**
 * Full-text search over the code objects that appear in a single AppMap.
 *
 * Optionally set `maxSize` and `filter`, then call `initialize()` once before
 * calling `search()`.
 */
export default class FindEvents {
  /** When set, the AppMap is pruned to this size before filtering. */
  public maxSize?: number;
  /** Filter to apply in place of the default one built by `initialize()`. */
  public filter?: AppMapFilter;

  idx: lunr.Index | undefined;
  indexItemsByFqid = new Map<string, IndexItem>();
  filteredAppMap?: AppMap;

  /**
   * @param appmapIndexDir index directory of the AppMap; the AppMap file itself
   *   is read from this path with `.appmap.json` appended
   */
  constructor(public appmapIndexDir: string) {}

  /** Identifier of the AppMap, which is its index directory path. */
  get appmapId() {
    return this.appmapIndexDir;
  }

  /** The filtered AppMap; asserts that `initialize()` has completed. */
  get appmap() {
    assert(this.filteredAppMap);
    return this.filteredAppMap;
  }

  /**
   * Loads the AppMap, applies the filter, and builds a lunr index holding one
   * document per distinct code object reached from the filtered root events.
   *
   * NOTE(review): only `name` is registered as a lunr field, so the collected
   * `parameters` are stored on each item but are not searchable — confirm that
   * this is intended.
   */
  async initialize() {
    const appmapFile = `${this.appmapId}.appmap.json`;
    const builder = buildAppMap().source(await readFile(appmapFile, 'utf-8'));
    if (this.maxSize) builder.prune(this.maxSize);

    const baseAppMap = builder.build();

    if (verbose()) warn(`Built AppMap with ${baseAppMap.events.length} events.`);
    if (verbose()) warn(`Applying default AppMap filters.`);

    let activeFilter = this.filter;
    if (!activeFilter) {
      activeFilter = new AppMapFilter();
      const isJava = baseAppMap.metadata.language?.name === 'java';
      if (!isJava) {
        activeFilter.declutter.hideExternalPaths.on = true;
      }
      activeFilter.declutter.limitRootEvents.on = true;
    }
    const filteredAppMap = activeFilter.filter(baseAppMap, []);
    if (verbose()) warn(`Filtered AppMap has ${filteredAppMap.events.length} events.`);
    if (verbose()) warn(`Indexing AppMap`);

    const items = this.indexItemsByFqid;

    // Depth-first, parent-before-children walk: the first event seen for a code
    // object creates its index item, later events are merged into it.
    const indexEvent = (event: Event): void => {
      const codeObject = event.codeObject;
      const parameters = collectParameters(event);
      const known = items.get(codeObject.fqid);
      if (known) {
        known.eventIds.push(event.id);
        if (event.elapsedTime) known.elapsed = (known.elapsed || 0) + event.elapsedTime;
        for (const parameter of parameters) {
          if (!known.parameters.includes(parameter)) known.parameters.push(parameter);
        }
      } else {
        const created: IndexItem = {
          fqid: codeObject.fqid,
          name: splitCamelized(codeObject.id),
          parameters,
          location: codeObject.location,
          eventIds: [event.id],
        };
        if (event.elapsedTime) created.elapsed = event.elapsedTime;
        items.set(codeObject.fqid, created);
      }
      event.children.forEach((child) => {
        indexEvent(child);
      });
    };
    filteredAppMap.rootEvents().forEach((rootEvent) => {
      indexEvent(rootEvent);
    });

    this.filteredAppMap = filteredAppMap;
    this.idx = lunr(function () {
      this.ref('fqid');
      this.field('name');
      this.tokenizer.separator = /[\s/\-_:#.]+/;

      items.forEach((item) => {
        // Favor code objects that have a source location and those that were
        // invoked more than once.
        const boost = 1 + (item.location ? 1 : 0) + (item.eventIds.length > 1 ? 1 : 0);
        this.add(item, { boost });
      });
    });
  }

  /**
   * Runs a lunr query against the index.
   *
   * @param search lunr query string
   * @param options optional limit on the number of results
   * @returns matched code objects in the order lunr reports them, truncated to
   *   `options.maxResults` when that is set
   * @throws AssertionError if `initialize()` has not completed
   */
  search(search: string, options: SearchOptions = {}): SearchResult[] {
    assert(this.idx);
    const { maxResults } = options;
    let matches = this.idx.search(search);
    if (verbose()) warn(`Got ${matches.length} matches for search ${search}`);
    if (maxResults && matches.length > maxResults) {
      if (verbose()) warn(`Limiting to the top ${maxResults} matches`);
      matches = matches.slice(0, maxResults);
    }
    return matches.map(({ ref, score }) => {
      const indexItem = this.indexItemsByFqid.get(ref);
      assert(indexItem);
      const result: SearchResult = {
        fqid: ref,
        score,
        elapsed: indexItem.elapsed,
        eventIds: indexItem.eventIds,
      };
      if (indexItem.location) result.location = indexItem.location;
      return result;
    });
  }
}
31 changes: 31 additions & 0 deletions packages/cli/src/fulltext/collectParameters.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { Event, ParameterObject, ParameterProperty } from '@appland/models';

/**
 * Gathers the names attached to an event: first the names of its parameters,
 * then the names found (recursively) in its message properties.
 *
 * @param event event to inspect
 * @returns the collected names in encounter order; duplicates are kept
 */
export function collectParameters(event: Event): string[] {
  const names: string[] = [];

  const parameters = event.parameters;
  if (parameters) collectParameterNames(parameters, names);

  const message = event.message;
  if (message) collectProperties(message, names);

  return names;
}

/**
 * Appends the name of every named parameter to `result`. Only the top-level
 * names are collected; parameters without a name are skipped.
 *
 * @param parameters parameters to read names from
 * @param result array to append to; a new array is used when omitted
 * @returns `result`
 */
export function collectParameterNames(
  parameters: readonly ParameterObject[],
  result: string[] = []
) {
  for (const parameter of parameters) {
    if (parameter.name) result.push(parameter.name);
  }
  return result;
}

/**
 * Appends property names to `result`, descending depth-first into each
 * property's `items` and then its `properties`. A property's own name is
 * recorded before the names nested inside it; unnamed properties are still
 * descended into.
 *
 * @param properties properties to walk
 * @param result array to append to; a new array is used when omitted
 * @returns `result`
 */
export function collectProperties(properties: readonly ParameterProperty[], result: string[] = []) {
  properties.forEach((property) => {
    const { name, items, properties: nested } = property;
    if (name) result.push(name);
    if (items) collectProperties(items, result);
    if (nested) collectProperties(nested, result);
  });
  return result;
}
Loading

0 comments on commit c58e618

Please sign in to comment.