-
Notifications
You must be signed in to change notification settings - Fork 4
/
scip.proto
547 lines (512 loc) · 19.2 KB
/
scip.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
// An index contains one or more pieces of information about a given piece of
// source code or software artifact. Complementary information can be merged
// together from multiple sources to provide a unified code intelligence
// experience.
//
// Programs producing a file of this format is an "indexer" and may operate
// somewhere on the spectrum between precision, such as indexes produced by
// compiler-backed indexers, and heurstics, such as indexes produced by local
// syntax-directed analysis for scope rules.
syntax = "proto3";
package scip;
option go_package = "github.com/sourcegraph/scip/bindings/go/scip/";
// Index represents a complete SCIP index for a workspace this is rooted at a
// single directory. An Index message payload can have a large memory footprint
// and it's therefore recommended to emit and consume an Index payload one field
// value at a time. To permit streaming consumption of an Index payload, the
// `metadata` field must appear at the start of the stream and must only appear
// once in the stream. Other field values may appear in any order.
message Index {
// Metadata about this index.
Metadata metadata = 1;
// Documents that belong to this index.
repeated Document documents = 2;
// (optional) Symbols that are referenced from this index but are defined in
// an external package (a separate `Index` message). Leave this field empty
// if you assume the external package will get indexed separately. If the
// external package won't get indexed for some reason then you can use this
// field to provide hover documentation for those external symbols.
repeated SymbolInformation external_symbols = 3;
}
message Metadata {
// Which version of this protocol was used to generate this index?
ProtocolVersion version = 1;
// Information about the tool that produced this index.
ToolInfo tool_info = 2;
// URI-encoded absolute path to the root directory of this index. All
// documents in this index must appear in a subdirectory of this root
// directory.
string project_root = 3;
// Text encoding of the source files on disk that are referenced from
// `Document.relative_path`.
TextEncoding text_document_encoding = 4;
}
enum ProtocolVersion {
UnspecifiedProtocolVersion = 0;
}
enum TextEncoding {
UnspecifiedTextEncoding = 0;
UTF8 = 1;
UTF16 = 2;
}
message ToolInfo {
// Name of the indexer that produced this index.
string name = 1;
// Version of the indexer that produced this index.
string version = 2;
// Command-line arguments that were used to invoke this indexer.
repeated string arguments = 3;
}
// Document defines the metadata about a source file on disk.
message Document {
// The string ID for the programming language this file is written in.
// The `Language` enum contains the names of most common programming languages.
// This field is typed as a string to permit any programming langauge, including
// ones that are not specified by the `Language` enum.
string language = 4;
// (Required) Unique path to the text document.
//
// 1. The path must be relative to the directory supplied in the associated
// `Metadata.project_root`.
// 2. The path must not begin with a leading '/'.
// 3. The path must point to a regular file, not a symbolic link.
// 4. The path must use '/' as the separator, including on Windows.
// 5. The path must be canonical; it cannot include empty components ('//'),
// or '.' or '..'.
string relative_path = 1;
// Occurrences that appear in this file.
repeated Occurrence occurrences = 2;
// Symbols that are "defined" within this document.
//
// This should include symbols which technically do not have any definition,
// but have a reference and are defined by some other symbol (see
// Relationship.is_definition).
repeated SymbolInformation symbols = 3;
}
// Symbol is similar to a URI, it identifies a class, method, or a local
// variable. `SymbolInformation` contains rich metadata about symbols such as
// the docstring.
//
// Symbol has a standardized string representation, which can be used
// interchangeably with `Symbol`. The syntax for Symbol is the following:
// ```
// # (<x>)+ stands for one or more repetitions of <x>
// <symbol> ::= <scheme> ' ' <package> ' ' (<descriptor>)+ | 'local ' <local-id>
// <package> ::= <manager> ' ' <package-name> ' ' <version>
// <scheme> ::= any UTF-8, escape spaces with double space.
// <manager> ::= same as above, use the placeholder '.' to indicate an empty value
// <package-name> ::= same as above
// <version> ::= same as above
// <descriptor> ::= <namespace> | <type> | <term> | <method> | <type-parameter> | <parameter> | <meta>
// <namespace> ::= <name> '/'
// <type> ::= <name> '#'
// <term> ::= <name> '.'
// <meta> ::= <name> ':'
// <method> ::= <name> '(' <method-disambiguator> ').'
// <type-parameter> ::= '[' <name> ']'
// <parameter> ::= '(' <name> ')'
// <name> ::= <identifier>
// <method-disambiguator> ::= <simple-identifier>
// <identifier> ::= <simple-identifier> | <escaped-identifier>
// <simple-identifier> ::= (<identifier-character>)+
// <identifier-character> ::= '_' | '+' | '-' | '$' | ASCII letter or digit
// <escaped-identifier> ::= '`' (<escaped-character>)+ '`'
// <escaped-characters> ::= any UTF-8 character, escape backticks with double backtick.
// ```
//
// The list of descriptors for a symbol should together form a fully
// qualified name for the symbol. That is, it should serve as a unique
// identifier across the package. Typically, it will include one descriptor
// for every node in the AST (along the ancestry path) between the root of
// the file and the node corresponding to the symbol.
message Symbol {
string scheme = 1;
Package package = 2;
repeated SymbolDescriptor descriptors = 3;
}
// Unit of packaging and distribution.
//
// NOTE: This corresponds to a module in Go and JVM languages.
message Package {
string manager = 1;
string name = 2;
string version = 3;
}
message SymbolDescriptor {
enum Suffix {
option allow_alias = true;
UnspecifiedSuffix = 0;
// Unit of code abstraction and/or namespacing.
//
// NOTE: This corresponds to a package in Go and JVM languages.
Namespace = 1;
// Use Namespace instead.
Package = 1 [deprecated=true];
Type = 2;
Term = 3;
Method = 4;
TypeParameter = 5;
Parameter = 6;
Macro = 9;
// Can be used for any purpose.
Meta = 7;
Local = 8;
}
string name = 1;
string disambiguator = 2;
Suffix suffix = 3;
}
// SymbolInformation defines metadata about a symbol, such as the symbol's
// docstring or what package it's defined it.
message SymbolInformation {
// Identifier of this symbol, which can be referenced from `Occurence.symbol`.
// The string must be formatted according to the grammar in `Symbol`.
string symbol = 1;
// (optional, but strongly recommended) The markdown-formatted documentation
// for this symbol. This field is repeated to allow different kinds of
// documentation. For example, it's nice to include both the signature of a
// method (parameters and return type) along with the accompanying docstring.
repeated string documentation = 3;
// (optional) Relationships to other symbols (e.g., implements, type definition).
repeated Relationship relationships = 4;
}
message Relationship {
string symbol = 1;
// When resolving "Find references", this field documents what other symbols
// should be included together with this symbol. For example, consider the
// following TypeScript code that defines two symbols `Animal#sound()` and
// `Dog#sound()`:
// ```ts
// interface Animal {
// ^^^^^^ definition Animal#
// sound(): string
// ^^^^^ definition Animal#sound()
// }
// class Dog implements Animal {
// ^^^ definition Dog#, implementation_symbols = Animal#
// public sound(): string { return "woof" }
// ^^^^^ definition Dog#sound(), references_symbols = Animal#sound(), implementation_symbols = Animal#sound()
// }
// const animal: Animal = new Dog()
// ^^^^^^ reference Animal#
// console.log(animal.sound())
// ^^^^^ reference Animal#sound()
// ```
// Doing "Find references" on the symbol `Animal#sound()` should return
// references to the `Dog#sound()` method as well. Vice-versa, doing "Find
// references" on the `Dog#sound()` method should include references to the
// `Animal#sound()` method as well.
bool is_reference = 2;
// Similar to `references_symbols` but for "Go to implementation".
// It's common for the `implementation_symbols` and `references_symbols` fields
// have the same values but that's not always the case.
// In the TypeScript example above, observe that `implementation_symbols` has
// the value `"Animal#"` for the "Dog#" symbol while `references_symbols` is
// empty. When requesting "Find references" on the "Animal#" symbol we don't
// want to include references to "Dog#" even if "Go to implementation" on the
// "Animal#" symbol should navigate to the "Dog#" symbol.
bool is_implementation = 3;
// Similar to `references_symbols` but for "Go to type definition".
bool is_type_definition = 4;
// Allows overriding the behavior of "Go to definition" and "Find references"
// for symbols which do not have a definition of their own or could
// potentially have multiple definitions.
//
// For example, in a language with single inheritance and no field overriding,
// inherited fields can reuse the same symbol as the ancestor which declares
// the field. In such a situation, is_definition is not needed.
//
// On the other hand, in languages with single inheritance and some form
// of mixins, you can use is_definition to relate the symbol to the
// matching symbol in ancestor classes, and is_reference to relate the
// symbol to the matching symbol in mixins.
//
// NOTE: At the moment, due to limitations of the SCIP to LSIF conversion,
// only global symbols in an index are allowed to use is_definition.
// The relationship may not get recorded if either symbol is local.
bool is_definition = 5;
// Update registerInverseRelationships on adding a new field here.
}
// SymbolRole declares what "role" a symbol has in an occurrence. A role is
// encoded as a bitset where each bit represents a different role. For example,
// to determine if the `Import` role is set, test whether the second bit of the
// enum value is defined. In pseudocode, this can be implemented with the
// logic: `const isImportRole = (role.value & SymbolRole.Import.value) > 0`.
enum SymbolRole {
// This case is not meant to be used; it only exists to avoid an error
// from the Protobuf code generator.
UnspecifiedSymbolRole = 0;
// Is the symbol defined here? If not, then this is a symbol reference.
Definition = 0x1;
// Is the symbol imported here?
Import = 0x2;
// Is the symbol written here?
WriteAccess = 0x4;
// Is the symbol read here?
ReadAccess = 0x8;
// Is the symbol in generated code?
Generated = 0x10;
// Is the symbol in test code?
Test = 0x20;
}
enum SyntaxKind {
option allow_alias = true;
UnspecifiedSyntaxKind = 0;
// Comment, including comment markers and text
Comment = 1;
// `;` `.` `,`
PunctuationDelimiter = 2;
// (), {}, [] when used syntactically
PunctuationBracket = 3;
// `if`, `else`, `return`, `class`, etc.
Keyword = 4;
IdentifierKeyword = 4 [deprecated=true];
// `+`, `*`, etc.
IdentifierOperator = 5;
// non-specific catch-all for any identifier not better described elsewhere
Identifier = 6;
// Identifiers builtin to the language: `min`, `print` in Python.
IdentifierBuiltin = 7;
// Identifiers representing `null`-like values: `None` in Python, `nil` in Go.
IdentifierNull = 8;
// `xyz` in `const xyz = "hello"`
IdentifierConstant = 9;
// `var X = "hello"` in Go
IdentifierMutableGlobal = 10;
// Parameter definition and references
IdentifierParameter = 11;
// Identifiers for variable definitions and references within a local scope
IdentifierLocal = 12;
// Identifiers that shadow other identifiers in an outer scope
IdentifierShadowed = 13;
// Identifier representing a unit of code abstraction and/or namespacing.
//
// NOTE: This corresponds to a package in Go and JVM languages,
// and a module in languages like Python and JavaScript.
IdentifierNamespace = 14;
IdentifierModule = 14 [deprecated=true];
// Function references, including calls
IdentifierFunction = 15;
// Function definition only
IdentifierFunctionDefinition = 16;
// Macro references, including invocations
IdentifierMacro = 17;
// Macro definition only
IdentifierMacroDefinition = 18;
// non-builtin types
IdentifierType = 19;
// builtin types only, such as `str` for Python or `int` in Go
IdentifierBuiltinType = 20;
// Python decorators, c-like __attribute__
IdentifierAttribute = 21;
// `\b`
RegexEscape = 22;
// `*`, `+`
RegexRepeated = 23;
// `.`
RegexWildcard = 24;
// `(`, `)`, `[`, `]`
RegexDelimiter = 25;
// `|`, `-`
RegexJoin = 26;
// Literal strings: "Hello, world!"
StringLiteral = 27;
// non-regex escapes: "\t", "\n"
StringLiteralEscape = 28;
// datetimes within strings, special words within a string, `{}` in format strings
StringLiteralSpecial = 29;
// "key" in { "key": "value" }, useful for example in JSON
StringLiteralKey = 30;
// 'c' or similar, in languages that differentiate strings and characters
CharacterLiteral = 31;
// Literal numbers, both floats and integers
NumericLiteral = 32;
// `true`, `false`
BooleanLiteral = 33;
// Used for XML-like tags
Tag = 34;
// Attribute name in XML-like tags
TagAttribute = 35;
// Delimiters for XML-like tags
TagDelimiter = 36;
}
// Occurrence associates a source position with a symbol and/or highlighting
// information.
//
// If possible, indexers should try to bundle logically related information
// across occurrences into a single occurrence to reduce payload sizes.
message Occurrence {
// Source position of this occurrence. Must be exactly three or four
// elements:
//
// - Four elements: `[startLine, startCharacter, endLine, endCharacter]`
// - Three elements: `[startLine, startCharacter, endCharacter]`. The end line
// is inferred to have the same value as the start line.
//
// Line numbers and characters are always 0-based. Make sure to increment the
// line/character values before displaying them in an editor-like UI because
// editors conventionally use 1-based numbers.
//
// Historical note: the original draft of this schema had a `Range` message
// type with `start` and `end` fields of type `Position`, mirroring LSP.
// Benchmarks revealed that this encoding was inefficient and that we could
// reduce the total payload size of an index by 50% by using `repeated int32`
// instead. The `repeated int32` encoding is admittedly more embarrassing to
// work with in some programming languages but we hope the performance
// improvements make up for it.
repeated int32 range = 1;
// (optional) The symbol that appears at this position. See
// `SymbolInformation.symbol` for how to format symbols as strings.
string symbol = 2;
// (optional) Bitset containing `SymbolRole`s in this occurrence.
// See `SymbolRole`'s documentation for how to read and write this field.
int32 symbol_roles = 3;
// (optional) CommonMark-formatted documentation for this specific range. If
// empty, the `Symbol.documentation` field is used instead. One example
// where this field might be useful is when the symbol represents a generic
// function (with abstract type parameters such as `List<T>`) and at this
// occurrence we know the exact values (such as `List<String>`).
//
// This field can also be used for dynamically or gradually typed languages,
// which commonly allow for type-changing assignment.
repeated string override_documentation = 4;
// (optional) What syntax highlighting class should be used for this range?
SyntaxKind syntax_kind = 5;
// (optional) Diagnostics that have been reported for this specific range.
repeated Diagnostic diagnostics = 6;
}
// Represents a diagnostic, such as a compiler error or warning, which should be
// reported for a document.
message Diagnostic {
// Should this diagnostic be reported as an error, warning, info, or hint?
Severity severity = 1;
// (optional) Code of this diagnostic, which might appear in the user interface.
string code = 2;
// Message of this diagnostic.
string message = 3;
// (optional) Human-readable string describing the source of this diagnostic, e.g.
// 'typescript' or 'super lint'.
string source = 4;
repeated DiagnosticTag tags = 5;
}
enum Severity {
UnspecifiedSeverity = 0;
Error = 1;
Warning = 2;
Information = 3;
Hint = 4;
}
enum DiagnosticTag {
UnspecifiedDiagnosticTag = 0;
Unnecessary = 1;
Deprecated = 2;
}
// Language standardises names of common programming languages that can be used
// for the `Document.language` field. The primary purpose of this enum is to
// prevent a situation where we have a single programming language ends up with
// multiple string representations. For example, the C++ language uses the name
// "CPlusPlus" in this enum and other names such as "cpp" are incompatible.
// Feel free to send a pull-request to add missing programming languages.
enum Language {
UnspecifiedLanguage = 0;
ABAP = 60;
APL = 49;
Ada = 39;
Agda = 45;
AsciiDoc = 86;
Assembly = 58;
Awk = 66;
Bat = 68;
BibTeX = 81;
C = 34;
COBOL = 59;
CPP = 35; // C++ (the name "CPP" was chosen for consistency with LSP)
CSS = 26;
CSharp = 1;
Clojure = 8;
Coffeescript = 21;
CommonLisp = 9;
Coq = 47;
Dart = 3;
Delphi = 57;
Diff = 88;
Dockerfile = 80;
Dyalog = 50;
Elixir = 17;
Erlang = 18;
FSharp = 42;
Fish = 65;
Flow = 24;
Fortran = 56;
Git_Commit = 91;
Git_Config = 89;
Git_Rebase = 92;
Go = 33;
Groovy = 7;
HTML = 30;
Hack = 20;
Handlebars = 90;
Haskell = 44;
Idris = 46;
Ini = 72;
J = 51;
JSON = 75;
Java = 6;
JavaScript = 22;
JavaScriptReact = 93;
Jsonnet = 76;
Julia = 55;
Kotlin = 4;
LaTeX = 83;
Lean = 48;
Less = 27;
Lua = 12;
Makefile = 79;
Markdown = 84;
Matlab = 52;
Nix = 77;
OCaml = 41;
Objective_C = 36;
Objective_CPP = 37;
PHP = 19;
PLSQL = 70;
Perl = 13;
PowerShell = 67;
Prolog = 71;
Python = 15;
R = 54;
Racket = 11;
Raku = 14;
Razor = 62;
ReST = 85;
Ruby = 16;
Rust = 40;
SAS = 61;
SCSS = 29;
SML = 43;
SQL = 69;
Sass = 28;
Scala = 5;
Scheme = 10;
ShellScript = 64; // Bash
Skylark = 78;
Swift = 2;
TOML = 73;
TeX = 82;
TypeScript = 23;
TypeScriptReact = 94;
VisualBasic = 63;
Vue = 25;
Wolfram = 53;
XML = 31;
XSL = 32;
YAML = 74;
Zig = 38;
// NextLanguage = 95;
// Steps add a new language:
// 1. Copy-paste the "NextLanguage = N" line above
// 2. Increment "NextLanguage = N" to "NextLanguage = N+1"
// 3. Replace "NextLanguage = N" with the name of the new language.
// 4. Move the new language to the correct line above using alphabetical order
// 5. (optional) Add a brief comment behind the language if the name is not self-explanatory
}