-
Notifications
You must be signed in to change notification settings - Fork 57
/
Copy pathfast_load_utils.js
131 lines (120 loc) · 6.32 KB
/
fast_load_utils.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// This file stores only functionality that is required for idle operation of Rainbow CSV i.e. autodetection only.
// We want to avoid loading/parsing a lot of JS code in cases where we don't have any CSV files to work with.
const csv_utils = require('./rbql_core/rbql-js/csv_utils.js');
// Error type thrown by `assert` when the checked condition does not hold.
// `name` is set explicitly so the error identifies itself as "AssertionError" instead of inheriting the generic "Error" name.
class AssertionError extends Error {
    constructor(...args) {
        // Forward everything (message, options) to Error so construction behaves exactly like the base class.
        super(...args);
        this.name = 'AssertionError';
    }
}
// Throws an AssertionError unless `condition` is truthy.
// `message` is the error text; any falsy message (including the default null) is replaced with 'Assertion error'.
function assert(condition, message=null) {
    if (condition) {
        return;
    }
    throw new AssertionError(message || 'Assertion error');
}
// Splits record texts into fields one record at a time and accumulates parsing statistics along the way.
// The only purpose of this class is to avoid code duplication when dealing with leftover lines in line_aggregator (the second `consume` call).
class RecordTextConsumer {
    // delim, policy, preserve_quotes_and_whitespaces: forwarded unchanged to csv_utils.smart_split.
    // stop_on_warning: when truthy, `consume` rejects a record whose split produced a warning.
    // collect_records: when truthy, accepted records are accumulated in `this.records`; otherwise `this.records` stays null.
    // detect_trailing_spaces: when truthy, the start line of the first record containing a field with a leading or trailing space is remembered.
    // min_num_fields_for_autodetection: -1 disables autodetection mode; any other value is the minimal acceptable number of fields.
    constructor(delim, policy, stop_on_warning, collect_records, preserve_quotes_and_whitespaces, detect_trailing_spaces, min_num_fields_for_autodetection) {
        this.delim = delim;
        this.policy = policy;
        this.stop_on_warning = stop_on_warning;
        this.first_defective_line = null; // Start line of the first record that produced a split warning.
        this.records = collect_records ? [] : null;
        this.collect_records = collect_records;
        this.num_records_parsed = 0;
        this.fields_info = new Map(); // Number of fields -> index of the first record that has this many fields.
        this.first_trailing_space_line = null;
        this.detect_trailing_spaces = detect_trailing_spaces;
        this.preserve_quotes_and_whitespaces = preserve_quotes_and_whitespaces;
        this.min_num_fields_for_autodetection = min_num_fields_for_autodetection;
    }

    // Parses a single record and updates the accumulated state.
    // record_text: raw text of the record; record_start_line: line number reported for this record in the diagnostics fields.
    // Returns true when the caller can continue feeding records and false when parsing should stop.
    // A rejected record is neither counted in `num_records_parsed` nor stored in `records`.
    consume(record_text, record_start_line) {
        const [record, warning] = csv_utils.smart_split(record_text, this.delim, this.policy, this.preserve_quotes_and_whitespaces);
        if (warning) {
            if (this.first_defective_line === null) {
                this.first_defective_line = record_start_line;
            }
            if (this.stop_on_warning) {
                return /*can_continue=*/false;
            }
        }
        if (this.detect_trailing_spaces && this.first_trailing_space_line === null) {
            for (const field of record) {
                // Empty fields match neither check, so no separate length test is needed.
                if (field.startsWith(' ') || field.endsWith(' ')) {
                    this.first_trailing_space_line = record_start_line;
                    break; // One offending field is enough - no need to scan the rest of the record.
                }
            }
        }
        const num_fields = record.length;
        if (!this.fields_info.has(num_fields)) {
            // This number of fields is seen for the first time: remember which record introduced it.
            this.fields_info.set(num_fields, this.num_records_parsed);
            if (this.min_num_fields_for_autodetection != -1) {
                // Autodetection mode: stop on inconsistent record lengths and when there are not enough columns (typically less than 2 i.e. 1).
                if (num_fields < this.min_num_fields_for_autodetection || this.fields_info.size > 1) {
                    return /*can_continue=*/false;
                }
            }
        }
        if (this.collect_records) {
            this.records.push(record);
        }
        this.num_records_parsed += 1;
        return /*can_continue=*/true;
    }
}
// Parses the lines of `document` into records.
// `document` only needs to provide `lineCount` and `lineAt(line_number).text`.
// Comment lines (lines starting with `comment_prefix`) are skipped as records; with the 'quoted_rfc' policy a record may span several lines.
// Parsing stops early when the record consumer rejects a record or when `max_records_to_parse` records were parsed (-1 means no limit).
// Returns [records, num_records_parsed, fields_info, first_defective_line, first_trailing_space_line, comments].
function parse_document_records(document, delim, policy, comment_prefix=null, stop_on_warning=false, max_records_to_parse=-1, collect_records=true, preserve_quotes_and_whitespaces=false, detect_trailing_spaces=false, min_num_fields_for_autodetection=-1) {
    // TODO consider switching to a single row_info array format that would have (comment, record_ranges and the record itself) - this would make it more compatible with the incremental parsing functions.
    const total_lines = document.lineCount;
    const aggregator = new csv_utils.MultilineRecordAggregator(comment_prefix);
    const consumer = new RecordTextConsumer(delim, policy, stop_on_warning, collect_records, preserve_quotes_and_whitespaces, detect_trailing_spaces, min_num_fields_for_autodetection);
    const comments = []; // An ordered list of {record_num, comment_text} entries which can be merged with the records later.
    const is_rfc_policy = policy == 'quoted_rfc';
    let record_start_line = 0;

    // Comments are only tracked when records are collected; `record_num` is the number of records parsed before the comment.
    const remember_comment = (comment_text) => {
        if (collect_records) {
            comments.push({record_num: consumer.num_records_parsed, comment_text: comment_text});
        }
    };
    // Snapshot of the consumer state in the shape returned to the caller.
    const build_result = () => [consumer.records, consumer.num_records_parsed, consumer.fields_info, consumer.first_defective_line, consumer.first_trailing_space_line, comments];

    for (let lnum = 0; lnum < total_lines; lnum++) {
        const next_lnum = lnum + 1;
        const line_text = document.lineAt(lnum).text;
        if (next_lnum >= total_lines && line_text == "") {
            // Treat the last empty line as a comment - this is to prevent align/shrink functions from removing it.
            remember_comment(line_text);
            break; // Skip the last empty line.
        }
        let record_text = null;
        if (is_rfc_policy) {
            aggregator.add_line(line_text);
            if (aggregator.has_comment_line) {
                record_start_line = next_lnum;
                aggregator.reset();
                remember_comment(line_text);
                continue;
            }
            if (!aggregator.has_full_record) {
                // The record is not complete yet - keep aggregating the following lines.
                continue;
            }
            record_text = aggregator.get_full_line('\n');
            aggregator.reset();
        } else if (comment_prefix && line_text.startsWith(comment_prefix)) {
            record_start_line = next_lnum;
            remember_comment(line_text);
            continue;
        } else {
            record_text = line_text;
        }
        if (!consumer.consume(record_text, record_start_line)) {
            return build_result();
        }
        record_start_line = next_lnum;
        if (max_records_to_parse !== -1 && consumer.num_records_parsed >= max_records_to_parse) {
            return build_result();
        }
    }
    if (aggregator.is_inside_multiline_record()) {
        // Leftover lines of an unfinished multiline record: feed them to the consumer as the final record.
        assert(is_rfc_policy);
        consumer.consume(aggregator.get_full_line('\n'), record_start_line);
    }
    return build_result();
}
// Public API of this module: the document parser and the assertion helper.
module.exports.parse_document_records = parse_document_records;
module.exports.assert = assert;