-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathimport_template.rb
345 lines (296 loc) · 10.1 KB
/
import_template.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
require 'tempfile'
require 'uri'
require 'yaml'
module TraitDB
  # Loads a spreadsheet import template from a YAML document (a readable local
  # file or an HTTP(S) URL) and answers questions about the columns, traits,
  # trait sets and trait groups it declares.
  #
  # Every setting is read from the hash stored under the top-level
  # 'traitdb_spreadsheet_template' key of the YAML document.
  class ImportTemplate
    # Separator between trait-set names and the trait name in a qualified
    # column name, used unless trait_options['set_delimiter'] overrides it.
    DEFAULT_SET_DELIMITER = '::'.freeze

    # Config keys that hold the two kinds of trait definitions.
    CONTINUOUS_KEY = 'continuous_trait_columns'.freeze
    CATEGORICAL_KEY = 'categorical_trait_columns'.freeze
    private_constant :CONTINUOUS_KEY, :CATEGORICAL_KEY

    # @param file_path_or_url [String, nil] path of a readable file, or an
    #   HTTP(S) URL, of the YAML template
    # @raise [RuntimeError] when the source is neither a readable file nor an
    #   HTTP(S) URL, or when the document lacks the template section
    def initialize(file_path_or_url = nil)
      @template_source = file_path_or_url
      if source_is_readable_file?
        read_template_from_file
      elsif source_is_readable_url?
        read_template_from_url
      else
        @template_source = nil
        raise "Unable to load template file at #{file_path_or_url}"
      end
    end

    # template name
    def name
      @config['template_name']
    end

    # taxonomy
    # A hash that maps constants to the CSV column names; reverse it to get a
    # map the other way. Nil when the template does not define it.
    def taxonomy_columns
      @config['taxonomy_columns']
    end

    # metadata
    # A hash that maps constants to the CSV column names; reverse it to get a
    # map the other way. Empty when the template does not define it.
    def metadata_columns
      @config['metadata_columns'] || {}
    end

    # options
    # A hash that includes things like source_prefix, require_source,
    # notes_prefix and set_delimiter. Empty when the template has none.
    def trait_options
      @config['trait_options'] || {}
    end

    # @return [Boolean] true when the template organises traits in trait sets
    def trait_sets?
      !@config['trait_sets'].nil?
    end

    # trait sets
    # Walks the 'trait_sets' tree following +path+ (a list of set names).
    #
    # @param path [Array<String>] set names to follow, outermost first
    # @param tree [Hash, nil] node to start from (the whole config by default)
    # @return [Hash] the node reached; an empty hash when the path dead-ends
    def trait_set(path = [], tree = @config)
      return {} if tree.nil?       # dead end
      return tree if path.empty?   # terminal

      subtree = (tree['trait_sets'] || []).find { |x| x && x['name'] == path[0] }
      trait_set(path[1..-1], subtree)
    end

    # Names of the trait sets nested directly under the node at +path+.
    # Returns an empty array for a node without nested sets.
    def trait_set_names(path = [], tree = @config)
      (trait_set(path, tree)['trait_sets'] || []).map { |x| x['name'] }
    end

    # Continuous trait definitions declared on the node at +path+ (may be nil).
    def trait_set_continuous_traits(path = [], tree = @config)
      trait_set(path, tree)[CONTINUOUS_KEY]
    end

    # Categorical trait definitions declared on the node at +path+ (may be nil).
    def trait_set_categorical_traits(path = [], tree = @config)
      trait_set(path, tree)[CATEGORICAL_KEY]
    end

    # returns array of path components
    def trait_set_qualified_continuous_trait_names
      trait_set_qualified_trait_names([], @config, CONTINUOUS_KEY)
    end

    # returns array of path components
    def trait_set_qualified_categorical_trait_names
      trait_set_qualified_trait_names([], @config, CATEGORICAL_KEY)
    end

    # Splits a qualified column name into its path components.
    def trait_path_from_column(column_name)
      column_name.split(delimiter)
    end

    # trait names
    # Column names of all categorical traits; qualified with the set path
    # (joined by the delimiter) when the template uses trait sets.
    def categorical_trait_column_names
      trait_column_names(CATEGORICAL_KEY)
    end

    # Column names of all continuous traits; qualified with the set path
    # (joined by the delimiter) when the template uses trait sets.
    def continuous_trait_column_names
      trait_column_names(CONTINUOUS_KEY)
    end

    # Continuous trait names regardless of group. With trait sets, only names
    # that resolve back to a trait definition are returned.
    def continuous_trait_names_ungrouped
      trait_names_where(CONTINUOUS_KEY) { true }
    end

    # Categorical trait names regardless of group. With trait sets, only names
    # that resolve back to a trait definition are returned.
    def categorical_trait_names_ungrouped
      trait_names_where(CATEGORICAL_KEY) { true }
    end

    # Names of the categorical traits whose 'groups' include +group_name+.
    # Traits without a 'groups' entry are skipped.
    def categorical_trait_names_in_group(group_name)
      trait_names_where(CATEGORICAL_KEY) { |trait| trait_in_group?(trait, group_name) }
    end

    # Names of the continuous traits whose 'groups' include +group_name+.
    # Traits without a 'groups' entry are skipped.
    def continuous_trait_names_in_group(group_name)
      trait_names_where(CONTINUOUS_KEY) { |trait| trait_in_group?(trait, group_name) }
    end

    # trait values
    # The 'values' entry of a categorical trait; an empty array when the trait
    # is unknown (matching the other per-trait accessors).
    def categorical_trait_values(trait_name)
      trait_attribute(categorical(trait_name), 'values')
    end

    # groups
    def trait_group_names
      @config['trait_groups'].map { |x| x['name'] }
    end

    # The 'taxonomic_rank' of the named group, or nil when no such group exists.
    def trait_group_rank(group_name)
      group = find_trait_group(group_name)
      group && group['taxonomic_rank']
    end

    # The 'taxon_name' of the named group, or nil when no such group exists.
    def trait_group_taxon_name(group_name)
      group = find_trait_group(group_name)
      group && group['taxon_name']
    end

    # The 'groups' entry of the trait; an empty array when the trait is unknown.
    def groups_for_categorical_trait(trait_name)
      trait_attribute(categorical(trait_name), 'groups')
    end

    # The 'groups' entry of the trait; an empty array when the trait is unknown.
    def groups_for_continuous_trait(trait_name)
      trait_attribute(continuous(trait_name), 'groups')
    end

    # formats
    # The 'format' entry of the trait; an empty array when the trait is unknown.
    def continuous_trait_format(trait_name)
      trait_attribute(continuous(trait_name), 'format')
    end

    # The 'format' entry of the trait; an empty array when the trait is unknown.
    def categorical_trait_format(trait_name)
      trait_attribute(categorical(trait_name), 'format')
    end

    # Summarization method
    # The 'summarization_method' entry; an empty array when the trait is unknown.
    def continuous_trait_summarization_method(trait_name)
      trait_attribute(continuous(trait_name), 'summarization_method')
    end

    # The 'summarization_method' entry; an empty array when the trait is unknown.
    def categorical_trait_summarization_method(trait_name)
      trait_attribute(categorical(trait_name), 'summarization_method')
    end

    # Spreadsheet headers for one trait group: taxonomy columns, then the
    # group's categorical and continuous traits (each optionally followed by
    # its source and notes columns), then metadata columns.
    def column_headers(group_name)
      build_headers(categorical_trait_names_in_group(group_name),
                    continuous_trait_names_in_group(group_name))
    end

    # Same layout as #column_headers, but covering every trait in the template.
    def all_column_headers
      build_headers(categorical_trait_names_ungrouped,
                    continuous_trait_names_ungrouped)
    end

    private

    def delimiter
      trait_options['set_delimiter'] || DEFAULT_SET_DELIMITER
    end

    # Looks up a continuous trait definition by (possibly qualified) name.
    def continuous(trait_name)
      find_trait(CONTINUOUS_KEY, trait_name)
    end

    # Looks up a categorical trait definition by (possibly qualified) name.
    def categorical(trait_name)
      find_trait(CATEGORICAL_KEY, trait_name)
    end

    # Finds the trait definition stored under +columns_key+. With trait sets,
    # the name is split on the delimiter: all but the last component select
    # the trait set, the last component is the trait's own name.
    def find_trait(columns_key, trait_name)
      if trait_sets?
        path = trait_path_from_column(trait_name)
        candidates = trait_set(path[0..-2])[columns_key]
        trait_name = path[-1]
      else
        candidates = @config[columns_key]
      end
      candidates && candidates.find { |x| x && x['name'] == trait_name }
    end

    # Column names for every trait under +columns_key+ (qualified when the
    # template uses trait sets).
    def trait_column_names(columns_key)
      if trait_sets?
        trait_set_qualified_trait_names([], @config, columns_key).map { |parts| parts.join(delimiter) }
      else
        @config[columns_key].map { |x| x['name'] }
      end
    end

    # Names of the traits under +columns_key+ for which the block is truthy.
    # With trait sets a name is only considered when it resolves to a trait.
    def trait_names_where(columns_key)
      if trait_sets?
        trait_column_names(columns_key).select do |column_name|
          trait = find_trait(columns_key, column_name)
          trait && yield(trait)
        end
      else
        @config[columns_key].select { |trait| yield(trait) }.map { |trait| trait['name'] }
      end
    end

    # True when the trait declares 'groups' and they include +group_name+.
    def trait_in_group?(trait, group_name)
      groups = trait['groups']
      !groups.nil? && groups.include?(group_name)
    end

    # Value stored under +key+ in a trait definition; [] for a missing trait.
    def trait_attribute(trait, key)
      trait.nil? ? [] : trait[key]
    end

    # The trait group definition with the given name, or nil.
    def find_trait_group(group_name)
      @config['trait_groups'].find { |x| x && x['name'] == group_name }
    end

    # Assembles the header row shared by #column_headers and #all_column_headers.
    def build_headers(categorical_names, continuous_names)
      options = trait_options
      source_prefix = options['source_prefix']
      require_source = options['require_source']
      notes_prefix = options['notes_prefix']

      headers = []
      headers.concat(taxonomy_columns.values)
      (categorical_names + continuous_names).each do |trait_name|
        headers << trait_name
        headers << "#{source_prefix}#{trait_name}" if require_source
        headers << "#{notes_prefix}#{trait_name}" if notes_prefix
      end
      headers.concat(metadata_columns.values)
    end

    # returns arrays of path components
    # A node that has 'trait_sets' is treated as a branch (only its nested
    # sets are visited); a node without them contributes the names found
    # under +terminal_path+, each prefixed with the set names leading to it.
    def trait_set_qualified_trait_names(prefixes, tree, terminal_path)
      if tree['trait_sets']
        paths = []
        tree['trait_sets'].each do |set|
          paths += trait_set_qualified_trait_names(prefixes + [set['name']], set, terminal_path)
        end
        paths
      elsif tree[terminal_path]
        tree[terminal_path].map { |x| prefixes + [x['name']] }
      else
        []
      end
    end

    def source_is_readable_file?
      return false unless @template_source

      # File.exist? is used because File.exists? no longer exists on Ruby 3.2+.
      File.exist?(@template_source) && File.readable?(@template_source)
    end

    # True when the source parses as an HTTP(S) URI. Anything that cannot be
    # parsed as a URI (e.g. a missing path containing spaces) is "not a URL",
    # so the caller reports its own load error instead of a parse error.
    def source_is_readable_url?
      return false unless @template_source

      URI.parse(@template_source.to_s).is_a?(URI::HTTP)
    rescue URI::InvalidURIError
      false
    end

    # Downloads the template into a temporary file and parses it. The
    # temporary file is removed even when the download or the parse fails.
    def read_template_from_url
      tempfile = Tempfile.new('import-template')
      begin
        d = Downloader.new(@template_source, tempfile.path)
        # NOTE(review): the downloaded document is untrusted input. Confirm
        # YAML.load_file is safe on the Psych version in use, or switch to
        # YAML.safe_load.
        root_object = YAML.load_file(d.downloaded_file)
      ensure
        tempfile.close!
      end
      read_config_from_yaml root_object
    end

    def read_template_from_file
      root_object = YAML.load_file(@template_source)
      read_config_from_yaml root_object
    end

    # Stores the template section as @config and defaults the trait and group
    # lists to empty arrays.
    #
    # @raise [RuntimeError] when the document has no template section
    def read_config_from_yaml(root_object)
      config = root_object.is_a?(Hash) ? root_object['traitdb_spreadsheet_template'] : nil
      unless config.is_a?(Hash)
        raise "Template at #{@template_source} is missing the 'traitdb_spreadsheet_template' section"
      end

      @config = config
      @config[CONTINUOUS_KEY] ||= []
      @config[CATEGORICAL_KEY] ||= []
      @config['trait_groups'] ||= []
    end
  end
end