-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmisc.py
72 lines (45 loc) · 1.63 KB
/
misc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import pickle
from collections import defaultdict
from gzip import open as gzip_open
# ------------------------------------------------------------------------------
# Openers
#
# Maps a file extension (the text after the last ".") to the callable used to
# open files of that type: plain ``open`` for text formats, gzip's ``open``
# for ".gz" archives.
file_openers = dict(
    vert=open,
    txt=open,
    gz=gzip_open,
)
def get_opener(file_path):
    """Return the opening function registered for the extension of file_path.

    The extension is whatever follows the final "." in ``file_path`` (the
    whole path when it contains no dot). It is looked up in ``file_openers``,
    so an unregistered extension raises ``KeyError``.
    """
    extension = file_path.rsplit(".", 1)[-1]
    return file_openers[extension]
def use_opener(file_path, mode="r"):
    """Open file_path with the opener selected by ``get_opener``.

    ``mode`` is forwarded unchanged to the selected opener, and the object
    that opener returns is handed back to the caller.
    """
    opener = get_opener(file_path)
    return opener(file_path, mode)
# ------------------------------------------------------------------------------
class LineCorpus(object):
    """Iterable over the sentences of a file that holds one sentence per line.

    Each sentence is yielded as a list of whitespace-separated tokens; lines
    that contain no tokens are skipped.
    """

    def __init__(self, file_name):
        """Remember the path of the corpus file; nothing is opened yet."""
        self.file_name = file_name

    def __iter__(self):
        """Yield the token list of every non-empty line in the file."""
        # The file is opened anew on each call, so the corpus can be
        # iterated more than once.
        with use_opener(self.file_name) as handle:
            for raw_line in handle:
                tokens = raw_line.strip("\n\r\t ").split()
                if not tokens:
                    continue
                yield tokens
def corpus2vocab(corpus):
    """Count how often each word occurs in a corpus.

    Args:
        corpus: An iterable of sentences, each sentence being an iterable
            of words.

    Returns:
        A ``defaultdict`` mapping every word to its number of occurrences.
        Looking up a word that never occurred yields 0.
    """
    # ``int`` is used as the default factory instead of a lambda so that the
    # returned mapping can be pickled (lambdas cannot be pickled).
    vocab = defaultdict(int)
    for sentence in corpus:
        for word in sentence:
            vocab[word] += 1
    return vocab
def dump_sentences(sentences, output_file):
    """Write sentences to output_file, one space-joined sentence per line.

    The file is opened for writing through ``use_opener``, so the opener is
    chosen from the extension of ``output_file``.
    """
    with use_opener(output_file, "w") as sink:
        sink.writelines(" ".join(tokens) + "\n" for tokens in sentences)
# ------------------------------------------------------------------------------
def save_report(report, dataset_name, model_name, formula, directory="reports/"):
    """Pickle a report to ``<directory>/<dataset>.<model>.<formula>.pickle``.

    Args:
        report: The object to serialise with ``pickle``.
        dataset_name: First component of the file name.
        model_name: Second component of the file name.
        formula: Third component of the file name.
        directory: Folder the file is written into. It is not created by
            this function.
    """
    name = ".".join([dataset_name, model_name, formula])
    path = os.path.join(directory, name + ".pickle")
    # Pickle data is binary, so the file must be opened in "wb" mode; text
    # mode fails on Python 3 and may corrupt the data on Windows.
    with open(path, "wb") as f:
        pickle.dump(report, f)
def load_report(name):
    """Load and return a pickled report from the file at path ``name``.

    NOTE: unpickling can execute arbitrary code, so only load report files
    that come from a trusted source.
    """
    # Pickle data is binary, so the file must be opened in "rb" mode; text
    # mode makes ``pickle.load`` fail on Python 3.
    with open(name, "rb") as f:
        return pickle.load(f)