forked from Impavidity/relogic
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpointwise_test.py
43 lines (35 loc) · 1.26 KB
/
pointwise_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import json
from types import SimpleNamespace
import relogic.utils.crash_on_ipy
from relogic.logickit.base.constants import POINTWISE_TASK
from relogic.logickit.dataflow import TASK_TO_DATAFLOW_CLASS_MAP, PointwiseDataFlow
from relogic.logickit.tokenizer.tokenization import BertTokenizer
# Lightweight configuration object: exposes the three settings below as plain
# attributes (config.buckets, config.max_seq_length, config.label_mapping_path).
config = SimpleNamespace(
    buckets=[(0, 15), (15, 40), (40, 450)],
    max_seq_length=450,
    label_mapping_path="data/preprocessed_data/binary_classification.json",
)
# Tokenizers keyed by name; the single "BPE" entry is a BertTokenizer loaded
# via from_pretrained("bert-base-multilingual-cased").
tokenizers = dict(
    BPE=BertTokenizer.from_pretrained("bert-base-multilingual-cased"),
)
# Read the label mapping inside a context manager so the file handle is closed
# as soon as the JSON is parsed, rather than being left open until garbage
# collection (the original passed a bare open() straight to json.load).
with open(config.label_mapping_path) as label_mapping_file:
    label_mapping = json.load(label_mapping_file)

# Look up the dataflow class registered under POINTWISE_TASK and instantiate it
# with the config, tokenizers and label mapping.
dataflow: PointwiseDataFlow = TASK_TO_DATAFLOW_CLASS_MAP[POINTWISE_TASK](
    task_name=POINTWISE_TASK,
    config=config,
    tokenizers=tokenizers,
    label_mapping=label_mapping,
)
# Two hand-written records, each with a "text_a"/"text_b" pair and a string
# "label", used as the JSON-style input for the dataflow.
examples = [
    dict(
        text_a="bbc world service staff cuts",
        text_b="gossip day by day : bbc world service to cut five language services",
        label="1",
    ),
    dict(
        text_a="barbara walters chicken pox",
        text_b="stoke city : begovic wilkinson shawcross wilson wilkinson walters whelan nzonzi kightly jerome crouch",
        label="0",
    ),
]
# Load the example records into the dataflow, then print each minibatch it
# produces when asked for batches of size 2.
dataflow.update_with_jsons(examples)
for mb in dataflow.get_minibatches(minibatch_size=2):
    # The loop body must be indented; without it the file fails to parse.
    print(mb)
# Deliberate stop at the end of the script. Presumably this is meant to trigger
# the relogic.utils.crash_on_ipy hook imported at the top so the data can be
# inspected interactively -- TODO confirm.
# NOTE(review): the original indentation was lost, so it is unclear whether
# this raise belonged inside the loop (stop after the first minibatch) or after
# it (print all minibatches first). It is kept at top level as shown; verify
# against the upstream file.
raise NotImplementedError("You can start to play with data")