-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreformat_scx.py
executable file
·68 lines (57 loc) · 1.89 KB
/
reformat_scx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python2
import collections
import sys
import common
def print_code_points(codes, scripts):
    """Print one data line per code point range found in ``codes``.

    Every line has the form
    ``<range> ; <scripts> # <category> <count> <name>``: the range column
    is left-justified to 13 characters and the count column is
    right-justified to 5 characters.

    Args:
        codes: Code points to print; handed unchanged to
            ``common.group_by_gc``.
        scripts: Already-formatted script list, inserted verbatim after
            the ``;`` separator.

    Relies on the module-level ``name`` and ``gc`` dictionaries, which the
    script fills from UnicodeData.txt before calling this function.

    Raises:
        KeyError: If a range boundary has no entry in ``name``.
    """
    # group_by_gc yields (start, end, category) triples; presumably each is
    # a contiguous run sharing one general category -- confirm in common.py.
    for start, end, category in common.group_by_gc(codes, gc):
        if start == end:
            # Single code point: no range notation and an empty count column.
            range_str = '%04X' % start
            name_str = name[start]
            count_str = ''
        else:
            range_str = '%04X..%04X' % (start, end)
            name_str = '%s..%s' % (name[start], name[end])
            count_str = '[%d]' % (end - start + 1)

        range_str = range_str.ljust(13)
        count_str = count_str.rjust(5)
        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('%s ; %s # %s %s %s' % (
            range_str, scripts, category, count_str, name_str))
# Lookup tables keyed by integer code point, filled from UnicodeData.txt.
# Its records are semicolon-separated: field 0 is the code point in hex,
# field 1 is stored as the character name and field 2 as the general
# category.
name = {}
gc = {}
with open('UnicodeData.txt') as uni_txt:
    for record in uni_txt:
        fields = record.split(';')
        code_point = int(fields[0], 16)
        name[code_point] = fields[1]
        gc[code_point] = fields[2]
# Keep every line in memory: the list is scanned here for the header and
# later handed to common.iterate_over_data for the data lines.
with open('ScriptExtensions.txt') as sxt_txt:
    src_data = sxt_txt.readlines()

# The header is everything up to and including the first
# '# Property: Script_Extensions' line; if that line never appears, the
# header is the whole file.
header_end_marker = '# Property: Script_Extensions\n'
if header_end_marker in src_data:
    header = src_data[:src_data.index(header_end_marker) + 1]
else:
    header = list(src_data)
# Map each distinct set of scripts (a frozenset of script codes) to the set
# of all code points that carry exactly that Script_Extensions value.
codes_for_scripts = collections.defaultdict(set)
for line in common.iterate_over_data(src_data):
    # Each item is split into '<code or range>' and '<script> <script> ...'
    # around a single ';'. Presumably iterate_over_data has already dropped
    # comments and blank lines -- confirm in common.py.
    code_field, script_field = line.split(';')
    codes = common.range_to_codes(code_field.strip())
    scripts = frozenset(script_field.strip().split(' '))
    codes_for_scripts[scripts].update(codes)

# sorted() accepts the dict directly and returns a list on both Python 2 and
# Python 3; calling .sort() on dict.keys() only works on Python 2, where
# keys() returns a list.
script_sets = sorted(codes_for_scripts, key=common.script_set_to_sort_key)
# Emit the source file's header unchanged, then one section per script set.
sys.stdout.write(''.join(header))

for script_set in script_sets:
    formatted_scripts = ' '.join(sorted(script_set))
    # print('') instead of a bare ``print``: both write one empty line on
    # Python 2, but a bare ``print`` is a silent no-op expression on
    # Python 3. The single-argument parenthesised calls below likewise
    # print the same text on either version.
    print('')
    print('# ================================================')
    print('')
    print('# Script_Extensions=%s' % formatted_scripts)
    print('')
    codes = codes_for_scripts[script_set]
    print_code_points(codes, formatted_scripts)
    print('')
    print('# Total code points: %d' % len(codes))

# One blank line between the last section and the end-of-file marker.
print('')
print('# EOF')