-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathngram.py
89 lines (78 loc) · 2.38 KB
/
ngram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import random
def nSplit(n, path='log.txt'):
    """Split the text of a log file into short pieces for the n-gram model.

    The file is scanned character by character with these rules:

    * Line breaks (``\\n`` / ``\\r``) are dropped. They reset the character
      counter but do not end the piece currently being built.
    * A backtick is appended to the current piece without being counted.
      If the current piece already ends with a backtick, that piece is
      emitted first and a new piece is started with the backtick.
    * A space is appended to the current piece, which is then emitted.
    * Any other character is appended and counted; once *n* characters
      have been counted, the piece is emitted and the character starts
      the next piece.

    Whatever is left in the buffer at end of input is emitted as the final
    piece, so the result always has at least one element (possibly ``''``).

    Args:
        n: Maximum number of counted characters per piece.
        path: File to read; defaults to ``'log.txt'`` in the working
            directory.

    Returns:
        The list of pieces in the order they occur in the file.
    """
    # Context manager so the handle is closed even if read() raises.
    with open(path, 'r') as log_file:
        text = log_file.read()

    pieces = []
    current = ''
    counted = 0  # characters counted towards the limit n in `current`
    for letter in text:
        if letter == '\n' or letter == '\r':
            # The piece itself is kept; only the counter restarts.
            counted = 0
            continue
        if letter == '`':
            if current.endswith('`'):
                # Two backticks in a row: emit the piece that ends with the
                # first one and start a new piece with the second.
                pieces.append(current)
                current = '`'
            else:
                current += '`'
        elif letter == ' ':
            # A space always closes the current piece and stays attached to it.
            pieces.append(current + ' ')
            current = ''
            counted = 0
        elif counted == n:
            # Piece is full: emit it and start the next one with this letter.
            pieces.append(current)
            current = letter
            counted = 1
        else:
            current += letter
            counted += 1
    pieces.append(current)
    return pieces
def addToDict(splitWords):
    """Build the successor table for a sequence of pieces.

    Args:
        splitWords: Sequence of pieces in the order they occurred.

    Returns:
        A dict mapping each piece to the list of pieces that directly
        followed it, in order of occurrence and with repeats kept. The
        last piece gets no entry unless it also occurred earlier. An input
        with fewer than two pieces yields an empty dict.
    """
    followers = {}
    # Pair every piece with its immediate successor.
    for piece, next_piece in zip(splitWords, splitWords[1:]):
        followers.setdefault(piece, []).append(next_piece)
    return followers
def njoin(dict):
    """Generate one random message by walking the successor table.

    The walk starts at a randomly chosen key that begins with a backtick
    and repeatedly jumps to a random successor of the current piece until
    it reaches a piece that ends with a backtick.

    Args:
        dict: Mapping of piece -> list of pieces that may follow it.
            (The name shadows the builtin; it is kept so existing keyword
            callers continue to work.)

    Returns:
        The concatenated pieces with the opening backtick removed, and the
        closing backtick removed when the walk reached one.

    Raises:
        ValueError: If no key begins with a backtick, so there is nowhere
            to start.
    """
    # startswith() rather than word[0] so an empty-string key cannot raise.
    starters = [piece for piece in dict if piece.startswith('`')]
    if not starters:
        raise ValueError("no starter piece (key beginning with '`') found")

    current = random.choice(starters)
    parts = [current]
    while not current.endswith('`'):
        successors = dict.get(current)
        if not successors:
            # Dead end: this piece has no recorded successor. Stop here
            # instead of raising KeyError; there is no closing backtick to
            # strip, so only the opening one is removed.
            return ''.join(parts)[1:]
        current = random.choice(successors)
        parts.append(current)
    # Drop the opening and closing backticks.
    return ''.join(parts)[1:-1]
def run(n=3):
    """Produce one generated message from the log.

    Splits the log with ``nSplit`` using pieces of up to *n* counted
    characters, builds the successor table with ``addToDict`` and returns
    the result of walking it with ``njoin``.
    """
    successor_table = addToDict(nSplit(n))
    return njoin(successor_table)
# Script behaviour: generate and print one message using 4-character pieces.
print(run(n=4))