# CompareClusteringNoHardStop.py
# Forked from sriniavireddy/CSE282.
import pickle
import kmers
import Clustering
from BipartiteMatching import get_bipartite_matching
def output_seq_kmer_mapping_list(input_name, consensus_mapping_list, cluster_dict):
    """Pickle the per-sequence k-mer mapping and the score totals for one input.

    Every sequence k-mer listed under ``cluster_dict[consensus_kmer]`` is
    paired with the matching score and design k-mer of that consensus k-mer.

    Two pickle files are written, with names built relative to the current
    working directory:

    * ``"no_hard_stop" + input_name + "_mapping.p"`` -- a list of
      ``(sequence_kmer, matching_score, design_kmer)`` tuples, in the order
      the consensus mappings and their cluster members are visited.
    * ``"no_hard_stop" + input_name + "_score.p"`` -- a dict with keys
      ``"matching_score_sum"`` (each consensus score counted once) and
      ``"mapping_score_sum"`` (each consensus score counted once per
      sequence k-mer in its cluster).

    Args:
        input_name: Label inserted into both output file names.
        consensus_mapping_list: Iterable of indexable records read as
            ``record[0]`` = consensus k-mer, ``record[1]`` = matching score,
            ``record[2]`` = design k-mer.
        cluster_dict: Mapping from consensus k-mer to an iterable of the
            sequence k-mers in that cluster.

    Raises:
        KeyError: If a consensus k-mer is not a key of ``cluster_dict``.
    """
    sequence_kmer_mapping_list = []
    matching_score_sum = 0
    mapping_score_sum = 0

    for consensus_mapping in consensus_mapping_list:
        # Index rather than unpack so records longer than three items still work.
        consensus_kmer = consensus_mapping[0]
        consensus_matching_score = consensus_mapping[1]
        design_kmer = consensus_mapping[2]

        matching_score_sum += consensus_matching_score
        for sequence_kmer in cluster_dict[consensus_kmer]:
            # Accumulate per member (not score * len) to keep the original
            # summation order for non-integer scores.
            mapping_score_sum += consensus_matching_score
            sequence_kmer_mapping_list.append(
                (sequence_kmer, consensus_matching_score, design_kmer)
            )

    # Context managers guarantee the files are flushed and closed even if
    # pickling fails part-way; the bare open() calls leaked the handles.
    output_list_file_name = "no_hard_stop" + input_name + "_mapping.p"
    with open(output_list_file_name, "wb") as mapping_file:
        pickle.dump(sequence_kmer_mapping_list, mapping_file)

    score_dict = {
        "matching_score_sum": matching_score_sum,
        "mapping_score_sum": mapping_score_sum,
    }
    output_score_file_name = "no_hard_stop" + input_name + "_score.p"
    with open(output_score_file_name, "wb") as score_file:
        pickle.dump(score_dict, score_file)
def _process_input_file(input_file_name, design_kmer_list, use_cluster_size_hard_stop):
    """Cluster, match and pickle the results for a single input file name."""
    sequence_kmer_list = kmers.get_sequence_kmers(input_file_name)
    cluster_dict = Clustering.get_cluster_dict(
        sequence_kmer_list,
        design_kmer_list,
        use_cluster_size_hard_stop,
    )

    # Persist the raw clusters; the context manager closes the handle that the
    # bare open() call used to leak.
    with open("clusters_no_hard_stop" + input_file_name + ".p", "wb") as cluster_file:
        pickle.dump(cluster_dict, cluster_file)

    # The cluster keys are the consensus k-mers. Materialise them so the
    # matcher receives a real list (on Python 3 `.keys()` is a view that
    # cannot be indexed).
    # NOTE(review): assumes get_bipartite_matching wants a list -- confirm.
    consensus_kmer_list = list(cluster_dict.keys())
    consensus_mapping_list = get_bipartite_matching(consensus_kmer_list, design_kmer_list)
    output_seq_kmer_mapping_list(input_file_name, consensus_mapping_list, cluster_dict)


def main():
    """Run the clustering comparison with the cluster-size hard stop disabled.

    Processes the inputs named ``"<prefix>_<n>"`` for each prefix ``500``,
    ``1000`` and ``2000`` (in that order) and ``n`` from 1 to 10 inclusive.
    For each input it writes ``clusters_no_hard_stop<name>.p`` and then calls
    ``output_seq_kmer_mapping_list`` to write the mapping and score pickles.
    The design k-mers are loaded once and reused for every input.
    """
    input_prefixes = ("500", "1000", "2000")
    suffix_start_count = 1
    suffix_end_count = 10

    design_kmer_list = kmers.get_design_kmers()
    use_cluster_size_hard_stop = False

    # A single nested loop replaces three copy-pasted loops that differed only
    # in the prefix; the processing order is unchanged.
    for prefix in input_prefixes:
        for suffix_count in range(suffix_start_count, suffix_end_count + 1):
            input_file_name = prefix + '_' + str(suffix_count)
            _process_input_file(
                input_file_name,
                design_kmer_list,
                use_cluster_size_hard_stop,
            )
# Run the comparison only when this file is executed as a script, so that
# importing the module does not start the pipeline.
if __name__ == "__main__":
    main()