1 """
2 Created 2012
3 @author: GieseS
4
5
6 Little plotting script which is called in the analysis of different mappings to an artificial reference genome.
7 It produces the following plots:
8
9
10
11
12 1) ROC Curve
13 2) Overview histograms for FP / TP.
14
15 """
16
17
19 """
20 Function for reading a complete ReferenceFile into Memory.
21 @type ref: string
22 @param ref: reference file.ArtRead
23 @type entries: int
24 @param entries: size for np.array
25 @type compareList: list
26 @param compareList: list containing 1s where the genomes differ and 0s where the nucleotides are equal.
27 @type readdic: dictionary
28 @param readdic: dictionary containing read ids and read qualities.
29 @rtype: tuple
30 @return: (array of aligned read objects indexed by the reads' internalID, the counter tp).
31 """
32
# NOTE(review): the `def` line of this function is not part of this listing; ref, entries, compareList and readdic are its documented parameters.
# Object array of length `entries`; np.zeros fills it with 0, so a slot that still equals 0 has no read stored yet.
33 AlignedReadRefArray = np.zeros(entries, dtype=object)
# Counter that is returned together with the array.
34 tp = 0
# Two timers: `start` for the whole pass, `start100k` for the first 100,000 lines only.
35 start = time.time()
36 start100k = time.time()
37
# NOTE(review): fobj is closed explicitly after the loop; an exception raised inside the loop would skip that close.
38 fobj = open(ref, "r")
# NOTE(review): refdic is never used in the visible code.
39 refdic = {}
# k counts the lines read so far.
40 k = 0
41
42
43 for alignment in fobj:
44 k += 1
# Equality test (not modulo): the timing message is printed once, when line 100,000 is reached.
45 if k == 100000:
46 end100k = time.time()
# NOTE(review): the trailing comma looks like the Python 2 print-statement idiom for suppressing the newline - confirm the target Python version.
47 print ("%f.." % (end100k - start100k)),
48
# isSaneAlignment yields a (read, readname) pair; read == 0 marks a line that is skipped.
49 read, readname = isSaneAlignment(alignment, "ref", compareList, readdic)
50 if read == 0:
51 pass
52
53 else:
54
# The slot for this read is addressed through the internalID stored in readdic.
# A non-zero slot already holds an object for this read, so the new alignment is passed to its toObjself method.
55 if AlignedReadRefArray[readdic[readname].internalID] != 0:
56
57 AlignedReadRefArray[readdic[readname].internalID].toObjself(read)
58
59 else:
60
# Empty slot: store the read object itself.
61 AlignedReadRefArray[readdic[readname].internalID] = read
# NOTE(review): indentation is lost in this listing; tp is presumably incremented only when a slot is filled for the first time - confirm.
62 tp += 1
63
64
65 fobj.close()
66 end = time.time()
67
# Report the total elapsed time of the pass.
68 print ("\t %f " % (end - start)),
69 return(AlignedReadRefArray, tp)
70
71
73 """
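74 Function for reading the alignments against the artificial reference into memory. Alignments that are already contained in the corresponding RefArray entry are skipped.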
75 @type art: string
76 @param art: artificial file.
77 @type RefArray: array
78 @param RefArray: Results from reading the reference SAM file.
79 @type compareList: list
80 @param compareList: list containing 1s where the genomes differ and 0s where the nucleotides are equal.
81 @type readdic: dictionary
82 @param readdic: dictionary containing read ids and read qualities.
83 @rtype: dictionary
84 @return: aligned read objects from the artificial reference, which were uniquely aligned.
85 """
86
# NOTE(review): the `def` line of this function is not part of this listing; art, RefArray, compareList and readdic are its documented parameters.
# Timer for the whole pass over the artificial file.
87 start = time.time()
88
# NOTE(review): fobj is closed explicitly after the loop; an exception raised inside the loop would skip that close.
89 fobj = open(art, "r")
# Result dictionary: readname -> read object.
90 artdic = {}
# k counts the lines consumed by the loop below (lines consumed by SkipHeader are not counted).
91 k = 0
92
# NOTE(review): the value returned by SkipHeader is overwritten in the first loop iteration without being used here.
# If SkipHeader returns an already parsed first alignment, that alignment is dropped - confirm against SkipHeader.
93 read = SkipHeader(fobj,compareList,readdic)
94
95
96 for alignment in fobj:
97 k += 1
# Progress output every 1,000,000 lines (the number printed is the count of millions).
98 if k % 1000000 == 0:
99 print ("%d.." %(k/1000000)),
# isSaneAlignment yields a (read, readname) pair; read == 0 marks a line that is skipped.
100 read, readname = isSaneAlignment(alignment, "art", compareList, readdic)
101 if read == 0:
102 pass
103 else:
104
# returnIndex maps the read name to its position in RefArray.
105 index = returnIndex(readdic, readname)
# Non-zero entry: RefArray holds an object for this read.
106 if RefArray[index] != 0:
107
# Only alignments for which isContained returns 0 are kept; any other result falls through to the `pass` below.
108 if (RefArray[index].isContained(read) == 0):
# A read name that is already stored gets the additional alignment passed to its toObjself method.
109 if readname in artdic:
110 artdic[readname].toObjself(read)
111
112 else:
113 artdic[readname] = read
114 else:
115 pass
116
117 else:
118
# No entry in RefArray for this read: store it directly. This assignment replaces any object already stored under readname.
119 artdic[readname] = read
120
121
122 fobj.close()
123 end = time.time()
124
# Report the total elapsed time of the pass.
125 print ("\t %f " % (end - start)),
126
127 return(artdic)
128