Package core :: Module PlotData
[hide private]
[frames] | no frames]

Source Code for Module core.PlotData

  1  '''
 
  2  Created  2012
 
  3  @author: GieseS
 
  4  
 
  5  
 
  6  Little plotting script which is called in the analysis of different mappings to an artificial reference genome.
 
  7  It produces the following plots:
 
  8  
 
  9  
 
 10  
 
 11  
 
 12  1) ROC Curve
 
 13  2) Overview histograms for FP / TP.
 
 14  
 
 15  
 
 16  ''' 
 17  
 
 18  import numpy as np 
 19  import matplotlib.pyplot as plt 
 20  from mpl_toolkits.mplot3d import Axes3D 
 21  import random 
 22  import time 
 23  import pylab as p 
 24  
 
 25  ##### HELP FUNCTIONS ####
 
 26  
 
27 -def trapezoidal_rule(x, y):
28 """Approximates the integral through the points a,b""" 29 index = [i+1 for i in xrange(len(x)-1)] 30 xdiff = np.array([x[i]-x[i-1] for i in index]) 31 ysum = np.array([y[i]+y[i-1] for i in index]) 32 return(np.dot(xdiff,ysum)/2)
33 34 35 #### HELP FUNCTIONS END ############ 36 37 38 """ 39 abbreviations: 40 tp = True positives 41 fp = false positives 42 NM = number of mismtaches 43 mq = mapping quality 44 rq = readquality 45 subs = substitutions in artifical reference genome (ARG) 46 """ 47 48 49 50 ############################################################################### 51
52 -def CalculateRoc2(dataArray,prefix,readsize,uniquehits,mappedreads,filename):
53 """ 54 Calculates the adjusted ROC curve as well as the AUC value derived from the adjusted points 55 and writes the ROC tables to .txt files. 56 """ 57 starttime= time.time() 58 uniquehits = float(uniquehits) 59 readsize = float(readsize) 60 61 62 entries = len(dataArray) 63 64 65 resultmatrix = np.arange(entries*2) 66 resultmatrix = resultmatrix.reshape(2,entries) 67 68 maxrq = max(x.rq for x in dataArray) 69 maxnm = max(x.nm[0] for x in dataArray) 70 maxGaps= max(x.gaps[0] for x in dataArray) 71 maxMism= max(x.mism[0] for x in dataArray) 72 73 74 minrq = min(x.rq for x in dataArray) 75 minnm = min(x.nm[0] for x in dataArray) 76 minmq= min(x.mq[0] for x in dataArray) 77 minGaps= min(x.gaps[0] for x in dataArray) 78 minMism= min(x.mism[0] for x in dataArray) 79 80 81 # adjust stepsize for rq since the score behaves the other way 82 quants = [1,2,3,4,5] 83 tempa = maxrq-minrq 84 stepsize = tempa/5 85 86 rqQuants = [round(minrq+(i-1)*stepsize,3) for i in quants] 87 rqQuants.reverse() 88 rqQuants[-1] =0 # last entry is rounded bigger than the smallest in the dataset 89 90 nmQuants = [i*maxnm/5 for i in quants] 91 GapsQuants = [i*maxGaps/5 for i in quants] 92 MismQuants = [i*maxMism/5 for i in quants] 93 94 rocvector = [] 95 96 # i = NM,l = RQ, k = MQ 97 for l in quants: # RQ 98 for k in quants: # GAPS 99 for j in quants: # MISMATCH 100 temparray = [m for m in dataArray if m.gaps[0] <= GapsQuants[k-1] and m.mism[0] <= MismQuants[j-1] and m.rq >=rqQuants[l-1]] 101 102 103 tempids = [m.id for m in temparray] 104 uniquereads = {} 105 for i in xrange(0,len(tempids)): 106 uniquereads[tempids[i]] = "" 107 108 mappedreads = len(uniquereads) 109 110 111 112 templength = len(temparray) 113 114 if templength == 0: 115 continue 116 else: 117 tempTP = sum(x.mr[0] for x in temparray) 118 tempFP =templength-tempTP 119 F = round((float(mappedreads)/ readsize) ,3) 120 sens = round((tempTP/ uniquehits) * F,3) 121 if tempFP == 0: 122 spec = 0 123 else: 124 spec = round((tempFP / uniquehits) * F,3) 125 126 rocvector.append([rqQuants[l-1],GapsQuants[k-1],MismQuants[j-1],tempTP,tempFP,templength,sens,spec,F]) 127 128 #print ("%d\t%d\t%d\t" % (templength,tempTP,tempFP)) 129 130 #0 = NM 4 = TP 7 = sens 131 #1 = RQ 5 = FP 8 = 1-spec 132 #2 = GAPS 6 = P 9 = F 133 #append needed for last entry in AUC calculation 134 rocvector.append([0,0,0,0,0,0,0,0,0]) 135 nproc = np.array(rocvector) 136 137 #write the sens and specificity values from nproc according to the enumeration in line 149. 138 #specificity is in cell -2 139 # sensitivity is in cell -3 140 sens = [i[-3] for i in nproc] 141 spez = [i[-2] for i in nproc] 142 143 # adjust ROC curve. It is necessary that it the 1-specificity ends in 1. 144 # for the last record copy the predecessor in sens to it 145 # and write 1 to specificity 146 spez[-1] = 1 147 sens[-1] = sens[-2] 148 149 150 rocarray1 = np.array([sens,spez]) 151 rocarray1 = rocarray1.flatten('F') 152 rocarray1= rocarray1.reshape((len(spez),2)) 153 154 rocarray = np.array([sens,spez]) 155 rocarray = rocarray.flatten('F') 156 rocarray = rocarray.reshape((len(spez),2)) 157 rocarray = np.sort(rocarray.view('float,float'), order=['f0','f1'], axis=0).view(np.float) 158 159 rocarrayCorrected = rocarray 160 161 #print rocarrayCorrected 162 # project points where... 163 for m in range(len(rocarrayCorrected)-2,-1,-1): 164 if (rocarrayCorrected[m,1] >= rocarrayCorrected[m+1,1]): 165 rocarrayCorrected[m,1] = rocarrayCorrected[m+1,1] 166 167 168 #print rocarrayCorrected 169 plt.hold(True) 170 plt.figure() 171 plt.subplot(111) 172 #plt.scatter(spez, sens, c='b', marker='o', facecolor='red') 173 #plt.plot(rocarray[:,1], rocarray[:,0] 174 plt.plot(rocarrayCorrected[:,1],rocarrayCorrected[:,0], marker='o', markersize=7,linestyle='--', color='r', label='projected') 175 plt.plot(rocarray1[:,1], rocarray1[:,0], linestyle="None",label='real',marker='.',color='g') 176 plt.xlabel('1-specificity') 177 plt.ylabel('sensitivity') 178 plt.title(r'ROC:'+filename) 179 plt.axis([-0.1,1.1,-0.1,1.1]) 180 plt.grid(True) 181 plt.legend(loc='lower right') 182 plt.tight_layout() 183 plt.savefig(prefix + "_ROC.pdf",format='pdf') 184 plt.clf 185 186 187 AUC = trapezoidal_rule(rocarrayCorrected[:,1], rocarrayCorrected[:,0]) 188 189 fobj = open(prefix+"_roctable.txt","w") 190 fobj.write("RQ\tGAPS\tMM\tPTP\tFP\tP\tSn\t1-Sp\tF\r\n") 191 for i in xrange(0,len(rocvector),1): 192 temp = [str(k) for k in rocvector[i]] 193 tempstr = "\t".join(temp) 194 fobj.write(tempstr+"\r\n") 195 196 endtime= time.time() 197 print ("X"), 198 return(round(AUC,3))
199
200 -def plotOverviewHist(fp,tp,label,prefix,mappernames):
201 """Plots true positives and false positives into 2 different histogram subplots. """ 202 prefix2 = "/".join(prefix.split("/")[0:-1])+"/" 203 fobj = open(prefix2+"indexMappingTools.txt","w") 204 for i in range(0,len(label)): 205 fobj.write("%s - %s\r\n" %(i+1,mappernames[i])) 206 fobj.close() 207 208 x = [i for i in range(1,len(fp)*3,3)] 209 xmax = max(x)+1 210 ymaxTP = max(tp)+0.1 211 ymaxFP = max(fp)+0.1 212 213 ##### SUBPLOT NUMBER OF MISMATCHES #### 214 y = tp 215 x =x 216 z = fp 217 218 219 fig = p.figure() 220 # only plot every 2nd label 221 if len(label) <= 7: 222 widthp = 0.7 223 ticks = label 224 else: 225 widthp = 0.3 226 ticks = [i if i%2 == 0 else "" for i in label] 227 # Here we're adding 2 subplots. The grid is set 228 # up as one row, two columns. 229 ax1 = fig.add_subplot(1,2,1) 230 ax1.bar(x,y,width=widthp, facecolor='darkgreen') 231 ax1.set_ylabel('#TP hits') 232 ax1.set_xlabel('index mapping tool') 233 ax1.set_title("Global comparison #TP hits") 234 p.xticks(x,ticks) 235 p.grid(True) 236 p.axis([0,xmax,0,ymaxTP+ymaxTP*10/100]) 237 238 # on the second axis, make the width smaller (default is 0.8) 239 ax2 = fig.add_subplot(1,2,2) 240 ax2.bar(x,z,width=widthp, facecolor='darkred') 241 ax2.set_ylabel('#FP hits') 242 ax2.set_xlabel('index mapping tool') 243 ax2.set_title("Global comparison #FP hits") 244 p.axis([0,xmax,0,ymaxFP+ymaxFP*10/100]) 245 p.xticks(x,ticks) 246 p.grid(True) 247 plt.tight_layout() 248 p.savefig(prefix2 + "Overall_histabs.pdf",format='pdf') 249 p.clf() 250 251 252 tpsum =sum(tp) 253 fpsum =sum(fp) 254 y = [i/float(tpsum) for i in tp] 255 x =x 256 z = [i/float(fpsum) for i in fp] 257 ymax = max(max(z),max(y))+0.2 258 259 fig = p.figure() 260 # only plot every 2nd labelare provided 261 if len(label) <= 7: 262 ticks = label 263 else: 264 ticks = [i if i%2 == 0 else "" for i in label] 265 # Here we're adding 2 subplots. The grid is set 266 # up as one row, two columns. 267 ax1 = fig.add_subplot(1,2,1) 268 269 270 ax1.bar(x,y,width=widthp, facecolor='darkgreen') 271 ax1.set_ylabel('%TP hits') 272 ax1.set_xlabel('index mapping tool') 273 ax1.set_title("Global comparison %TP hits") 274 p.xticks(x,ticks) 275 p.grid(True) 276 p.axis([0,xmax,0,1.1]) 277 278 # on the second axis, make the width smaller (default is 0.8) 279 ax2 = fig.add_subplot(1,2,2) 280 ax2.bar(x,z,width=widthp, facecolor='darkred') 281 ax2.set_ylabel('%FP hits') 282 ax2.set_xlabel('index mapping tool') 283 ax2.set_title("Global comparison %FP hits") 284 p.axis([0,xmax,0,1.1]) 285 p.xticks(x,ticks) 286 p.grid(True) 287 plt.tight_layout() 288 p.savefig(prefix2 + "Overall_histper.pdf",format='pdf') 289 p.clf() 290 291 print ("X"),
292