1  '''
 
  2  Created  2012
 
  3  @author: GieseS
 
  4  
 
  5  
 
  6  Little plotting script which is called in the analysis of different mappings to an artificial reference genome.
 
  7  It produces the following plots:
 
  8  
 
  9  
 
 10  
 
 11  
 
 12  1) ROC Curve
 
 13  2) Overview histograms for FP / TP.
 
 14  
 
 15  
 
 16  ''' 
 17  
 
 18  import numpy as np 
 19  import matplotlib.pyplot as plt 
 20  from mpl_toolkits.mplot3d import Axes3D 
 21  import random 
 22  import time 
 23  import pylab as p 
 24  
 
 25  
 
 26  
 
 28      """Approximates the integral through the points a,b""" 
 29      index =  [i+1 for i in xrange(len(x)-1)] 
 30      xdiff = np.array([x[i]-x[i-1] for i in index]) 
 31      ysum = np.array([y[i]+y[i-1] for i in index]) 
 32      return(np.dot(xdiff,ysum)/2) 
  33  
 
 34  
 
 35  
 
 36  
 
 37  
 
 38  """ 
 
 39  abbreviations:
 
 40  tp = True positives
 
 41  fp = false positives
 
 42  NM = number of mismtaches
 
 43  mq = mapping quality
 
 44  rq = readquality
 
 45  subs = substitutions in artifical reference genome (ARG)
 
 46  """ 
 47  
 
 48  
 
 49              
 
 50  
 
 51  
 
 52 -def CalculateRoc2(dataArray,prefix,readsize,uniquehits,mappedreads,filename): 
  53      """
 
 54      Calculates the adjusted ROC curve as well as the AUC value derived from the adjusted points
 
 55      and writes the ROC tables to .txt files. 
 
 56      """ 
 57      starttime= time.time() 
 58      uniquehits = float(uniquehits) 
 59      readsize = float(readsize) 
 60      
 
 61      
 
 62      entries = len(dataArray) 
 63          
 
 64  
 
 65      resultmatrix = np.arange(entries*2) 
 66      resultmatrix = resultmatrix.reshape(2,entries) 
 67          
 
 68      maxrq =  max(x.rq for x in dataArray) 
 69      maxnm =  max(x.nm[0] for x in dataArray) 
 70      maxGaps=  max(x.gaps[0] for x in dataArray) 
 71      maxMism=  max(x.mism[0] for x in dataArray) 
 72      
 
 73          
 
 74      minrq =  min(x.rq for x in dataArray) 
 75      minnm =  min(x.nm[0] for x in dataArray) 
 76      minmq=  min(x.mq[0] for x in dataArray) 
 77      minGaps=  min(x.gaps[0] for x in dataArray)   
 78      minMism=  min(x.mism[0] for x in dataArray)       
 79      
 
 80      
 
 81      
 
 82      quants = [1,2,3,4,5] 
 83      tempa = maxrq-minrq 
 84      stepsize = tempa/5 
 85          
 
 86      rqQuants = [round(minrq+(i-1)*stepsize,3) for i in quants] 
 87      rqQuants.reverse() 
 88      rqQuants[-1] =0  
 89          
 
 90      nmQuants = [i*maxnm/5 for i in quants] 
 91      GapsQuants = [i*maxGaps/5 for i in quants] 
 92      MismQuants = [i*maxMism/5 for i in quants] 
 93  
 
 94      rocvector = [] 
 95          
 
 96      
 
 97      for l in quants:  
 98                  for k in quants:  
 99                      for j in quants:  
100                              temparray = [m for m in dataArray if m.gaps[0] <= GapsQuants[k-1] and m.mism[0] <= MismQuants[j-1]  and m.rq >=rqQuants[l-1]] 
101                              
 
102  
 
103                              tempids = [m.id for m in temparray] 
104                              uniquereads = {} 
105                              for i in xrange(0,len(tempids)): 
106                                  uniquereads[tempids[i]] = "" 
107  
 
108                              mappedreads = len(uniquereads) 
109                              
 
110   
 
111                              
 
112                              templength = len(temparray) 
113                              
 
114                              if templength == 0: 
115                                  continue 
116                              else: 
117                                  tempTP = sum(x.mr[0] for x in temparray) 
118                                  tempFP =templength-tempTP 
119                                  F = round((float(mappedreads)/ readsize) ,3) 
120                                  sens = round((tempTP/ uniquehits) * F,3) 
121                                  if tempFP == 0: 
122                                      spec = 0 
123                                  else: 
124                                      spec = round((tempFP / uniquehits) * F,3)                     
125                                  
 
126                                  rocvector.append([rqQuants[l-1],GapsQuants[k-1],MismQuants[j-1],tempTP,tempFP,templength,sens,spec,F]) 
127                          
 
128                      
 
129  
 
130      
 
131      
 
132      
 
133      
 
134      rocvector.append([0,0,0,0,0,0,0,0,0])                 
135      nproc = np.array(rocvector) 
136      
 
137      
 
138      
 
139      
 
140      sens =  [i[-3] for i in nproc] 
141      spez =  [i[-2] for i in nproc] 
142      
 
143      
 
144      
 
145      
 
146      spez[-1] = 1 
147      sens[-1] = sens[-2] 
148      
 
149  
 
150      rocarray1 = np.array([sens,spez]) 
151      rocarray1 = rocarray1.flatten('F') 
152      rocarray1= rocarray1.reshape((len(spez),2)) 
153      
 
154      rocarray = np.array([sens,spez]) 
155      rocarray = rocarray.flatten('F') 
156      rocarray = rocarray.reshape((len(spez),2)) 
157      rocarray = np.sort(rocarray.view('float,float'), order=['f0','f1'], axis=0).view(np.float) 
158      
 
159      rocarrayCorrected = rocarray 
160      
 
161      
 
162      
 
163      for m in range(len(rocarrayCorrected)-2,-1,-1): 
164          if (rocarrayCorrected[m,1] >= rocarrayCorrected[m+1,1]): 
165              rocarrayCorrected[m,1] = rocarrayCorrected[m+1,1] 
166  
 
167              
 
168      
 
169      plt.hold(True) 
170      plt.figure() 
171      plt.subplot(111) 
172      
 
173      
 
174      plt.plot(rocarrayCorrected[:,1],rocarrayCorrected[:,0], marker='o', markersize=7,linestyle='--', color='r', label='projected') 
175      plt.plot(rocarray1[:,1], rocarray1[:,0], linestyle="None",label='real',marker='.',color='g') 
176      plt.xlabel('1-specificity') 
177      plt.ylabel('sensitivity') 
178      plt.title(r'ROC:'+filename) 
179      plt.axis([-0.1,1.1,-0.1,1.1]) 
180      plt.grid(True) 
181      plt.legend(loc='lower right') 
182      plt.tight_layout() 
183      plt.savefig(prefix + "_ROC.pdf",format='pdf') 
184      plt.clf   
185      
 
186      
 
187      AUC = trapezoidal_rule(rocarrayCorrected[:,1], rocarrayCorrected[:,0]) 
188      
 
189      fobj =  open(prefix+"_roctable.txt","w") 
190      fobj.write("RQ\tGAPS\tMM\tPTP\tFP\tP\tSn\t1-Sp\tF\r\n") 
191      for i in xrange(0,len(rocvector),1): 
192          temp = [str(k) for k in rocvector[i]] 
193          tempstr = "\t".join(temp) 
194          fobj.write(tempstr+"\r\n") 
195  
 
196      endtime= time.time() 
197      print ("X"), 
198      return(round(AUC,3)) 
 199  
 
201      """Plots true positives and false positives into 2 different histogram subplots. """ 
202      prefix2 = "/".join(prefix.split("/")[0:-1])+"/" 
203      fobj = open(prefix2+"indexMappingTools.txt","w") 
204      for i in range(0,len(label)): 
205          fobj.write("%s - %s\r\n" %(i+1,mappernames[i])) 
206      fobj.close() 
207      
 
208      x = [i for i in range(1,len(fp)*3,3)] 
209      xmax = max(x)+1 
210      ymaxTP = max(tp)+0.1 
211      ymaxFP = max(fp)+0.1 
212  
 
213      
 
214      y = tp 
215      x =x 
216      z = fp 
217  
 
218   
 
219      fig = p.figure() 
220      
 
221      if len(label) <= 7: 
222          widthp = 0.7 
223          ticks = label 
224      else: 
225          widthp = 0.3 
226          ticks = [i if i%2 == 0 else "" for i in label] 
227      
 
228      
 
229      ax1 = fig.add_subplot(1,2,1) 
230      ax1.bar(x,y,width=widthp, facecolor='darkgreen') 
231      ax1.set_ylabel('#TP hits') 
232      ax1.set_xlabel('index mapping tool') 
233      ax1.set_title("Global comparison #TP hits") 
234      p.xticks(x,ticks) 
235      p.grid(True) 
236      p.axis([0,xmax,0,ymaxTP+ymaxTP*10/100]) 
237      
 
238      
 
239      ax2 = fig.add_subplot(1,2,2) 
240      ax2.bar(x,z,width=widthp, facecolor='darkred') 
241      ax2.set_ylabel('#FP hits') 
242      ax2.set_xlabel('index mapping tool') 
243      ax2.set_title("Global comparison #FP hits") 
244      p.axis([0,xmax,0,ymaxFP+ymaxFP*10/100]) 
245      p.xticks(x,ticks) 
246      p.grid(True) 
247      plt.tight_layout() 
248      p.savefig(prefix2 + "Overall_histabs.pdf",format='pdf') 
249      p.clf() 
250      
 
251      
 
252      tpsum =sum(tp) 
253      fpsum =sum(fp) 
254      y = [i/float(tpsum) for i in tp] 
255      x =x 
256      z = [i/float(fpsum) for i in fp] 
257      ymax = max(max(z),max(y))+0.2 
258      
 
259      fig = p.figure() 
260      
 
261      if len(label) <= 7: 
262          ticks = label 
263      else: 
264          ticks = [i if i%2 == 0 else "" for i in label] 
265      
 
266      
 
267      ax1 = fig.add_subplot(1,2,1) 
268  
 
269     
 
270      ax1.bar(x,y,width=widthp, facecolor='darkgreen') 
271      ax1.set_ylabel('%TP hits') 
272      ax1.set_xlabel('index mapping tool') 
273      ax1.set_title("Global comparison %TP hits") 
274      p.xticks(x,ticks) 
275      p.grid(True) 
276      p.axis([0,xmax,0,1.1]) 
277      
 
278      
 
279      ax2 = fig.add_subplot(1,2,2) 
280      ax2.bar(x,z,width=widthp, facecolor='darkred') 
281      ax2.set_ylabel('%FP hits') 
282      ax2.set_xlabel('index mapping tool') 
283      ax2.set_title("Global comparison %FP hits") 
284      p.axis([0,xmax,0,1.1]) 
285      p.xticks(x,ticks) 
286      p.grid(True) 
287      plt.tight_layout() 
288      p.savefig(prefix2 + "Overall_histper.pdf",format='pdf') 
289      p.clf() 
290      
 
291      print ("X"), 
 292