1  """
 
  2  Created  2012
 
  3  @author: GieseS
 
  4  
 
  5  
 
  6  Little plotting script which is called in the analysis of different mappings to an artificial reference genome.
 
  7  It produces the following plots:
 
  8  
 
  9  
 
 10  
 
 11  
 
 12  1) ROC Curve
 
 13  2) Overview histograms for FP / TP.
 
 14  
 
 15  """ 
 16  
 
 17  
 
 19      """
 
 20      Function for reading a complete ReferenceFile into Memory.
 
 21      @type  ref: string
 
 22      @param ref: reference file.ArtRead
 
 23      @type  entries: int
 
 24      @param entries: size for np.array
 
 25      @type  compareList: list
 
 26      @param compareList: list containing 1s where there is a difference in the genomes and 0s where the nucleotides are equal.
 
 27      @type  readdic: dictionary
 
 28      @param readdic: dictionary containing read ids and read qualities.
 
 29      @rtype:   tuple
 
 30      @return:  (array of aligned read objects, tp), where tp counts the array slots that were filled.
 
 31      """ 
 32      
 
 33      AlignedReadRefArray = np.zeros(entries, dtype=object) 
 34      tp = 0 
 35      start = time.time() 
 36      start100k = time.time() 
 37      
 
 38      fobj = open(ref, "r") 
 39      refdic = {} 
 40      k = 0 
 41      
 
 42      
 
 43      for alignment in fobj: 
 44          k += 1 
 45          if k == 100000: 
 46              end100k = time.time() 
 47              print ("%f.." % (end100k - start100k)), 
 48  
 
 49          read, readname = isSaneAlignment(alignment, "ref", compareList, readdic) 
 50          if read == 0: 
 51              pass 
 52          
 
 53          else: 
 54              
 
 55              if AlignedReadRefArray[readdic[readname].internalID] != 0: 
 56                  
 
 57                  AlignedReadRefArray[readdic[readname].internalID].toObjself(read) 
 58                  
 
 59              else: 
 60                  
 
 61                  AlignedReadRefArray[readdic[readname].internalID] = read 
 62                  tp += 1 
 63                  
 
 64  
 
 65      fobj.close() 
 66      end = time.time() 
 67      
 
 68      print ("\t %f " % (end - start)), 
 69      return(AlignedReadRefArray, tp) 
  70  
 
 71  
 
 73      """
 
 74      Function for reading the alignment file of the artificial reference into memory. Alignments already contained in the matching RefArray entry are skipped.
 
 75      @type  art: string
 
 76      @param art: artificial file.
 
 77      @type  RefArray: array
 
 78      @param RefArray: Results from reading the reference SAM file.
 
 79      @type  compareList: list
 
 80      @param compareList: list containing 1s where there is a difference in the genomes and 0s where the nucleotides are equal.
 
 81      @type  readdic: dictionary
 
 82      @param readdic: dictionary containing read ids and read qualities.
 
 83      @rtype:   dictionary
 
 84      @return:  aligned read objects from the artificial reference, which were uniquely aligned.
 
 85      """ 
 86      
 
 87      start = time.time() 
 88      
 
 89      fobj = open(art, "r") 
 90      artdic = {} 
 91      k = 0 
 92      
 
 93      read = SkipHeader(fobj,compareList,readdic) 
 94      
 
 95      
 
 96      for alignment in fobj: 
 97          k += 1 
 98          if k % 1000000 == 0: 
 99              print ("%d.." %(k/1000000)), 
100          read, readname = isSaneAlignment(alignment, "art", compareList, readdic) 
101          if read == 0: 
102              pass 
103          else: 
104              
 
105              index = returnIndex(readdic, readname) 
106              if RefArray[index] != 0: 
107                  
 
108                  if (RefArray[index].isContained(read) == 0): 
109                      if readname in artdic: 
110                          artdic[readname].toObjself(read) 
111                      
 
112                      else: 
113                          artdic[readname] = read 
114                  else: 
115                      pass 
116                  
 
117              else: 
118                  
 
119                  artdic[readname] = read 
120             
 
121   
 
122      fobj.close() 
123      end = time.time() 
124      
 
125      print ("\t %f " % (end - start)), 
126      
 
127      return(artdic) 
 128