Package core :: Module Prep
[hide private]
[frames] | no frames]

Source Code for Module core.Prep

  1  # -*- coding: cp1252 -*-
 
  2  '''
 
  3  Created 2012
 
  4  
 
  5  Contains various helper functions that initialize, translate and preprocess the data
 
  6  
 
  7  
 
  8  @author: Sven Giese''' 
  9  
 
 10  import cPickle as pickle 
 11  import random 
 12  
 
 13  ''' INIT DICTIONARIES ''' 
 14  genetic_code={'GCT':'A', 'GCC':'A', 'GCA':'A', 'GCG':'A',
 
 15                'CGT':'R', 'CGC':'R', 'CGA':'R', 'CGG':'R', 'AGA':'R', 'AGG':'R',
 
 16                'AAT':'N', 'AAC':'N',
 
 17                'GAT':'D', 'GAC':'D',
 
 18                'TGT':'C', 'TGC':'C',
 
 19                'CAA':'Q', 'CAG':'Q',
 
 20                'GAA':'E', 'GAG':'E',
 
 21                'GGT':'G', 'GGC':'G','GGA':'G', 'GGG':'G',
 
 22                'CAT':'H', 'CAC':'H',
 
 23                'ATT':'I', 'ATC':'I','ATA':'I',
 
 24                'ATG':'M',
 
 25                'TTA':'L', 'TTG':'L', 'CTT':'L', 'CTC':'L', 'CTA':'L', 'CTG':'L',
 
 26                'AAA':'K', 'AAG':'K',
 
 27                'TTT':'F', 'TTC':'F',
 
 28                'CCT':'P', 'CCC':'P','CCA':'P', 'CCG':'P',
 
 29                'TCT':'S', 'TCC':'S', 'TCA':'S', 'TCG':'S', 'AGT':'S', 'AGC':'S',
 
 30                'ACT':'T', 'ACC':'T','ACA':'T', 'ACG':'T',
 
 31                'TGG':'W',
 
 32                'TAT':'Y', 'TAC':'Y',
 
 33                'GTT':'V', 'GTC':'V','GTA':'V', 'GTG':'V',
 
 34                'TAA':'*', 'TGA':'*','TAG':'*','NNN':'n'} 
 35  
 
 36  
 
 37  
 
def createdic(AAsequence):
    """
    Build a lookup table from amino-acid triplets to their start positions.

    Every triplet that can be formed from the 20 amino-acid letters plus the
    stop symbol '*' (21**3 keys) is present in the result, initially with an
    empty list.  The sequence is then read in consecutive, non-overlapping
    windows of three letters beginning at index 1; the start index of each
    complete window is appended to the list of the matching triplet.  Windows
    that are not one of the generated keys are skipped, and an incomplete
    window at the end is ignored.

    NOTE(review): the scan begins at index 1, so AAsequence[0] never belongs
    to a window - confirm that this offset is intended.

    @type  AAsequence: string
    @param AAsequence: aminoacid sequence
    @rtype:   dictionary
    @return:  A dictionary with starting positions of each triplet in the
              given AA sequence
    """
    alphabet = "ARNDCEQGHILKMFPSTWYV*"

    # one (initially empty) position list per possible triplet
    positions = {
        first + second + third: []
        for first in alphabet
        for second in alphabet
        for third in alphabet
    }

    # a window starting at `start` is complete when start + 3 <= len, i.e.
    # start < len - 2, so the range bound replaces an explicit length check
    for start in range(1, len(AAsequence) - 2, 3):
        hits = positions.get(AAsequence[start:start + 3])
        if hits is not None:
            hits.append(start)

    return positions
68 69 70 71
def isvalidtriplet(codon,dictentry):
    """
    Check whether two triplets differ in exactly one of their three positions.

    Only the first three characters of each argument are compared.  Identical
    triplets (no mismatch) yield False, as do triplets with two or three
    mismatches.  Used for generation of possible substitution triplets.

    @type  codon: string
    @param codon: nucleotide triplet
    @type  dictentry: string
    @param dictentry: nucleotide triplet
    @rtype:   bool
    @return:  True if exactly one position differs, else False.
    """
    # two matching positions out of three is the same as one mismatch
    mismatches = sum(1 for pos in range(3) if codon[pos] != dictentry[pos])
    return mismatches == 1
98
def trans_seq(DNA):
    """
    Translate a nucleotide sequence into an amino-acid sequence.

    The input pieces are joined into one string and read codon by codon from
    position 0.  A codon containing the letter 'N' is translated to 'n';
    every other codon is looked up in the module-level genetic_code table
    (a codon missing from that table raises KeyError).  Trailing nucleotides
    that do not fill a complete codon are returned untranslated.

    @type  DNA: list
    @param DNA: nucleotide sequence
    @rtype:   prot,rest
    @return:  Translated aminoacid sequence,untranslated nucleotide sequence
    """
    sequence = "".join(DNA)

    # length of the prefix that consists of complete codons only
    usable = len(sequence) - len(sequence) % 3

    residues = []
    for start in range(0, usable, 3):
        triplet = sequence[start:start + 3]
        if "N" in triplet:
            # unknown nucleotide(s) in the codon
            residues.append("n")
        else:
            # standard triplet translation
            residues.append(genetic_code[triplet])

    return ("".join(residues), sequence[usable:])
133 134 ''' DEBUG HELP FUNCTIONS ''' 135 136
def savepickle(dictionary,outputname):
    """
    Pickle an object to the file "<outputname>.p".

    Basic pickle helper, mainly for debugging and to speed up multiple
    simulations (makes it possible to reload ORF lists).  The suffix ".p" is
    always appended to the given name, and a confirmation line is printed
    once the file has been written.

    @type  dictionary: dictionary
    @param dictionary: Dictionary containing start and end positions of ORFs.
    @type  outputname: string
    @param outputname: Filename for saving (without the ".p" suffix).
    """
    target = outputname + ".p"
    # the with-block closes (and thereby flushes) the handle even when
    # pickle.dump raises; the previous version never closed the file
    with open(target, "wb") as handle:
        pickle.dump(dictionary, handle)
    print("Saved .pickle to: " + target)
149
def loadpickle(inputname):
    """
    Load a pickled object from the file inputname.

    Basic pickle helper, mainly for debugging and to speed up multiple
    simulations (makes it possible to reload ORF lists).  Unlike savepickle,
    no ".p" suffix is appended: inputname must be the complete file name.
    A confirmation line is printed after loading.

    NOTE: unpickling can execute arbitrary code - only load files that come
    from a trusted source.

    @type  inputname: string
    @param inputname: Filename for loading.
    @rtype:   dictionary
    @return:  Dictionary containing start and end positions of ORFs.
    """
    # pickles are written in binary mode ("wb"), so they have to be read in
    # binary mode as well; the with-block also makes sure the handle is closed
    with open(inputname, "rb") as handle:
        dictionary = pickle.load(handle)
    print("Loaded "+inputname+" pickle!")
    return dictionary
163