Module star
[hide private]
[frames] | no frames]

Source Code for Module star

  1  import sys 
  2  from re import match 
  3  from string import split 
  4   
class star:
    """Extract model-free data from a STAR formatted mfout file.

    The file contents are handed to extract() as a sequence of text lines.
    Parsing is purely positional: a start marker is located, and every
    following table is reached by a fixed line offset from the previous one.

    Working state kept on the instance while parsing:
        mfout     -- the lines being parsed.
        line_num  -- index into mfout of the line currently being processed.
        row       -- list of whitespace-split lines; row[0] is the line at
                     which the current table (or residue block) starts.
        data      -- the result, one dictionary per residue.
    """

    def __init__(self):
        "Class to extract model-free data from the STAR formatted mfout file."

    def extract(self, mfout, num_res, sse_lim='0.90', ftest_lim='0.80', large_sse=20, ftest='n'):
        """Extract the data from the mfout file and return it as a 2D data structure.

        Arguments:
            mfout      -- sequence of strings, one per line of the mfout file.
            num_res    -- number of residues (data rows per table).
            sse_lim    -- percentile (as a fraction, string or number) selecting
                          the SSE limit row of each residue block.
            ftest_lim  -- percentile selecting the F-statistic limit row.
            large_sse  -- SSE values greater than or equal to this are flagged
                          in 'large_sse'.
            ftest      -- a value starting with 'n' extracts the parameter
                          tables and the SSE data; a value starting with 'y'
                          extracts the F-test data instead.

        Returns a list of num_res dictionaries.  Parameter values and their
        errors are stored as the strings found in the file; 'sse', 'sse_lim',
        'fstat' and 'fstat_lim' are floats and the test flags are 0 or 1.

        If the start marker is not found, line_num stays at 0 and parsing
        proceeds from the first line, as in the original implementation.
        An IndexError is raised if the file is shorter than the fixed offsets
        require, and a ValueError if a statistic cannot be converted to float.
        """

        self.mfout = mfout
        self.num_res = num_res
        self.sse_lim = sse_lim
        self.ftest_lim = ftest_lim
        self.large_sse = large_sse

        self.data = [{} for i in range(self.num_res)]
        self.line_num = 0

        if match('n', ftest):
            # Jump to first line of data: first field is exactly 'S2' and
            # the second field starts with '()'.
            self._find_start(2, lambda fields: match('S2$', fields[0]) and match(r'\(\)', fields[1]))
            self.get_s2()
            self.get_s2f()
            self.get_s2s()
            self.get_te()
            self.get_rex()
            self.get_sse()

        if match('y', ftest):
            # Jump to first line of data: first field starts with 'data_F_dist'.
            self._find_start(1, lambda fields: match('data_F_dist', fields[0]))
            self.get_ftest()

        return self.data

    def _find_start(self, min_fields, is_start):
        "Set line_num to the first line with at least min_fields fields for which is_start(fields) is true."

        # get_s2() relies on row holding exactly the matched line on return.
        self.row = [[]]
        for index, line in enumerate(self.mfout):
            self.row = [line.split()]
            if len(self.row[0]) < min_fields:
                continue
            if is_start(self.row[0]):
                self.line_num = index
                break

    def _store_table(self, value_key, error_key, with_res_num=False):
        "Read the num_res lines below line_num and store fields 1 and 5 (and optionally field 0) per residue."

        self.split_rows(self.line_num, self.num_res)
        for i in range(self.num_res):
            fields = self.row[i + 1]
            if with_res_num:
                self.data[i]['res_num'] = fields[0]
            self.data[i][value_key] = fields[1]
            self.data[i][error_key] = fields[5]

    def _next_table(self, value_key, error_key):
        "Advance to the next parameter table and store its values and errors."

        # The next table starts num_res data lines plus 3 further lines
        # below the start of the current one.
        self.line_num = self.line_num + self.num_res + 3
        self.row = [self.mfout[self.line_num].split()]
        self._store_table(value_key, error_key)

    def _read_distribution(self, limit):
        """Read one residue block of a distribution table.

        Returns the split line at line_num and the split line 'percentile'
        lines below it, then moves line_num to the start of the next block.
        """

        self.row = [self.mfout[self.line_num].split()]
        # One row per 5% step (presumably of the simulated distribution);
        # int() truncates, as in the original implementation.
        percentile = int(float(limit) * 100 / 5)
        self.line_num = self.line_num + percentile
        self.row.append(self.mfout[self.line_num].split())
        # Every block is percentile + 2 + (20 - percentile) = 22 lines long.
        self.line_num = self.line_num + 2 + (20 - percentile)
        return self.row[0], self.row[1]

    def get_ftest(self):
        "Store 'res_num', 'fstat', 'fstat_lim' and the 'ftest' flag for every residue."

        # The first residue block starts 5 lines below the start marker.
        self.line_num = self.line_num + 5
        for i in range(self.num_res):
            stat_row, limit_row = self._read_distribution(self.ftest_lim)
            entry = self.data[i]
            entry['res_num'] = stat_row[0]
            entry['fstat'] = float(stat_row[1])
            entry['fstat_lim'] = float(limit_row[1])
            # The F-statistic must exceed the limit, and the limit is never
            # taken to be lower than 1.5.
            if entry['fstat_lim'] < 1.5:
                threshold = 1.5
            else:
                threshold = entry['fstat_lim']
            if entry['fstat'] > threshold:
                entry['ftest'] = 1
            else:
                entry['ftest'] = 0

    def get_rex(self):
        "Store 'rex' and 'rex_err' for every residue from the next table."

        self._next_table('rex', 'rex_err')

    def get_s2(self):
        "Store 'res_num', 's2' and 's2_err' for every residue from the table starting at line_num."

        # No offset here: extract() has already positioned line_num and row
        # on the start line of this table.
        self._store_table('s2', 's2_err', with_res_num=True)

    def get_s2f(self):
        "Store 's2f' and 's2f_err' for every residue from the next table."

        self._next_table('s2f', 's2f_err')

    def get_s2s(self):
        "Store 's2s' and 's2s_err' for every residue from the next table."

        self._next_table('s2s', 's2s_err')

    def get_sse(self):
        "Store 'sse', 'sse_lim' and the 'sse_test', 'large_sse' and 'zero_sse' flags for every residue."

        # The first residue block starts num_res data lines plus 8 further
        # lines below the start of the previous table.
        self.line_num = self.line_num + self.num_res + 8
        for i in range(self.num_res):
            stat_row, limit_row = self._read_distribution(self.sse_lim)
            entry = self.data[i]
            entry['sse'] = float(stat_row[1])
            entry['sse_lim'] = float(limit_row[1])

            # Chi-squared test.
            if entry['sse'] <= entry['sse_lim']:
                entry['sse_test'] = 1
            else:
                entry['sse_test'] = 0

            # Large SSE test.
            if entry['sse'] >= self.large_sse:
                entry['large_sse'] = 1
            else:
                entry['large_sse'] = 0

            # Zero SSE test.
            if entry['sse'] == 0:
                entry['zero_sse'] = 1
            else:
                entry['zero_sse'] = 0

    def get_te(self):
        "Store 'te' and 'te_err' for every residue from the next table."

        self._next_table('te', 'te_err')

    def split_rows(self, line_num, num_lines):
        "Append the whitespace-split contents of the 'num_lines' lines after 'line_num' to self.row."

        for offset in range(1, num_lines + 1):
            self.row.append(self.mfout[line_num + offset].split())