Package lib :: Package spectrum :: Module nmrpipe
[hide private]
[frames] | no frames]

Source Code for Module lib.spectrum.nmrpipe

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2013-2014,2017 Troels E. Linnet                               # 
  4  # Copyright (C) 2013-2014 Edward d'Auvergne                                   # 
  5  #                                                                             # 
  6  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  7  #                                                                             # 
  8  # This program is free software: you can redistribute it and/or modify        # 
  9  # it under the terms of the GNU General Public License as published by        # 
 10  # the Free Software Foundation, either version 3 of the License, or           # 
 11  # (at your option) any later version.                                         # 
 12  #                                                                             # 
 13  # This program is distributed in the hope that it will be useful,             # 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 16  # GNU General Public License for more details.                                # 
 17  #                                                                             # 
 18  # You should have received a copy of the GNU General Public License           # 
 19  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 20  #                                                                             # 
 21  ############################################################################### 
 22   
 23  # Module docstring. 
 24  """Module containing functions for handling NMRPipe SeriesTab files.""" 
 25   
 26   
 27  # Python module imports. 
 28  import re 
 29  from glob import glob 
 30  from os import sep 
 31  from os.path import abspath 
 32  subprocess_module = False 
 33  try: 
 34      import subprocess 
 35      subprocess_module = True 
 36  except ImportError: 
 37      pass 
 38  from warnings import warn 
 39   
 40  # relax module imports. 
 41  from lib.errors import RelaxError 
 42  from lib.io import file_root, get_file_path, open_write_file, sort_filenames, write_data 
 43  from lib.warnings import RelaxWarning 
 44   
 45   
46 -def read_seriestab(peak_list=None, file_data=None, int_col=None):
47 """Extract the intensity information from the NMRPipe SeriesTab peak intensity file. 48 49 @keyword peak_list: The peak list object to place all data into. 50 @type peak_list: lib.spectrum.objects.Peak_list instance 51 @keyword file_data: The data extracted from the file converted into a list of lists. 52 @type file_data: list of lists of str 53 @keyword int_col: The column which to multiply the peak intensity data (used by the SeriesTab intensity file format). 54 @type int_col: int 55 @raises RelaxError: When the expected peak intensity is not a float. 56 """ 57 58 # Set start variables. 59 modeline = False 60 mode = False 61 varsline = False 62 header = False 63 64 # Loop over lines, to extract variables and find header size. 65 line_nr = 0 66 for line in file_data: 67 if len(line) > 0: 68 if line[0] == 'REMARK' and line[1] == 'Mode:': 69 modeline = line[2:] 70 mode = modeline[0] 71 elif line[0] == 'VARS': 72 varsline = line[1:] 73 elif line[0] == '1': 74 header = line_nr 75 break 76 line_nr += 1 77 78 # Raise RelaxError, if the MODE is not found. 79 if not (modeline and mode): 80 raise RelaxError("MODE not detected. Expecting line 2:\nREMARK Mode: Summation") 81 82 # Raise RelaxError, if the VARS line is not found. 83 if not (varsline): 84 raise RelaxError("VARS not detected. Expecting line 8:\nVARS INDEX X_AXIS Y_AXIS X_PPM Y_PPM VOL ASS Z_A0") 85 86 # Raise RelaxError, if the header size is not found. 87 if not header: 88 raise RelaxError("'1' not detected in start of line. Cannot determine header size.") 89 90 # Find index of assignment ASS. 91 ass_i = varsline.index('ASS') 92 93 # If a list of int_col is given, make sure it only is one value 94 if type(int_col) == list: 95 # Make a set of all int columns 96 set_int_col = list(set(int_col)) 97 98 # If there is only integer column 99 if len(set_int_col) == 1: 100 int_col = set_int_col[0] 101 else: 102 warn(RelaxWarning("Multiple int_col is set to '%s'. I only accept a list of same values."%(int_col ))) 103 104 # Find index of assignment HEIGHT or VOL. 105 if int_col == None or type(int_col) == list: 106 if "VOL" in varsline: 107 int_type = "VOL" 108 elif "HEIGHT" in varsline: 109 int_type = "HEIGHT" 110 else: 111 raise RelaxError("The int_col is set to '%s'. Cannot determine which column to multiply with."%(int_col)) 112 warn(RelaxWarning("The int_col is set to '%s'. Looking for the '%s' index."%(int_col, int_type) )) 113 int_col = varsline.index('%s'%int_type) + 1 114 warn(RelaxWarning("The int_col is set to '%i' from the '%s' index."%(int_col, int_type) )) 115 116 # Chemical shifts preparation. 117 w1_col = None 118 w2_col = None 119 120 # Find index of chemical shift Y_PPM which in sparky is w1. 121 w1_col = varsline.index('Y_PPM') 122 123 # Find index of chemical shift X_PPM which in sparky is w2. 124 w2_col = varsline.index('X_PPM') 125 126 # Make a regular search for Z_A entries. 127 Z_A = re.compile("Z_A*") 128 spectra = list(filter(Z_A.search, varsline)) 129 130 # Find index of Z_A entries. 131 spectra_i = [] 132 for y in spectra: 133 spectra_i.append(varsline.index(y)) 134 135 # Remove the header. 136 file_data = file_data[header:] 137 138 # Loop over the file data. 139 for line in file_data: 140 # Skip non-assigned peaks. 141 if line[ass_i] == '?-?': 142 continue 143 144 # Standard assign if None 145 if line[ass_i] == 'None': 146 new_line_ass_i = "A%sN-HN"%line[0] 147 warn(RelaxWarning("Improperly formatted NMRPipe SeriesTab file. The spin assignment column 'ASS' is set to '%s'. Setting to %s." % (line[ass_i], new_line_ass_i))) 148 line[ass_i] = new_line_ass_i 149 150 # First split by the 2D separator. 151 assign1, assign2 = re.split('-', line[ass_i]) 152 153 # The assignment of the first dimension. 154 row1 = re.split('([a-zA-Z]+)', assign1) 155 name1 = row1[-2] + row1[-1] 156 157 # The assignment of the second dimension. 158 row2 = re.split('([a-zA-Z]+)', assign2) 159 name2 = row2[-2] + row2[-1] 160 161 # Get the residue number for dimension 1. 162 got_res_num1 = True 163 try: 164 res_num1 = int(row1[-3]) 165 except: 166 got_res_num1 = False 167 raise RelaxError("Improperly formatted NMRPipe SeriesTab file, cannot process the residue number for dimension 1 in assignment: %s." % line[0]) 168 169 # Get the residue number for dimension 2. 170 try: 171 res_num2 = int(row2[-3]) 172 except: 173 # We cannot always expect dimension 2 to have residue number. 174 if got_res_num1: 175 res_num2 = res_num1 176 else: 177 res_num2 = None 178 warn(RelaxWarning("Improperly formatted NMRPipe SeriesTab file, cannot process the residue number for dimension 2 in assignment: %s. Setting residue number to %s." % (line[0], res_num2))) 179 180 # The residue name for dimension 1. 181 got_res_name1 = True 182 try: 183 res_name1 = row1[-4] 184 except: 185 got_res_name1 = False 186 res_name1 = None 187 warn(RelaxWarning("Improperly formatted NMRPipe SeriesTab file, cannot process the residue name for dimension 1 in assignment: %s. Setting residue name to %s." % (line[0], res_name1))) 188 189 # The residue name for dimension 2. 190 try: 191 res_name2 = row2[-4] 192 except: 193 # We cannot always expect dimension 2 to have residue name. 194 if got_res_name1: 195 res_name2 = res_name1 196 else: 197 res_name2 = None 198 warn(RelaxWarning("Improperly formatted NMRPipe SeriesTab file, cannot process the residue name for dimension 2 in assignment: %s. Setting residue name to %s." % (line[0], res_name2))) 199 200 # Get the intensities. 201 try: 202 # Loop over the spectra. 203 intensities = [] 204 for i in range(len(spectra)): 205 # The intensity is given by column multiplication. 206 intensities.append( float(line[spectra_i[i]]) * float(line[int_col-1]) ) 207 208 # Bad data. 209 except ValueError: 210 raise RelaxError("The peak intensity value %s from the line %s is invalid." % (intensity, line)) 211 212 # Chemical shifts. 213 w1 = None 214 w2 = None 215 if w1_col != None: 216 try: 217 w1 = float(line[w1_col]) 218 except ValueError: 219 raise RelaxError("The chemical shift from the line %s is invalid." % line) 220 if w2_col != None: 221 try: 222 w2 = float(line[w2_col]) 223 except ValueError: 224 raise RelaxError("The chemical shift from the line %s is invalid." % line) 225 226 # Add the assignment to the peak list object. 227 peak_list.add(res_nums=[res_num1, res_num2], res_names=[res_name1, res_name2], spin_names=[name1, name2], shifts=[w1, w2], intensity=intensities, intensity_name=spectra)
228 229
230 -def show_apod_extract(file_name=None, dir=None, path_to_command='showApod'):
231 """Extract showApod information for spectrum fourier transformed with NMRPipe. 232 233 @keyword file: The filename of the NMRPipe fourier transformed file. 234 @type file: str 235 @keyword dir: The directory where the file is located. 236 @type dir: str 237 @keyword path_to_command: If showApod not in PATH, then specify absolute path as: /path/to/showApod 238 @type path_to_command: str 239 @return: The output from showApod as list of lines. 240 @rtype: list of lines 241 """ 242 243 # Get the file path. 244 file_path = get_file_path(file_name=file_name, dir=dir) 245 246 if not subprocess_module: 247 raise RelaxError("Python module 'subprocess' not found, cannot call showApod.") 248 249 # Call function. 250 Temp = subprocess.Popen([path_to_command, file_path], stdout=subprocess.PIPE) 251 252 # Communicate with program, and get outout and exitcode. 253 (output, errput) = Temp.communicate() 254 255 # Wait for finish and get return code. 256 return_value = Temp.wait() 257 258 # Python 3 support - convert byte arrays to text. 259 if hasattr(output, 'decode'): 260 output = output.decode() 261 262 return output.splitlines()
263 264
265 -def show_apod_rmsd(file_name=None, dir=None, path_to_command='showApod'):
266 """Extract showApod 'Noise Std Dev' for spectrum fourier transformed with NMRPipe. 267 268 @keyword file: The filename of the NMRPipe fourier transformed file. 269 @type file: str 270 @keyword dir: The directory where the file is located. 271 @type dir: str 272 @keyword path_to_command: If showApod not in PATH, then specify absolute path as: /path/to/showApod 273 @type path_to_command: str 274 @return: The Noise Std Dev from line: 'REMARK Automated Noise Std Dev in Processed Data' 275 @rtype: float 276 """ 277 278 # Call extract function. 279 show_apod_lines = show_apod_extract(file_name=file_name, dir=dir, path_to_command=path_to_command) 280 281 # Loop over the lines 282 found = False 283 for line in show_apod_lines: 284 # Look for line with this remark. 285 if line[:49] == 'REMARK Automated Noise Std Dev in Processed Data:': 286 # The rest of the line is the rmsd. 287 rmsd = float(line[49:].split()[0]) 288 return rmsd 289 290 if not found: 291 print(show_apod_lines) 292 raise RelaxError("Could not find the line: 'REMARK Automated Noise Std Dev in Processed Data:', from the output of showApod.")
293 294
295 -def show_apod_rmsd_to_file(file_name=None, dir=None, path_to_command='showApod', outdir=None, force=False):
296 """Extract showApod 'Noise Std Dev' from showApod, and write to file with same filename and ending '.rmsd' 297 298 @keyword file: The filename of the NMRPipe fourier transformed file. 299 @type file: str 300 @keyword dir: The directory where the file is located. 301 @type dir: str 302 @keyword path_to_command: If showApod not in PATH, then specify absolute path as: /path/to/showApod 303 @type path_to_command: str 304 @keyword outdir: The directory where to write the file. If 'None', then write in same directory. 305 @type outdir: str 306 @param force: Boolean argument which if True causes the file to be overwritten if it already exists. 307 @type force: bool 308 @return: Write the 'Noise Std Dev' from showApod to a file with same file filename, with ending '.rmsd'. This will be a file path. 309 @rtype: str 310 """ 311 312 # Call extract function. 313 apod_rmsd = show_apod_rmsd(file_name=file_name, dir=dir, path_to_command=path_to_command) 314 315 # Get the filename striped of extension details. 316 file_name_root = file_root(file_name) 317 318 # Define extension. 319 extension = ".rmsd" 320 321 # Define file name for writing. 322 file_name_out = file_name_root + extension 323 324 # Define folder to write to. 325 if outdir == None: 326 write_outdir = dir 327 else: 328 write_outdir = outdir 329 330 # Open file for writing, 331 wfile, wfile_path = open_write_file(file_name=file_name_out, dir=write_outdir, force=force, verbosity=1, return_path=True) 332 333 # Write to file. 334 out_write_data = [['%s'%apod_rmsd]] 335 336 # Write data 337 write_data(out=wfile, headings=None, data=out_write_data, sep=None) 338 339 # Close file. 340 wfile.close() 341 342 # Return path to file. 343 return wfile_path
344 345
346 -def show_apod_rmsd_dir_to_files(file_ext='.ft2', dir=None, path_to_command='showApod', outdir=None, force=False):
347 """Searches 'dir' for files with extension 'file_ext'. Extract showApod 'Noise Std Dev' from showApod, and write to file with same filename and ending '.rmsd'. 348 349 @keyword file_ext: The extension for files which is NMRPipe fourier transformed file. 350 @type file_ext: str 351 @keyword dir: The directory where the files is located. 352 @type dir: str 353 @keyword path_to_command: If showApod not in PATH, then specify absolute path as: /path/to/showApod 354 @type path_to_command: str 355 @keyword outdir: The directory where to write the files. If 'None', then write in same directory. 356 @type outdir: str 357 @param force: Boolean argument which if True causes the file to be overwritten if it already exists. 358 @type force: bool 359 @return: Write the 'Noise Std Dev' from showApod to a file with same file filename, with ending '.rmsd'. This will be a list of file paths. 360 @rtype: list of str 361 """ 362 363 # First get correct dir, no matter if dir is specified with or without system folder separator. 364 dir_files = abspath(dir) 365 366 # Define glop pattern. 367 glob_pat = '*%s' % file_ext 368 369 # Get a list of files which math the file extension. 370 file_list = glob(dir_files + sep + glob_pat) 371 372 # Now sort into Alphanumeric order. 373 file_list_sorted = sort_filenames(filenames=file_list, rev=False) 374 375 # Loop over the files. 376 rmsd_files = [] 377 for ft_file in file_list_sorted: 378 # Write rmsd to file, and get file path to file. 379 rmsd_file = show_apod_rmsd_to_file(file_name=ft_file, path_to_command=path_to_command, outdir=outdir, force=force) 380 381 # Collect file path. 382 rmsd_files.append(rmsd_file) 383 384 return rmsd_files
385