def read_list_intensity_seriestab(file_data=None, int_col=None):
    """Return the peak intensity information from the NMRPipe SeriesTab peak intensity file.

    The header is scanned for the 'REMARK Mode:' line, the 'VARS' line and the first data line (the line whose first column is '1').  The 'ASS' column is read as a Sparky-style 'X-H' assignment, and every 'Z_A<number>' column is treated as one spectrum.  Each spectrum value is multiplied by the value of the 'VOL' column of the same line.  Peaks assigned as '?-?' and blank lines are skipped.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   Currently unused.  The multiplier column is located via the 'VOL' entry of the VARS line (falling back to column 5 if 'VOL' is absent).
    @type int_col:      int
    @raises RelaxError: When the Mode, VARS or first data line cannot be found, when the ASS column is missing, when an assignment cannot be parsed, or when an intensity value is not a float.
    @return:            The extracted data as a list with one [data, spectrum] pair per Z_A column, where spectrum is the column name and data is a list of [proton name, heteronucleus name, residue number, intensity] lists.
    @rtype:             list of [list of [str, str, int, float], str]
    """

    # Treat missing input as an empty file so that the checks below give a clear error.
    if file_data is None:
        file_data = []

    # Scan the header for the mode, the column names and the first data line.
    mode = None
    varsline = None
    header = None
    for line_nr, line in enumerate(file_data):
        if not line:
            continue
        if line[0] == 'REMARK' and len(line) > 2 and line[1] == 'Mode:':
            mode = line[2]
        elif line[0] == 'VARS':
            varsline = line[1:]
        elif line[0] == '1':
            header = line_nr
            break

    # Raise RelaxError, if the MODE is not found.
    if not mode:
        raise RelaxError("MODE not detected. Expecting line 2:\nREMARK Mode: Summation")

    # Raise RelaxError, if the VARS line is not found.
    if not varsline:
        raise RelaxError("VARS not detected. Expecting line 8:\nVARS INDEX X_AXIS Y_AXIS X_PPM Y_PPM VOL ASS Z_A0")

    # Raise RelaxError, if the header size is not found.
    if header is None:
        raise RelaxError("'1' not detected in start of line. Cannot determine header size.")

    # The VARS keyword itself was stripped off, so positions in varsline match the data columns.
    if 'ASS' not in varsline:
        raise RelaxError("The assignment column ASS was not found in the VARS line %s." % varsline)
    ass_i = varsline.index('ASS')

    # The multiplier column, located by name with the historical fixed position as the fall back.
    if 'VOL' in varsline:
        vol_i = varsline.index('VOL')
    else:
        vol_i = 5

    # The spectra columns Z_A0, Z_A1, etc., stored as a real list of (column index, name) pairs
    # so that it can be iterated over more than once (filter() is a one-shot iterator in Python 3).
    z_a = re.compile(r"Z_A\d+")
    spectra = [(col, name) for col, name in enumerate(varsline) if z_a.match(name)]

    # Parse the assignment of every assigned peak once, rather than once per spectrum.
    peaks = []
    for line in file_data[header:]:
        # Skip blank lines (for example at the end of the file).
        if not line:
            continue

        # The assignment column must be present.
        if len(line) <= ass_i:
            raise RelaxError("The line %s is missing the assignment column." % line)

        # Skip non-assigned peaks.
        if line[ass_i] == '?-?':
            continue

        h_name, x_name, res_num = _parse_seriestab_assignment(line[ass_i])
        peaks.append((h_name, x_name, res_num, line))

    # Assemble the intensities, spectrum by spectrum.
    data_all = []
    for int_i, spectrum in spectra:
        data = []
        for h_name, x_name, res_num, line in peaks:
            intensity = _seriestab_float(line, int_i) * _seriestab_float(line, vol_i)
            data.append([h_name, x_name, res_num, intensity])
        data_all.append([data, spectrum])

    # Return the data.
    return data_all


def _parse_seriestab_assignment(assignment):
    """Split a Sparky-style 'X-H' assignment into the proton name, heteronucleus name and residue number.

    @param assignment:  The assignment string, for example 'A3N-HN'.
    @type assignment:   str
    @raises RelaxError: When the assignment does not have the expected layout.
    @return:            The proton name, the heteronucleus name and the residue number.
    @rtype:             tuple of str, str, int
    """

    try:
        # First split by the 2D separator.
        x_assign, h_assign = assignment.split('-')

        # Splitting on the runs of capital letters places the atom name in the last two elements.
        h_row = re.split('([A-Z]+)', h_assign)
        x_row = re.split('([A-Z]+)', x_assign)
        h_name = h_row[-2] + h_row[-1]
        x_name = x_row[-2] + x_row[-1]

        # The residue number is the element just before the heteronucleus name.
        res_num = int(x_row[-3])
    except (IndexError, ValueError):
        raise RelaxError("Improperly formatted NMRPipe SeriesTab file, cannot parse the assignment '%s'." % assignment)

    return h_name, x_name, res_num


def _seriestab_float(line, col):
    """Convert one column of a SeriesTab data line into a float.

    @param line:        The split data line.
    @type line:         list of str
    @param col:         The index of the column to convert.
    @type col:          int
    @raises RelaxError: When the column is missing or its value is not a float.
    @return:            The value of the column.
    @rtype:             float
    """

    if col >= len(line):
        raise RelaxError("The peak intensity column %s is missing from the line %s." % (col, line))

    # Report the offending text itself, which is always defined at this point.
    text = line[col]
    try:
        return float(text)
    except ValueError:
        raise RelaxError("The peak intensity value %s from the line %s is invalid." % (text, line))