lib.software.xplor

1 ############################################################################### 2 # # 3 # Copyright (C) 2009-2013 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module containing functions for the parsing and creation of Xplor formatted files.""" 24 25 # Python module imports. 26 from re import search 27 28 # relax module imports. 29 from lib.errors import RelaxError 30 31

def __convert_to_id(string):
    """Convert the string into a relax atom id representation.

    The Xplor selection is a series of 'keyword value' pairs joined by the word 'and', for example
    'segid A and resid 5 and name HN'.  The 'segid', 'resid' and 'name' keywords are translated
    into the '#', ':' and '@' prefixed parts of the relax ID respectively.  Pairs with any other
    keyword contribute nothing to the returned ID.

    @param string:      The Xplor atom string.
    @type string:       str
    @return:            The relax atom id.
    @rtype:             str
    @raise RelaxError:  If any 'and'-separated part of the string does not consist of exactly two
                        whitespace separated elements.
    """

    # The relax ID prefix for each supported Xplor keyword.
    prefixes = {
        'segid': '#',    # A molecule identifier.
        'resid': ':',    # A residue identifier.
        'name': '@',     # An atom identifier.
    }

    # Group the whitespace separated tokens, starting a new group at each standalone 'and' token.
    # Splitting on whole tokens rather than on the 'and' substring keeps identifiers which merely
    # contain these letters (e.g. the segment ID 'LIGAND') intact.
    groups = [[]]
    for token in string.split():
        if token == 'and':
            groups.append([])
        else:
            groups[-1].append(token)

    # Loop over the data.
    relax_id = ''
    for info in groups:
        # Don't know what this is!
        if len(info) != 2:
            raise RelaxError("Cannot convert the Xplor atom string '%s' to relax format." % string)

        # Translate the known keywords, skipping all others.
        keyword, value = info
        if keyword in prefixes:
            relax_id = relax_id + prefixes[keyword] + value

    # Return the relax id.
    return relax_id

68 69

def parse_noe_restraints(lines):
    """Extract the NOE restraints from Xplor formatted lines.

    Comments are stripped first, then every assign statement is read, its two atom selections are
    split into pseudoatom members where needed and converted to relax spin IDs, and the NOE
    distance with its two offsets is turned into lower and upper bounds.

    @param lines:   The Xplor formatted file, or file fragment, split into lines.
    @type lines:    list of str
    @return:        The NOE restraint list, each element holding the two atom identification
                    strings (or list of str for pseudoatoms) followed by the lower and upper
                    bounds.
    @rtype:         list of lists of str, str, float, float
    """

    def to_relax(xplor_id):
        """Convert a single Xplor ID, or a list of pseudoatom member IDs, to relax format."""

        if isinstance(xplor_id, list):
            return [__convert_to_id(member) for member in xplor_id]
        return __convert_to_id(xplor_id)

    # The restraint list to build up.
    restraints = []

    # First level pass over the comment-free data (assign statements).
    for atom1, atom2, dist, minus, plus in first_parse(strip_comments(lines)):
        # Second parse (pseudoatoms), performed on both IDs before any conversion.
        atom1 = second_parse(atom1)
        atom2 = second_parse(atom2)

        # Convert to relax spin IDs.
        spin_id1 = to_relax(atom1)
        spin_id2 = to_relax(atom2)

        # Store the IDs together with the lower and upper bounds.
        restraints.append([spin_id1, spin_id2, dist - minus, dist + plus])

    # Return the data.
    return restraints

116 117

def first_parse(lines):
    """Generator function to parse and extract the 2 atom IDs and NOE info from the lines.

    The first parse loops over and returns the data from assign statements, returning pseudo atoms
    as single strings.  The second parse splits the pseudoatoms.

    An assign statement starts on a line beginning with 'assign' and its two bracketed atom
    selections may continue over the following lines.  The three restraint values are read from
    the remainder of the line on which the second selection is closed.

    @param lines:       The Xplor formatted file, or file fragment, split into lines.
    @type lines:        list of str
    @return:            The 2 atom IDs (outer brackets removed), and NOE info (NOE, lower, and
                        upper values).
    @rtype:             str, str, float, float, float
    @raise RelaxError:  If a closing bracket has no matching opening bracket, if the data ends
                        before both atom selections are closed, or if fewer than three restraint
                        values follow the second selection.
    """

    # Extract the data.
    line_index = 0
    while line_index < len(lines):
        # Non-data line, so skip to the next line without yielding.
        if not lines[line_index].startswith('assign'):
            line_index = line_index + 1
            continue

        # Init.
        char_index = -1
        atom_ids = ['', '']
        id_index = 0
        depth = 0

        # Extract the atom ID strings, character by character.
        while True:
            # Inc the character index.
            char_index = char_index + 1

            # Ran out of data before both selections were closed.
            if line_index >= len(lines):
                raise RelaxError("Improperly formatted Xplor file, incomplete assign statement.")

            # Check if we need to go to the next line.
            if char_index >= len(lines[line_index]):
                line_index = line_index + 1
                char_index = -1
                continue

            char = lines[line_index][char_index]

            # A starting bracket, so increment the depth counter.
            if char == '(':
                depth = depth + 1

                # Don't include the outermost bracket in the ID string.
                if depth == 1:
                    continue

            # An ending bracket, which must match a previously opened one.
            elif char == ')':
                if depth == 0:
                    raise RelaxError("Improperly formatted Xplor file, unmatched ')'.")
                depth = depth - 1

            # Not inside a selection, so jump to the next character.
            elif not depth:
                continue

            # Still inside, so append the character (nested brackets included).
            if depth:
                atom_ids[id_index] = atom_ids[id_index] + char

            # The selection has been closed, so finish if it was the second.
            elif id_index == 1:
                break

            # Otherwise move on to the second selection.
            else:
                id_index = 1

        # The rest of the data (NOE restraint info).
        info = lines[line_index][char_index+1:].split()
        if len(info) < 3:
            raise RelaxError("Improperly formatted Xplor file, the NOE distance and bounds are missing from the assign statement.")

        # NOE dist, lower, upper.
        noe = float(info[0])
        lower = float(info[1])
        upper = float(info[2])

        # Line index.
        line_index = line_index + 1

        # Return the data.
        yield atom_ids[0], atom_ids[1], noe, lower, upper

212 213

def second_parse(id):
    """Break a pseudoatom selection up into its bracketed members.

    Only text enclosed in brackets is collected, everything between the bracketed groups is
    discarded.

    @param id:          The Xplor atom id without outer brackets, i.e. a single atom or a list of
                        atoms in the case of pseudoatoms.
    @type id:           str
    @return:            For normal atoms (nothing collected for the first group), the id string is
                        returned unmodified.  For pseudoatoms, a list of strings, with brackets
                        removed, is returned.
    @rtype:             str or list of str
    @raise RelaxError:  If an opening bracket is found inside an already open bracket.
    """

    # The collected members, the last element being the one currently filled.
    members = ['']
    in_brackets = False

    # Walk through the characters.
    for char in id:
        # A starting bracket, which may not be nested.
        if char == '(':
            if in_brackets:
                raise RelaxError("The Xplor pseudoatom ID string '%s' is invalid." % id)
            in_brackets = True

        # Anything outside of the brackets is skipped.
        elif in_brackets:
            # An ending bracket closes the current member and opens an empty new one.
            if char == ')':
                in_brackets = False
                members.append('')

            # Otherwise the character belongs to the current member.
            else:
                members[-1] = members[-1] + char

    # Nothing collected for the first member, so this is a normal atom.
    if not members[0]:
        return id

    # Remove the last empty member string.
    if members[-1] == '':
        members = members[:-1]

    # Return the pseudoatom members.
    return members

274 275

def strip_comments(lines):
    """Remove all Xplor comments from the data.

    Lines starting with the '!' comment character are dropped entirely.  All other lines are
    truncated at the first '!' character, keeping a trailing newline if the line had one so that
    statements continued over multiple lines remain separated by whitespace.

    @param lines:   The Xplor formatted file, or file fragment, split into lines.
    @type lines:    list of str
    @return:        The file data with all comments removed.
    @rtype:         list of str
    """

    # Loop over the lines.
    new_lines = []
    for line in lines:
        # Comment lines.
        if line.startswith('!'):
            continue

        # Partial comment lines - keep only the text before the comment character.
        new_line, marker, comment = line.partition('!')

        # Restore the line ending removed together with the comment.
        if marker and comment.endswith('\n'):
            new_line = new_line + '\n'

        # Add the new line.
        new_lines.append(new_line)

    # Return the stripped data.
    return new_lines

307

Source Code for Module lib.software.xplor