generic_fns.sequence

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2012 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for handling the molecule, residue, and spin sequence.""" 24 25 # relax module imports. 26 from arg_check import is_int 27 from generic_fns.interatomic import return_interatom_list 28 from generic_fns.mol_res_spin import count_molecules, count_residues, count_spins, create_molecule, create_residue, create_spin, exists_mol_res_spin_data, generate_spin_id, return_molecule, return_residue, return_spin, set_spin_element, set_spin_isotope, spin_id_to_data_list, spin_loop 29 from generic_fns import pipes 30 from relax_errors import RelaxError, RelaxDiffMolNumError, RelaxDiffResNumError, RelaxDiffSeqError, RelaxDiffSpinNumError, RelaxFileEmptyError, RelaxInvalidSeqError, RelaxNoSequenceError, RelaxSequenceError 31 from relax_io import open_write_file, read_spin_data, write_spin_data 32 import sys 33 34 35

def attach_protons():
    """Attach a single proton to all heteronuclei which do not yet have one.

    Spins whose element is 'H' are skipped.  For every other spin, the interatomic data containers returned for its spin ID are searched for a proton.  If none is found, a new spin named 'H' is created using the molecule name, residue number and residue name of that spin.  Finally the element and isotope of the '@H' spins are set.
    """

    # The (molecule name, residue number, residue name) of each spin needing a proton.
    targets = []

    # Loop over all spins.
    for spin, mol_name, res_num, res_name, spin_id in spin_loop(full_info=True, return_id=True):
        # Protons themselves are skipped.
        if hasattr(spin, 'element') and spin.element == 'H':
            continue

        # Search the interatomic data containers for an attached proton.
        has_proton = False
        for interatom in return_interatom_list(spin_id):
            # The partner is whichever of the two spins of the container is not the current one.
            partner = return_spin(interatom.spin_id1)
            if partner is spin:
                partner = return_spin(interatom.spin_id2)

            # Is it a proton?
            # NOTE(review): the name test looks at the current spin rather than the partner - confirm this is intended.
            if (hasattr(partner, 'element') and partner.element == 'H') or spin.name == 'H':
                has_proton = True
                break

        # Nothing to do when a proton is already attached.
        if has_proton:
            continue

        # Remember the sequence info for later.
        targets.append((mol_name, res_num, res_name))

    # Create all protons (this must be done out of the spin loop, as it affects the looping!).
    for mol_name, res_num, res_name in targets:
        create_spin(spin_name='H', res_name=res_name, res_num=res_num, mol_name=mol_name)

    # Set the element and spin type.
    set_spin_element(spin_id='@H', element='H')
    set_spin_isotope(spin_id='@H', isotope='1H')

80 81

def copy(pipe_from=None, pipe_to=None, preserve_select=False, verbose=True):
    """Copy the molecule, residue, and spin sequence from one data pipe into another.

    @keyword pipe_from:         The data pipe supplying the sequence data.  If None, the current data pipe is used.
    @type pipe_from:            str
    @keyword pipe_to:           The data pipe receiving the sequence data.  If None, the current data pipe is used.
    @type pipe_to:              str
    @keyword preserve_select:   A flag which if True causes the selection state of each spin to be carried over.  Otherwise all copied spins are selected.
    @type preserve_select:      bool
    @keyword verbose:           A flag passed on unchanged to generate() for each spin.
    @type verbose:              bool
    @raises RelaxError:         If both pipe_from and pipe_to are None.
    @raises RelaxNoSequenceError:   If pipe_from holds no sequence data.
    @raises RelaxSequenceError:     If pipe_to already holds sequence data.
    """

    # At least one of the two pipes must be named explicitly.
    if pipe_from is None and pipe_to is None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")

    # The pipe left out defaults to the current data pipe.
    if pipe_from is None:
        pipe_from = pipes.cdp_name()
    elif pipe_to is None:
        pipe_to = pipes.cdp_name()

    # Both data pipes have to exist (source first, then destination).
    for pipe_name in (pipe_from, pipe_to):
        pipes.test(pipe_name)

    # The source must contain sequence data.
    if not exists_mol_res_spin_data(pipe_from):
        raise RelaxNoSequenceError

    # The destination must not.
    if exists_mol_res_spin_data(pipe_to):
        raise RelaxSequenceError

    # Recreate each spin of the source pipe in the destination pipe.
    for spin, mol_name, res_num, res_name in spin_loop(pipe=pipe_from, full_info=True):
        # The selection state to apply to the new spin.
        select = spin.select if preserve_select else True

        # Generate the new sequence element.
        generate(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin.num, spin_name=spin.name, pipe=pipe_to, select=select, verbose=verbose)

128 129

def compare_sequence(pipe1=None, pipe2=None, fail=True):
    """Check whether two data pipes hold the same sequence.

    The molecule, residue and spin counts are compared first, followed by the ordered list of spin IDs.

    @keyword pipe1:     The name of the first data pipe.
    @type pipe1:        str
    @keyword pipe2:     The name of the second data pipe.
    @type pipe2:        str
    @keyword fail:      A flag which if True causes an error to be raised at the first difference found.
    @type fail:         bool
    @return:            1 if the sequence is the same, 0 if different.
    @rtype:             int
    @raises RelaxDiffMolNumError:   If the molecule counts differ and the fail flag is True.
    @raises RelaxDiffResNumError:   If the residue counts differ and the fail flag is True.
    @raises RelaxDiffSpinNumError:  If the spin counts differ and the fail flag is True.
    @raises RelaxDiffSeqError:      If the spin ID sequences differ and the fail flag is True.
    """

    # Assume the sequences match until a difference is found.
    status = 1

    # The counting functions paired with the error for a mismatch, in the order they are checked.
    count_checks = (
        (count_molecules, RelaxDiffMolNumError),
        (count_residues, RelaxDiffResNumError),
        (count_spins, RelaxDiffSpinNumError),
    )
    for counter, error in count_checks:
        if counter(pipe=pipe1) != counter(pipe=pipe2):
            status = 0
            if fail:
                raise error(pipe1, pipe2)

    # Newline terminated spin IDs of each pipe, concatenated into a single string.
    seq1 = ''.join([spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe1)])
    seq2 = ''.join([spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe2)])

    # Sequence check.
    if seq1 != seq2:
        status = 0
        if fail:
            raise RelaxDiffSeqError(pipe1, pipe2)

    return status

181 182

def display(sep=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, spin_num_flag=False, spin_name_flag=False):
    """Display the molecule, residue, and/or spin sequence data.

    The work is delegated to the write() function of this module, with sys.stdout given as the file.

    @keyword sep:               The column separator, passed on to write().
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @raises RelaxNoSequenceError:   If count_spins() reports no spins.
    """

    # Nothing can be shown without spins.
    if not count_spins():
        raise RelaxNoSequenceError

    # The column switches, forwarded unchanged.
    column_flags = {
        'mol_name_flag': mol_name_flag,
        'res_num_flag': res_num_flag,
        'res_name_flag': res_name_flag,
        'spin_num_flag': spin_num_flag,
        'spin_name_flag': spin_name_flag,
    }

    # Write the data to standard output.
    write(file=sys.stdout, sep=sep, **column_flags)

214 215

def generate(mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, pipe=None, select=True, verbose=True):
    """Add one molecule/residue/spin to the sequence, creating only the containers that are missing.

    @keyword mol_name:  The molecule name.
    @type mol_name:     str or None
    @keyword res_num:   The residue number.
    @type res_num:      int or None
    @keyword res_name:  The residue name.
    @type res_name:     str or None
    @keyword spin_num:  The spin number.
    @type spin_num:     int or None
    @keyword spin_name: The spin name.
    @type spin_name:    str or None
    @keyword pipe:      The data pipe in which to generate the sequence.  This defaults to the current data pipe.
    @type pipe:         str
    @keyword select:    The selection flag stored in the spin container.
    @type select:       bool
    @keyword verbose:   Accepted for interface compatibility; it is not used within this function body.
    @type verbose:      bool
    """

    # Default to the current data pipe.
    if pipe is None:
        pipe = pipes.cdp_name()

    # The molecule - create it if it cannot be found.
    mol_id = generate_spin_id(mol_name=mol_name)
    if not return_molecule(mol_id, pipe=pipe):
        create_molecule(mol_name=mol_name, pipe=pipe)

    # The residue - create it if it cannot be found, or if the one returned differs in number or name.
    res_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name)
    residue = return_residue(res_id, pipe=pipe)
    if residue:
        num_differs = res_num is not None and residue.num != res_num
        name_differs = res_name is not None and residue.name != res_name
        need_residue = num_differs or name_differs
    else:
        need_residue = True
    if need_residue:
        create_residue(mol_name=mol_name, res_num=res_num, res_name=res_name, pipe=pipe)

    # The spin - same logic as for the residue.
    full_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)
    curr_spin = return_spin(full_id, pipe=pipe)
    if curr_spin:
        num_differs = spin_num is not None and curr_spin.num != spin_num
        name_differs = spin_name is not None and curr_spin.name != spin_name
        need_spin = num_differs or name_differs
    else:
        need_spin = True
    if need_spin:
        curr_spin = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name, pipe=pipe)

    # Store the selection flag, for both new and pre-existing spins.
    curr_spin.select = select

258 259

def read(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None):
    """Load the molecule, residue, and/or spin sequence into the current data pipe.

    All arguments are handed unchanged to relax_io.read_spin_data().  Each record it yields is added to the sequence via generate(), and the loaded sequence is then printed to sys.stdout.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @raises RelaxSequenceError: If the current data pipe already contains sequence data.
    @raises RelaxError:         If no sequence data could be loaded.
    """

    # The current data pipe must exist and must not already hold a sequence.
    pipes.test()
    if exists_mol_res_spin_data():
        raise RelaxSequenceError

    # One (mol_name, res_num, res_name, spin_num, spin_name) tuple per loaded spin.
    records = []

    # Generate the sequence record by record, as the data is read.
    spin_source = read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)
    for mol_name, res_num, res_name, spin_num, spin_name in spin_source:
        generate(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)
        records.append((mol_name, res_num, res_name, spin_num, spin_name))

    # No data, so fail.
    if not records:
        raise RelaxError("No sequence data could be loaded.")

    # Transpose the records into one list per column.
    mol_names, res_nums, res_names, spin_nums, spin_names = [list(column) for column in zip(*records)]

    # Print out what was loaded.
    write_spin_data(sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

330 331

def validate_sequence(data, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None):
    """Test if the sequence data is valid.

    The only function this performs is to raise a RelaxInvalidSeqError if the data is invalid.  For each column number supplied, the data must be long enough to contain that column.  In addition the residue number and spin number entries must evaluate to None or to an integer.

    Column numbers start at 1 (column N is read as data[N-1]).  A column argument of None (or 0) disables the corresponding check.


    @param data:            The sequence data, indexed by column.  NOTE(review): as data[col-1] is passed to eval(), a single row of strings is assumed - confirm against the callers.
    @type data:             list
    @keyword spin_id_col:   The column containing the spin ID strings.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.
    @type spin_name_col:    int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @raises RelaxInvalidSeqError:   If a requested column is missing, or a residue or spin number is neither None nor an integer.
    """

    # Spin ID.
    if spin_id_col and len(data) < spin_id_col:
        raise RelaxInvalidSeqError(data, "the Spin ID data is missing")

    # Molecule name data.
    if mol_name_col and len(data) < mol_name_col:
        raise RelaxInvalidSeqError(data, "the molecule name data is missing")

    # Residue number data.
    if res_num_col:
        # No data in column.
        if len(data) < res_num_col:
            raise RelaxInvalidSeqError(data, "the residue number data is missing")

        # Bad data in column.
        try:
            # NOTE(review): eval() executes arbitrary expressions - unsafe if the data can come from an untrusted file.
            res_num = eval(data[res_num_col-1])
            if not (res_num is None or is_int(res_num, raise_error=False)):
                raise ValueError
        except Exception:
            raise RelaxInvalidSeqError(data, "the residue number data '%s' is invalid" % data[res_num_col-1])

    # Residue name data.
    if res_name_col and len(data) < res_name_col:
        raise RelaxInvalidSeqError(data, "the residue name data is missing")

    # Spin number data.
    if spin_num_col:
        # No data in column.
        if len(data) < spin_num_col:
            raise RelaxInvalidSeqError(data, "the spin number data is missing")

        # Bad data in column (the spin number column itself must be checked, not the residue number column).
        try:
            # NOTE(review): eval() executes arbitrary expressions - unsafe if the data can come from an untrusted file.
            spin_num = eval(data[spin_num_col-1])
            if not (spin_num is None or is_int(spin_num, raise_error=False)):
                raise ValueError
        except Exception:
            raise RelaxInvalidSeqError(data, "the spin number data '%s' is invalid" % data[spin_num_col-1])

    # Spin name data.
    if spin_name_col and len(data) < spin_name_col:
        raise RelaxInvalidSeqError(data, "the spin name data is missing")

    # Data.
    if data_col and len(data) < data_col:
        raise RelaxInvalidSeqError(data, "the data is missing")

    # Errors.
    if error_col and len(data) < error_col:
        raise RelaxInvalidSeqError(data, "the error data is missing")

411 412

def write(file, dir=None, sep=None, mol_name_flag=True, res_num_flag=True, res_name_flag=True, spin_num_flag=True, spin_name_flag=True, force=False):
    """Write the molecule, residue, and/or spin sequence data.

    The sequence is collected from spin_loop() and handed to relax_io.write_spin_data(), which does the actual writing.  A column whose flag is False is passed on as None.


    @param file:                The name of the file to write the data to (display() in this module passes sys.stdout instead).
    @type file:                 str
    @keyword dir:               The directory to contain the file (defaults to the current directory if None).
    @type dir:                  str or None
    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @keyword force:             A flag which if True will cause an existing file to be overwritten.
    @type force:                bool
    @raises RelaxNoSequenceError:   If count_spins() reports no spins.
    """

    # Sequence data is required.
    if not count_spins():
        raise RelaxNoSequenceError

    # One (mol_name, res_num, res_name, spin_num, spin_name) tuple per spin.
    rows = [(mol_name, res_num, res_name, spin.num, spin.name) for spin, mol_name, res_num, res_name in spin_loop(full_info=True)]

    # Build each column, replacing the unwanted ones with None.
    mol_names = [row[0] for row in rows] if mol_name_flag else None
    res_nums = [row[1] for row in rows] if res_num_flag else None
    res_names = [row[2] for row in rows] if res_name_flag else None
    spin_nums = [row[3] for row in rows] if spin_num_flag else None
    spin_names = [row[4] for row in rows] if spin_name_flag else None

    # Write the data.
    write_spin_data(file=file, dir=dir, sep=sep, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, force=force)

471

Source Code for Module generic_fns.sequence