generic_fns.sequence

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2013 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for handling the molecule, residue, and spin sequence.""" 24 25 # Python module imports. 26 from copy import deepcopy 27 from re import search 28 import sys 29 30 # relax module imports. 31 from arg_check import is_int 32 from generic_fns.interatomic import return_interatom_list 33 from generic_fns.mol_res_spin import count_molecules, count_residues, count_spins, create_molecule, create_residue, create_spin, exists_mol_res_spin_data, generate_spin_id, return_molecule, return_residue, return_spin, set_spin_element, set_spin_isotope, spin_id_to_data_list, spin_loop 34 from generic_fns import pipes 35 from relax_errors import RelaxError, RelaxDiffMolNumError, RelaxDiffResNumError, RelaxDiffSeqError, RelaxDiffSpinNumError, RelaxFileEmptyError, RelaxInvalidSeqError, RelaxNoSequenceError, RelaxSequenceError 36 from relax_io import open_write_file, read_spin_data, write_spin_data 37 38 39

def attach_protons():
    """Attach a single proton to all heteronuclei.

    Every spin yielded by spin_loop() which is not itself flagged as element 'H' and for which no
    attached proton is found via its interatomic data containers is given a new spin named 'H' in the
    same molecule and residue.  Afterwards the element and isotope are set via the '@H' spin ID.
    """

    # The (molecule name, residue number, residue name) of each spin still requiring a proton.
    pending = []

    for spin, mol_name, res_num, res_name, spin_id in spin_loop(full_info=True, return_id=True):
        # Protons themselves are skipped.
        if hasattr(spin, 'element') and spin.element == 'H':
            continue

        # Search the interatomic data containers of this spin for an attached proton.
        has_proton = False
        for interatom in return_interatom_list(spin_id):
            # The partner is whichever end of the interatomic container is not the current spin.
            partner = return_spin(interatom.spin_id1)
            if partner is spin:
                partner = return_spin(interatom.spin_id2)

            # NOTE(review): the name test is applied to the current spin rather than to the partner -
            # possibly 'partner.name' was intended.  The original behaviour is kept - TODO confirm.
            if (hasattr(partner, 'element') and partner.element == 'H') or spin.name == 'H':
                has_proton = True
                break

        # Nothing attached yet, so remember where the proton has to go.
        if not has_proton:
            pending.append((mol_name, res_num, res_name))

    # The creation is deferred until here, as adding spins inside the spin loop would affect the looping.
    for mol_name, res_num, res_name in pending:
        create_spin(spin_name='H', res_name=res_name, res_num=res_num, mol_name=mol_name)

    # Set the element and spin type (presumably '@H' selects all spins named 'H' - verify against the spin ID rules).
    set_spin_element(spin_id='@H', element='H')
    set_spin_isotope(spin_id='@H', isotope='1H')

84 85

def copy(pipe_from=None, pipe_to=None, preserve_select=False, empty=True, verbose=True):
    """Copy the molecule, residue, and spin sequence data from one data pipe to another.

    @keyword pipe_from:         The data pipe to copy the sequence data from.  This defaults to the current data pipe.
    @type pipe_from:            str
    @keyword pipe_to:           The data pipe to copy the sequence data to.  This defaults to the current data pipe.
    @type pipe_to:              str
    @keyword preserve_select:   A flag which if True will cause spin selections to be preserved.  If False, all new spins are selected.
    @type preserve_select:      bool
    @keyword empty:             A flag which if True will create a molecule, residue, and spin sequence in the target pipe lacking all of the spin data of the source pipe.  If False, then the spin data will also be copied.
    @type empty:                bool
    @keyword verbose:           A flag which if True will cause the sequence to be displayed once it has been generated.
    @type verbose:              bool
    @raises RelaxError:             If both pipe_from and pipe_to are None.
    @raises RelaxNoSequenceError:   If the pipe_from data pipe contains no sequence data.
    @raises RelaxSequenceError:     If the pipe_to data pipe already contains sequence data.
    """

    # Defaults - at most one of the two pipes may fall back to the current data pipe.
    if pipe_from is None and pipe_to is None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
    elif pipe_from is None:
        pipe_from = pipes.cdp_name()
    elif pipe_to is None:
        pipe_to = pipes.cdp_name()

    # Test if the pipe_from and pipe_to data pipes exist.
    pipes.test(pipe_from)
    pipes.test(pipe_to)

    # Test if pipe_from contains sequence data.
    if not exists_mol_res_spin_data(pipe_from):
        raise RelaxNoSequenceError

    # Test if pipe_to contains sequence data.
    if exists_mol_res_spin_data(pipe_to):
        raise RelaxSequenceError

    # Loop over the spins of the pipe_from data pipe.
    for spin, mol_name, res_num, res_name in spin_loop(pipe=pipe_from, full_info=True):
        # Generate the new sequence.
        new_spin = create_spin(spin_num=spin.num, spin_name=spin.name, res_num=res_num, res_name=res_name, mol_name=mol_name, pipe=pipe_to)

        # Preserve the selection, or explicitly select the new spin (previously the value was assigned to an unused local variable).
        if preserve_select:
            new_spin.select = spin.select
        else:
            new_spin.select = True

        # Copy all the spin data.
        if not empty:
            # The names defined on the class itself (methods, etc.) are not duplicated.
            class_names = spin.__class__.__dict__

            # Duplicate all the objects of the container.
            for name in dir(spin):
                # Skip special objects.
                if name.startswith('_'):
                    continue

                # Skip class methods.
                if name in class_names:
                    continue

                # Duplicate all other objects.
                setattr(new_spin, name, deepcopy(getattr(spin, name)))

    # Print out (display() takes no pipe argument, so this shows the sequence of the current data pipe).
    if verbose:
        display(mol_name_flag=True, res_num_flag=True, res_name_flag=True, spin_num_flag=True, spin_name_flag=True)

154 155

def compare_sequence(pipe1=None, pipe2=None, fail=True):
    """Compare the sequence in two data pipes.

    The number of molecules, residues and spins are compared first, followed by the ordered list of
    spin ID strings of both data pipes.

    @keyword pipe1: The name of the first data pipe.
    @type pipe1:    str
    @keyword pipe2: The name of the second data pipe.
    @type pipe2:    str
    @keyword fail:  A flag which if True causes a RelaxError to be raised at the first difference found.
    @type fail:     bool
    @return:        1 if the sequence is the same, 0 if different.
    @rtype:         int
    @raises RelaxError: If the sequence is different and the fail flag is True.
    """

    # Assume the sequences match until shown otherwise.
    same = 1

    # The counting functions, each paired with the error for a mismatch (molecules, then residues, then spins).
    count_checks = (
        (count_molecules, RelaxDiffMolNumError),
        (count_residues, RelaxDiffResNumError),
        (count_spins, RelaxDiffSpinNumError),
    )
    for count, error in count_checks:
        if count(pipe=pipe1) != count(pipe=pipe2):
            same = 0
            if fail:
                raise error(pipe1, pipe2)

    # A string representation of the 2 sequences, one spin ID per line.
    ids1 = ''.join(spin_id + '\n' for _, spin_id in spin_loop(return_id=True, pipe=pipe1))
    ids2 = ''.join(spin_id + '\n' for _, spin_id in spin_loop(return_id=True, pipe=pipe2))

    # Sequence check.
    if ids1 != ids2:
        same = 0
        if fail:
            raise RelaxDiffSeqError(pipe1, pipe2)

    return same

207 208

def display(sep=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, spin_num_flag=False, spin_name_flag=False):
    """Display the molecule, residue, and/or spin sequence data.

    This calls the write() function with sys.stdout as the file to do most of the work.


    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @raises RelaxNoSequenceError:   If no spins are loaded.
    """

    # Nothing to show without sequence data.
    if not count_spins():
        raise RelaxNoSequenceError

    # The column selection, passed straight through to write().
    columns = {
        'mol_name_flag': mol_name_flag,
        'res_num_flag': res_num_flag,
        'res_name_flag': res_name_flag,
        'spin_num_flag': spin_num_flag,
        'spin_name_flag': spin_name_flag,
    }

    # Send the sequence to STDOUT.
    write(file=sys.stdout, sep=sep, **columns)

240 241

def generate(mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, pipe=None, select=True, verbose=True):
    """Generate the sequence item-by-item by adding a single molecule/residue/spin container as necessary.

    A molecule, residue or spin container is only created when no matching one is returned for the
    corresponding ID, or when the returned container has a different number or name.

    @keyword mol_name:  The molecule name.
    @type mol_name:     str or None
    @keyword res_num:   The residue number.
    @type res_num:      int or None
    @keyword res_name:  The residue name.
    @type res_name:     str or None
    @keyword spin_num:  The spin number.
    @type spin_num:     int or None
    @keyword spin_name: The spin name.
    @type spin_name:    str or None
    @keyword pipe:      The data pipe in which to generate the sequence.  This defaults to the current data pipe.
    @type pipe:         str
    @keyword select:    The spin selection flag.
    @type select:       bool
    @keyword verbose:   A flag intended to control per-spin printouts.  It is accepted for interface compatibility but is not read by this function.
    @type verbose:      bool
    """

    # Fall back to the current data pipe.
    if pipe is None:
        pipe = pipes.cdp_name()

    # The molecule.
    mol_id = generate_spin_id(mol_name=mol_name)
    if not return_molecule(mol_id, pipe=pipe):
        create_molecule(mol_name=mol_name, pipe=pipe)

    # The residue - created if absent, or if the one found differs in number or name.
    res_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name)
    residue = return_residue(res_id, pipe=pipe)
    new_residue = (
        not residue
        or (res_num is not None and residue.num != res_num)
        or (res_name is not None and residue.name != res_name)
    )
    if new_residue:
        create_residue(mol_name=mol_name, res_num=res_num, res_name=res_name, pipe=pipe)

    # The spin - created if absent, or if the one found differs in number or name.
    full_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)
    target = return_spin(full_id, pipe=pipe)
    new_spin = (
        not target
        or (spin_num is not None and target.num != spin_num)
        or (spin_name is not None and target.name != spin_name)
    )
    if new_spin:
        target = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name, pipe=pipe)

    # The selection flag is set on both new and pre-existing spins.
    target.select = select

284 285

def read(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None):
    """Read the molecule, residue, and/or spin sequence data from file.

    Each record returned by relax_io.read_spin_data() is added to the sequence via generate(), and the
    loaded sequence is then written to STDOUT.

    @param file:            The name of the file to open.
    @type file:             str
    @param dir:             The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @raises RelaxSequenceError: If sequence data already exists.
    @raises RelaxError:         If no sequence data could be loaded.
    """

    # The current data pipe must exist and must not yet hold a sequence.
    pipes.test()
    if exists_mol_res_spin_data():
        raise RelaxSequenceError

    # Build the sequence, keeping one (mol_name, res_num, res_name, spin_num, spin_name) record per spin.
    records = []
    spin_data = read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)
    for mol_name, res_num, res_name, spin_num, spin_name in spin_data:
        generate(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)
        records.append((mol_name, res_num, res_name, spin_num, spin_name))

    # No data, so fail.
    if not records:
        raise RelaxError("No sequence data could be loaded.")

    # Transpose the records into one list per column.
    mol_names, res_nums, res_names, spin_nums, spin_names = [list(column) for column in zip(*records)]

    # Report what was loaded.
    write_spin_data(sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

356 357

def validate_sequence(data, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None):
    """Test if the sequence data is valid.

    The only function this performs is to raise a RelaxError if the data is invalid.  Column numbers
    start at 1 (column N is read from data[N-1]), and a column argument of None (or 0) is skipped.


    @param data:            The sequence data.  It is indexed directly by column, so presumably this is a single row of column strings - verify against the callers.
    @type data:             list of str
    @keyword spin_id_col:   The column containing the spin ID strings.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.
    @type spin_name_col:    int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the error data.
    @type error_col:        int or None
    @raises RelaxInvalidSeqError:   If a requested column is missing, or if the residue or spin number is neither None nor an integer.
    """

    # Spin ID.
    if spin_id_col:
        if len(data) < spin_id_col:
            raise RelaxInvalidSeqError(data, "the Spin ID data is missing")

    # Molecule name data.
    if mol_name_col:
        if len(data) < mol_name_col:
            raise RelaxInvalidSeqError(data, "the molecule name data is missing")

    # Residue number data.
    if res_num_col:
        # No data in column.
        if len(data) < res_num_col:
            raise RelaxInvalidSeqError(data, "the residue number data is missing")

        # Bad data in column.
        _validate_seq_number(data, res_num_col, "residue number")

    # Residue name data.
    if res_name_col:
        if len(data) < res_name_col:
            raise RelaxInvalidSeqError(data, "the residue name data is missing")

    # Spin number data.
    if spin_num_col:
        # No data in column.
        if len(data) < spin_num_col:
            raise RelaxInvalidSeqError(data, "the spin number data is missing")

        # Bad data in column (this must check the spin number column, not the residue number column).
        _validate_seq_number(data, spin_num_col, "spin number")

    # Spin name data.
    if spin_name_col:
        if len(data) < spin_name_col:
            raise RelaxInvalidSeqError(data, "the spin name data is missing")

    # Data.
    if data_col:
        if len(data) < data_col:
            raise RelaxInvalidSeqError(data, "the data is missing")

    # Errors
    if error_col:
        if len(data) < error_col:
            raise RelaxInvalidSeqError(data, "the error data is missing")


def _validate_seq_number(data, col, label):
    """Check that the given column of the sequence data evaluates to None or an integer.

    @param data:    The sequence data, indexed by column.
    @type data:     list of str
    @param col:     The column number, starting at 1.
    @type col:      int
    @param label:   The description of the column used in the error message, e.g. 'residue number'.
    @type label:    str
    @raises RelaxInvalidSeqError:   If the column contents cannot be evaluated, or are neither None nor an integer.
    """

    text = data[col-1]
    try:
        # NOTE(review): eval() executes arbitrary expressions and this text originates from a data file.
        # It is kept for backwards compatibility - consider a restricted parser for untrusted input.
        value = eval(text)
        if not (value is None or is_int(value, raise_error=False)):
            raise ValueError
    except Exception:
        raise RelaxInvalidSeqError(data, "the %s data '%s' is invalid" % (label, text))

437 438

def write(file, dir=None, sep=None, mol_name_flag=True, res_num_flag=True, res_name_flag=True, spin_num_flag=True, spin_name_flag=True, force=False):
    """Write the molecule, residue, and/or sequence data.

    This calls the relax_io.write_spin_data() function to do most of the work.


    @param file:                The name of the file to write the data to.  The display() function of this module passes sys.stdout here instead of a name.
    @type file:                 str or file object
    @keyword dir:               The directory to contain the file (defaults to the current directory if None).
    @type dir:                  str or None
    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @keyword force:             A flag which if True will cause an existing file to be overwritten.
    @type force:                bool
    @raises RelaxNoSequenceError:   If no spins are loaded.
    """

    # Nothing to write without sequence data.
    if not count_spins():
        raise RelaxNoSequenceError

    # Gather one (mol_name, res_num, res_name, spin_num, spin_name) record per spin.
    records = [
        (mol_name, res_num, res_name, spin.num, spin.name)
        for spin, mol_name, res_num, res_name in spin_loop(full_info=True)
    ]

    # Split the records into columns, replacing each unwanted column with None.
    mol_names = [record[0] for record in records] if mol_name_flag else None
    res_nums = [record[1] for record in records] if res_num_flag else None
    res_names = [record[2] for record in records] if res_name_flag else None
    spin_nums = [record[3] for record in records] if spin_num_flag else None
    spin_names = [record[4] for record in records] if spin_name_flag else None

    # Hand over to the generic spin data writer.
    write_spin_data(file=file, dir=dir, sep=sep, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, force=force)

497

Source Code for Module generic_fns.sequence