pipe_control.sequence

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2004,2006-2009,2011-2013 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for handling the molecule, residue, and spin sequence.""" 24 25 # Python module imports. 26 from copy import deepcopy 27 from re import search 28 import sys 29 30 # relax module imports. 31 from lib.errors import RelaxError, RelaxDiffMolNumError, RelaxDiffResNumError, RelaxDiffSeqError, RelaxDiffSpinNumError, RelaxNoSequenceError, RelaxSequenceError 32 from lib.sequence import read_spin_data, write_spin_data 33 from pipe_control import pipes 34 from pipe_control.interatomic import return_interatom_list 35 from pipe_control.mol_res_spin import count_molecules, count_residues, count_spins, create_molecule, create_residue, create_spin, exists_mol_res_spin_data, generate_spin_id, return_molecule, return_residue, return_spin, set_spin_element, set_spin_isotope, spin_loop 36 from pipe_control.pipes import check_pipe 37 38 39

def _is_proton(container):
    """Determine if a spin container represents a proton, by its element or by its name.

    @param container:   The spin container to check.  A container lacking the 'element' or 'name' attribute (or a value of None) is tolerated.
    @type container:    SpinContainer instance or None
    @return:            True if either the element or the name of the spin is 'H', False otherwise.
    @rtype:             bool
    """

    return getattr(container, 'element', None) == 'H' or getattr(container, 'name', None) == 'H'


def attach_protons():
    """Attach a single proton to all heteronuclei.

    A new spin named 'H' is created in the residue of every spin which is not itself a proton and which has no proton attached via an interatomic data container.  Once all the spins have been created, the element 'H' and the isotope '1H' are set for all spins matching the '@H' spin ID.
    """

    # The sequence info of all the protons to create.
    mol_names = []
    res_nums = []
    res_names = []

    # Loop over all spins.
    for spin, mol_name, res_num, res_name, spin_id in spin_loop(full_info=True, return_id=True):
        # The spin is already a proton (identified by element or, if the element is not set, by name).
        if _is_proton(spin):
            continue

        # Search the interatomic data containers for an attached proton.
        proton_found = False
        for interatom in return_interatom_list(spin_hash=spin._hash):
            # Get the spin at the other end of the interatomic data container.
            spin_attached = return_spin(spin_hash=interatom._spin_hash1)
            if spin_attached is spin:
                spin_attached = return_spin(spin_hash=interatom._spin_hash2)

            # Is it a proton?  The attached spin is tested here, not the heteronucleus itself.
            if _is_proton(spin_attached):
                proton_found = True
                break

        # Attached proton found.
        if proton_found:
            continue

        # Store the sequence info.
        mol_names.append(mol_name)
        res_nums.append(res_num)
        res_names.append(res_name)

    # Create all protons (this must be done out of the spin loop, as it affects the looping!).
    ids = []
    for mol_name, res_num, res_name in zip(mol_names, res_nums, res_names):
        # Create the spin container.
        create_spin(spin_name='H', res_name=res_name, res_num=res_num, mol_name=mol_name)
        ids.append(generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_name='H'))
    print("Creating the spins %s." % ids)

    # Set the element and spin type.
    set_spin_element(spin_id='@H', element='H')
    set_spin_isotope(spin_id='@H', isotope='1H')

87 88

def copy(pipe_from=None, pipe_to=None, preserve_select=False, empty=True, verbose=True):
    """Copy the molecule, residue, and spin sequence data from one data pipe to another.

    @keyword pipe_from:         The data pipe to copy the sequence data from.  This defaults to the current data pipe.
    @type pipe_from:            str
    @keyword pipe_to:           The data pipe to copy the sequence data to.  This defaults to the current data pipe.
    @type pipe_to:              str
    @keyword preserve_select:   A flag which if True will cause spin selections to be preserved.
    @type preserve_select:      bool
    @keyword empty:             A flag which if True will create a molecule, residue, and spin sequence in the target pipe lacking all of the spin data of the source pipe.  If False, then the spin data will also be copied.
    @type empty:                bool
    @keyword verbose:           A flag which if True will cause the copied sequence to be printed out once it has been generated.
    @type verbose:              bool
    @raises RelaxError:             If both pipe_from and pipe_to are None.
    @raises RelaxNoSequenceError:   If the pipe_from data pipe contains no sequence data.
    @raises RelaxSequenceError:     If the pipe_to data pipe already contains sequence data.
    """

    # Defaults.
    if pipe_from is None and pipe_to is None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
    elif pipe_from is None:
        pipe_from = pipes.cdp_name()
    elif pipe_to is None:
        pipe_to = pipes.cdp_name()

    # Test if the pipe_from and pipe_to data pipes exist.
    check_pipe(pipe_from)
    check_pipe(pipe_to)

    # Test if pipe_from contains sequence data.
    if not exists_mol_res_spin_data(pipe_from):
        raise RelaxNoSequenceError

    # Test if pipe_to contains sequence data.
    if exists_mol_res_spin_data(pipe_to):
        raise RelaxSequenceError

    # The copied sequence, for the printout.
    mol_names = []
    res_nums = []
    res_names = []
    spin_nums = []
    spin_names = []

    # Loop over the spins of the pipe_from data pipe.
    for spin, mol_name, res_num, res_name in spin_loop(pipe=pipe_from, full_info=True):
        # Generate the new sequence.
        new_spin = create_spin(spin_num=spin.num, spin_name=spin.name, res_num=res_num, res_name=res_name, mol_name=mol_name, pipe=pipe_to)[0]

        # Preserve selection, otherwise select the new spin.
        if preserve_select:
            new_spin.select = spin.select
        else:
            new_spin.select = True

        # Copy all the spin data.
        # NOTE(review): this copies every public instance attribute, so it presumably also copies 'select' regardless of preserve_select - confirm this is intended.
        if not empty:
            # Duplicate all the objects of the container.
            for name in dir(spin):
                # Skip special objects.
                if name.startswith('_'):
                    continue

                # Skip class methods.
                if name in spin.__class__.__dict__:
                    continue

                # Duplicate all other objects.
                setattr(new_spin, name, deepcopy(getattr(spin, name)))

        # Store the sequence info.
        mol_names.append(mol_name)
        res_nums.append(res_num)
        res_names.append(res_name)
        spin_nums.append(spin.num)
        spin_names.append(spin.name)

    # Print out the copied sequence itself, as display() takes no data pipe argument and hence cannot be directed at pipe_to.
    if verbose:
        write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

157 158

def compare_sequence(pipe1=None, pipe2=None, fail=True):
    """Check if two data pipes hold the same molecule, residue, and spin sequence.

    The molecule, residue, and spin counts are compared first, followed by the ordered list of spin IDs of each data pipe.

    @keyword pipe1: The name of the first data pipe.
    @type pipe1:    str
    @keyword pipe2: The name of the second data pipe.
    @type pipe2:    str
    @keyword fail:  A flag which if True causes an error to be raised at the first difference found.
    @type fail:     bool
    @return:        1 if the sequence is the same, 0 if different.
    @rtype:         int
    @raises RelaxDiffMolNumError:   If the number of molecules differs and the fail flag is True.
    @raises RelaxDiffResNumError:   If the number of residues differs and the fail flag is True.
    @raises RelaxDiffSpinNumError:  If the number of spins differs and the fail flag is True.
    @raises RelaxDiffSeqError:      If the spin IDs differ and the fail flag is True.
    """

    # Assume the sequences match until a difference is found.
    same = 1

    # The molecule, residue, and spin counts, checked in that order.
    count_checks = (
        (count_molecules, RelaxDiffMolNumError),
        (count_residues, RelaxDiffResNumError),
        (count_spins, RelaxDiffSpinNumError),
    )
    for counter, error in count_checks:
        if counter(pipe=pipe1) != counter(pipe=pipe2):
            if fail:
                raise error(pipe1, pipe2)
            same = 0

    # The newline-terminated spin IDs of each data pipe, as one string per pipe.
    ids1 = ''.join([spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe1)])
    ids2 = ''.join([spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe2)])

    # The sequence check.
    if ids1 != ids2:
        if fail:
            raise RelaxDiffSeqError(pipe1, pipe2)
        same = 0

    # The final status.
    return same

210 211

def display(sep=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, spin_num_flag=False, spin_name_flag=False):
    """Print the molecule, residue, and/or spin sequence data to standard output.

    This is a thin wrapper which calls the write() function with sys.stdout as the file.


    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @raises RelaxNoSequenceError:   If count_spins() reports that there are no spins.
    """

    # Nothing to show without sequence data.
    if not count_spins():
        raise RelaxNoSequenceError

    # The column flags, passed straight through to write().
    column_flags = {
        'mol_name_flag': mol_name_flag,
        'res_num_flag': res_num_flag,
        'res_name_flag': res_name_flag,
        'spin_num_flag': spin_num_flag,
        'spin_name_flag': spin_name_flag,
    }

    # Send the data to STDOUT.
    write(file=sys.stdout, sep=sep, **column_flags)

243 244

def generate(mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, pipe=None, select=True, verbose=True):
    """Add one item to the sequence, creating the molecule, residue, and spin containers as required.

    Each level is looked up via its generated ID and is only created if nothing is returned or if the returned container does not match the supplied number or name.

    @keyword mol_name:  The molecule name.
    @type mol_name:     str or None
    @keyword res_num:   The residue number.
    @type res_num:      int or None
    @keyword res_name:  The residue name.
    @type res_name:     str or None
    @keyword spin_num:  The spin number.
    @type spin_num:     int or None
    @keyword spin_name: The spin name.
    @type spin_name:    str or None
    @keyword pipe:      The data pipe in which to generate the sequence.  This defaults to the current data pipe.
    @type pipe:         str
    @keyword select:    The spin selection flag.
    @type select:       bool
    @keyword verbose:   A verbosity flag.  It is accepted but not used within this function.
    @type verbose:      bool
    """

    # Default to the current data pipe.
    if pipe is None:
        pipe = pipes.cdp_name()

    # The molecule level.
    mol_id = generate_spin_id(mol_name=mol_name)
    if not return_molecule(mol_id, pipe=pipe):
        create_molecule(mol_name=mol_name, pipe=pipe)

    # The residue level.
    res_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name)
    residue = return_residue(res_id, pipe=pipe)
    new_residue = True
    if residue:
        new_residue = (res_num is not None and residue.num != res_num) or (res_name is not None and residue.name != res_name)
    if new_residue:
        create_residue(mol_name=mol_name, res_num=res_num, res_name=res_name, pipe=pipe)

    # The spin level.
    full_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)
    target = return_spin(spin_id=full_id, pipe=pipe)
    new_spin = True
    if target:
        new_spin = (spin_num is not None and target.num != spin_num) or (spin_name is not None and target.name != spin_name)
    if new_spin:
        target = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name, pipe=pipe)[0]

    # The selection flag is applied to the found or newly created spin.
    target.select = select

287 288

def read(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None):
    """Load the molecule, residue, and/or spin sequence data from a file.

    Each record returned by read_spin_data() is added to the sequence via generate(), and the loaded sequence is then printed to standard output.

    @param file:            The name of the file to open.
    @type file:             str
    @param dir:             The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @raises RelaxSequenceError: If sequence data already exists.
    @raises RelaxError:         If no sequence data could be loaded.
    """

    # The current data pipe must exist and must not yet hold any sequence data.
    check_pipe()
    if exists_mol_res_spin_data():
        raise RelaxSequenceError

    # The per-column storage for the final printout.
    mol_names, res_nums, res_names, spin_nums, spin_names = [], [], [], [], []

    # The parsed records.
    records = read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

    # Build the sequence one record at a time.
    for mol_name, res_num, res_name, spin_num, spin_name in records:
        # Create the containers.
        generate(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)

        # Record what was added.
        mol_names.append(mol_name)
        res_nums.append(res_num)
        res_names.append(res_name)
        spin_nums.append(spin_num)
        spin_names.append(spin_name)

    # Nothing was loaded.
    if len(spin_names) == 0:
        raise RelaxError("No sequence data could be loaded.")

    # Show the loaded sequence.
    write_spin_data(sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

359 360

def return_attached_protons(spin_hash=None):
    """Collect the proton spin containers connected to the given spin via interatomic data containers.

    A connected spin counts as a proton if its element is 'H' or if its name is 'H'.

    @keyword spin_hash: The unique spin hash.
    @type spin_hash:    str
    @return:            The list of proton spin containers attached to the given spin.  This is empty if there are no interatomic data containers for the spin.
    @rtype:             list of SpinContainer instances
    """

    # The protons found so far.
    protons = []

    # Walk over every interatomic data container of the spin (none means an empty result).
    for interatom in return_interatom_list(spin_hash=spin_hash):
        # The hash of the spin at the other end of the container.
        other_hash = interatom._spin_hash2 if interatom._spin_hash1 == spin_hash else interatom._spin_hash1
        partner = return_spin(spin_hash=other_hash)

        # Keep it if it is a proton.
        element_is_h = hasattr(partner, 'element') and partner.element == 'H'
        if element_is_h or partner.name == 'H':
            protons.append(partner)

    # All attached protons.
    return protons

394 395

def write(file, dir=None, sep=None, mol_name_flag=True, res_num_flag=True, res_name_flag=True, spin_num_flag=True, spin_name_flag=True, force=False):
    """Write out the molecule, residue, and/or spin sequence data.

    The sequence is gathered with spin_loop() and handed to the write_spin_data() function, which does most of the work.


    @param file:                The name of the file to write the data to.  The display() function passes sys.stdout here instead of a name.
    @type file:                 str or file object
    @keyword dir:               The directory to contain the file (defaults to the current directory if None).
    @type dir:                  str or None
    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @keyword force:             A flag which if True will cause an existing file to be overwritten.
    @type force:                bool
    @raises RelaxNoSequenceError:   If count_spins() reports that there are no spins.
    """

    # Sequence data is required.
    if not count_spins():
        raise RelaxNoSequenceError

    # Gather the full sequence, one list per column.
    mol_names, res_nums, res_names, spin_nums, spin_names = [], [], [], [], []
    for spin, mol_name, res_num, res_name in spin_loop(full_info=True):
        mol_names.append(mol_name)
        res_nums.append(res_num)
        res_names.append(res_name)
        spin_nums.append(spin.num)
        spin_names.append(spin.name)

    # Write the data, replacing each unwanted column with None.
    write_spin_data(
        file=file,
        dir=dir,
        sep=sep,
        mol_names=mol_names if mol_name_flag else None,
        res_nums=res_nums if res_num_flag else None,
        res_names=res_names if res_name_flag else None,
        spin_nums=spin_nums if spin_num_flag else None,
        spin_names=spin_names if spin_name_flag else None,
        force=force,
    )

454

Source Code for Module pipe_control.sequence