pipe_control.sequence

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2014 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for handling the molecule, residue, and spin sequence.""" 24 25 # Python module imports. 26 from copy import deepcopy 27 from re import search 28 import sys 29 30 # relax module imports. 31 from lib.errors import RelaxError, RelaxDiffMolNumError, RelaxDiffResNumError, RelaxDiffSeqError, RelaxDiffSpinNumError, RelaxNoSequenceError, RelaxSequenceError 32 from lib.sequence import read_spin_data, write_spin_data 33 from pipe_control import pipes 34 from pipe_control.interatomic import return_interatom_list 35 from pipe_control.mol_res_spin import count_molecules, count_residues, count_spins, create_molecule, create_residue, create_spin, exists_mol_res_spin_data, generate_spin_id, return_molecule, return_residue, return_spin, set_spin_element, set_spin_isotope, spin_loop 36 37 38

def _is_proton(spin):
    """Determine if the given spin container represents a proton.

    A spin is taken to be a proton if its 'element' attribute is 'H' or, failing that, if its name is 'H'.  This is the same test as applied in return_attached_protons().

    @param spin:    The spin container to check (None is accepted and is never a proton).
    @type spin:     SpinContainer instance or None
    @return:        True if the container looks like a proton, False otherwise.
    @rtype:         bool
    """

    # Guard against a missing container so that the attribute lookups below cannot fail.
    if spin is None:
        return False

    return getattr(spin, 'element', None) == 'H' or getattr(spin, 'name', None) == 'H'


def attach_protons():
    """Attach a single proton to all heteronuclei.

    Every spin yielded by spin_loop() which is not itself a proton, and which has no proton among the spins it is linked to via interatomic data containers, receives a new spin named 'H' in the same molecule and residue.
    """

    # Collect the sequence info of all spins missing a proton.
    pending = []
    for spin, mol_name, res_num, res_name, spin_id in spin_loop(full_info=True, return_id=True):
        # The spin is already a proton.
        if _is_proton(spin):
            continue

        # Search the interatomic data containers for an attached proton.
        proton_found = False
        for interatom in return_interatom_list(spin_id):
            # Get the attached spin, i.e. the end of the pair which is not the current spin.
            spin_attached = return_spin(interatom.spin_id1)
            if spin_attached is spin:
                spin_attached = return_spin(interatom.spin_id2)

            # Is it a proton?  (The attached spin is tested, not the current spin.)
            if _is_proton(spin_attached):
                proton_found = True
                break

        # Attached proton found.
        if proton_found:
            continue

        # Store the sequence info.
        pending.append((mol_name, res_num, res_name))

    # Create all protons (this must be done out of the spin loop, as it affects the looping!).
    for mol_name, res_num, res_name in pending:
        # Create the spin container.
        create_spin(spin_name='H', res_name=res_name, res_num=res_num, mol_name=mol_name)

        # Set the element and spin type via the '@H' spin ID.
        set_spin_element(spin_id='@H', element='H')
        set_spin_isotope(spin_id='@H', isotope='1H')

83 84

def copy(pipe_from=None, pipe_to=None, preserve_select=False, empty=True, verbose=True):
    """Copy the molecule, residue, and spin sequence data from one data pipe to another.

    @keyword pipe_from:         The data pipe to copy the sequence data from.  This defaults to the current data pipe.
    @type pipe_from:            str
    @keyword pipe_to:           The data pipe to copy the sequence data to.  This defaults to the current data pipe.
    @type pipe_to:              str
    @keyword preserve_select:   A flag which if True will cause spin selections to be preserved.  If False, all spins in the target pipe are selected.
    @type preserve_select:      bool
    @keyword empty:             A flag which if True will create a molecule, residue, and spin sequence in the target pipe lacking all of the spin data of the source pipe.  If False, then the spin data will also be copied.
    @type empty:                bool
    @keyword verbose:           A flag which if True will cause the sequence to be printed out via display() once it has been generated.
    @type verbose:              bool
    @raises RelaxError:             If both pipe_from and pipe_to are None.
    @raises RelaxNoSequenceError:   If pipe_from contains no sequence data.
    @raises RelaxSequenceError:     If pipe_to already contains sequence data.
    """

    # Defaults.
    if pipe_from is None and pipe_to is None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
    elif pipe_from is None:
        pipe_from = pipes.cdp_name()
    elif pipe_to is None:
        pipe_to = pipes.cdp_name()

    # Test if the pipe_from and pipe_to data pipes exist.
    pipes.test(pipe_from)
    pipes.test(pipe_to)

    # Test if pipe_from contains sequence data.
    if not exists_mol_res_spin_data(pipe_from):
        raise RelaxNoSequenceError

    # Test if pipe_to contains sequence data.
    if exists_mol_res_spin_data(pipe_to):
        raise RelaxSequenceError

    # Loop over the spins of the pipe_from data pipe.
    for spin, mol_name, res_num, res_name in spin_loop(pipe=pipe_from, full_info=True):
        # Generate the new sequence.
        new_spin = create_spin(spin_num=spin.num, spin_name=spin.name, res_num=res_num, res_name=res_name, mol_name=mol_name, pipe=pipe_to)

        # Copy all the spin data.
        if not empty:
            # The names defined on the class itself (methods, class attributes), which are not duplicated.
            class_names = spin.__class__.__dict__

            # Duplicate all the objects of the container.
            for name in dir(spin):
                # Skip special objects.
                if name.startswith('_'):
                    continue

                # Skip class methods.
                if name in class_names:
                    continue

                # Duplicate all other objects.
                setattr(new_spin, name, deepcopy(getattr(spin, name)))

        # Set the selection last, so that the data copy above cannot override the preserve_select choice.
        new_spin.select = spin.select if preserve_select else True

    # Print out.
    # NOTE(review):  display() takes no pipe argument, so presumably it reports on the current data pipe rather than pipe_to - confirm.
    if verbose:
        display(mol_name_flag=True, res_num_flag=True, res_name_flag=True, spin_num_flag=True, spin_name_flag=True)

153 154

def compare_sequence(pipe1=None, pipe2=None, fail=True):
    """Check that two data pipes hold the same molecule, residue, and spin sequence.

    The molecule, residue, and spin counts are compared first, followed by the ordered spin ID strings of both pipes.

    @keyword pipe1:     The name of the first data pipe.
    @type pipe1:        str
    @keyword pipe2:     The name of the second data pipe.
    @type pipe2:        str
    @keyword fail:      A flag which if True causes the first detected difference to be raised as an error.
    @type fail:         bool
    @return:            1 if the sequence is the same, 0 if different.
    @rtype:             int
    @raises RelaxDiffMolNumError:   If the molecule counts differ and the fail flag is True.
    @raises RelaxDiffResNumError:   If the residue counts differ and the fail flag is True.
    @raises RelaxDiffSpinNumError:  If the spin counts differ and the fail flag is True.
    @raises RelaxDiffSeqError:      If the spin ID strings differ and the fail flag is True.
    """

    # The result, assumed identical until a difference is found.
    identical = 1

    # The count comparisons, in order, paired with the error raised on a mismatch.
    count_checks = (
        (count_molecules, RelaxDiffMolNumError),
        (count_residues, RelaxDiffResNumError),
        (count_spins, RelaxDiffSpinNumError),
    )
    for counter, error_class in count_checks:
        if counter(pipe=pipe1) != counter(pipe=pipe2):
            identical = 0
            if fail:
                raise error_class(pipe1, pipe2)

    # One spin ID per line for each of the two sequences.
    ids1 = ''.join(spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe1))
    ids2 = ''.join(spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe2))

    # Sequence check.
    if ids1 != ids2:
        identical = 0
        if fail:
            raise RelaxDiffSeqError(pipe1, pipe2)

    return identical

206 207

def display(sep=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, spin_num_flag=False, spin_name_flag=False):
    """Print the molecule, residue, and/or spin sequence data to standard output.

    The work is handed over to the write() function, with sys.stdout as the file.

    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @raises RelaxNoSequenceError:   If count_spins() reports no spins.
    """

    # Nothing to show without sequence data.
    if not count_spins():
        raise RelaxNoSequenceError

    # The column flags, passed straight through to write().
    column_flags = {
        'mol_name_flag': mol_name_flag,
        'res_num_flag': res_num_flag,
        'res_name_flag': res_name_flag,
        'spin_num_flag': spin_num_flag,
        'spin_name_flag': spin_name_flag,
    }

    # Write the data to STDOUT.
    write(file=sys.stdout, sep=sep, **column_flags)

239 240

def _container_differs(container, num, name):
    """Determine if a container's number or name differs from the requested values.

    A requested value of None is not compared.

    @param container:   The residue or spin container to check.
    @type container:    object with 'num' and 'name' attributes
    @param num:         The requested number.
    @type num:          int or None
    @param name:        The requested name.
    @type name:         str or None
    @return:            A true value if the number or name were requested and do not match.
    @rtype:             bool
    """

    return (num is not None and container.num != num) or (name is not None and container.name != name)


def generate(mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, pipe=None, select=True, verbose=True):
    """Build up the sequence one item at a time, creating the molecule, residue, and spin containers only where missing.

    @keyword mol_name:  The molecule name.
    @type mol_name:     str or None
    @keyword res_num:   The residue number.
    @type res_num:      int or None
    @keyword res_name:  The residue name.
    @type res_name:     str or None
    @keyword spin_num:  The spin number.
    @type spin_num:     int or None
    @keyword spin_name: The spin name.
    @type spin_name:    str or None
    @keyword pipe:      The data pipe in which to generate the sequence.  This defaults to the current data pipe.
    @type pipe:         str
    @keyword select:    The spin selection flag, assigned to the spin whether it is newly created or pre-existing.
    @type select:       bool
    @keyword verbose:   A flag intended to control printouts as the sequence is generated.  It is not referenced in the body of this function.
    @type verbose:      bool
    """

    # Fall back to the current data pipe.
    if pipe is None:
        pipe = pipes.cdp_name()

    # The molecule, created if it cannot be found.
    mol_id = generate_spin_id(mol_name=mol_name)
    if not return_molecule(mol_id, pipe=pipe):
        create_molecule(mol_name=mol_name, pipe=pipe)

    # The residue, created if it cannot be found or if the one found does not match the request.
    res_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name)
    residue = return_residue(res_id, pipe=pipe)
    if not residue or _container_differs(residue, res_num, res_name):
        create_residue(mol_name=mol_name, res_num=res_num, res_name=res_name, pipe=pipe)

    # The spin, created if it cannot be found or if the one found does not match the request.
    full_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)
    target_spin = return_spin(full_id, pipe=pipe)
    if not target_spin or _container_differs(target_spin, spin_num, spin_name):
        target_spin = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name, pipe=pipe)

    # Set the selection flag.
    target_spin.select = select

283 284

def read(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None):
    """Load the molecule, residue, and/or spin sequence from a file into the current data pipe.

    Each record produced by read_spin_data() is added to the sequence via generate(), and the loaded sequence is finally printed to standard output.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @raises RelaxSequenceError: If sequence data already exists.
    @raises RelaxError:         If no sequence data could be loaded.
    """

    # The current data pipe must exist.
    pipes.test()

    # Refuse to load on top of existing sequence data.
    if exists_mol_res_spin_data():
        raise RelaxSequenceError

    # The loaded records, as (mol_name, res_num, res_name, spin_num, spin_name) tuples.
    records = []

    # Generate the sequence record by record.
    spin_data = read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)
    for record in spin_data:
        mol_name, res_num, res_name, spin_num, spin_name = record

        # Add the spin.
        generate(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)

        # Remember it for the final printout.
        records.append((mol_name, res_num, res_name, spin_num, spin_name))

    # No data, so fail.
    if not records:
        raise RelaxError("No sequence data could be loaded.")

    # Transpose the records into one list per column.
    mol_names, res_nums, res_names, spin_nums, spin_names = (list(column) for column in zip(*records))

    # Write the data.
    write_spin_data(sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

355 356

def return_attached_protons(spin_id=None):
    """Collect the proton spin containers linked to the given spin by interatomic data containers.

    For each interatomic data container, the spin at the other end from spin_id is fetched.  It is kept if its 'element' attribute is 'H' or, failing that, if its name is 'H'.

    @keyword spin_id:   The spin ID string.
    @type spin_id:      str
    @return:            The list of proton spin containers attached to the given spin (empty if there are no interatomic data containers).
    @rtype:             list of SpinContainer instances
    """

    # The protons found so far.
    protons = []

    # Walk over all interatomic data containers for the spin.
    for interatom in return_interatom_list(spin_id):
        # The ID of the spin at the other end of the pair.
        partner_id = interatom.spin_id2 if interatom.spin_id1 == spin_id else interatom.spin_id1
        partner = return_spin(partner_id)

        # Keep it if it is a proton.
        if (hasattr(partner, 'element') and partner.element == 'H') or partner.name == 'H':
            protons.append(partner)

    return protons

390 391

def write(file, dir=None, sep=None, mol_name_flag=True, res_num_flag=True, res_name_flag=True, spin_num_flag=True, spin_name_flag=True, force=False):
    """Write out the molecule, residue, and/or spin sequence data.

    The sequence is gathered with spin_loop() and the output is delegated to the write_spin_data() function.

    @param file:                The name of the file to write the data to.  The display() function passes sys.stdout here instead of a name.
    @type file:                 str or file object
    @keyword dir:               The directory to contain the file (defaults to the current directory if None).
    @type dir:                  str or None
    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @keyword force:             A flag which if True will cause an existing file to be overwritten.
    @type force:                bool
    @raises RelaxNoSequenceError:   If count_spins() reports no spins.
    """

    # Nothing to write without sequence data.
    if not count_spins():
        raise RelaxNoSequenceError

    # One (mol_name, res_num, res_name, spin_num, spin_name) row per spin.
    rows = [(mol_name, res_num, res_name, spin.num, spin.name) for spin, mol_name, res_num, res_name in spin_loop(full_info=True)]

    def column(index, wanted):
        """Return the given column of the rows as a list, or None if the column is unwanted."""

        if not wanted:
            return None
        return [row[index] for row in rows]

    # Write the data, with unwanted columns replaced by None.
    write_spin_data(
        file=file,
        dir=dir,
        sep=sep,
        mol_names=column(0, mol_name_flag),
        res_nums=column(1, res_num_flag),
        res_names=column(2, res_name_flag),
        spin_nums=column(3, spin_num_flag),
        spin_names=column(4, spin_name_flag),
        force=force,
    )

450

Source Code for Module pipe_control.sequence