generic_fns.sequence

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2011 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax. # 6 # # 7 # relax is free software; you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation; either version 2 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # relax is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with relax; if not, write to the Free Software # 19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # 20 # # 21 ############################################################################### 22 23 # Module docstring. 24 """Module for handling the molecule, residue, and spin sequence.""" 25 26 # Python module imports 27 from types import IntType, NoneType 28 29 # relax module imports. 30 from generic_fns.mol_res_spin import count_molecules, count_residues, count_spins, exists_mol_res_spin_data, generate_spin_id, return_molecule, return_residue, return_spin, spin_id_to_data_list, spin_loop 31 import pipes 32 from relax_errors import RelaxError, RelaxDiffMolNumError, RelaxDiffResNumError, RelaxDiffSeqError, RelaxDiffSpinNumError, RelaxFileEmptyError, RelaxInvalidSeqError, RelaxNoSequenceError, RelaxSequenceError 33 from relax_io import open_write_file, read_spin_data, write_spin_data 34 import sys 35 36 37

def copy(pipe_from=None, pipe_to=None, preserve_select=False, verbose=True):
    """Copy the molecule, residue, and spin sequence data from one data pipe to another.

    At least one of the two pipe names must be supplied.  Whichever one is left as None is replaced
    by the name of the current data pipe.

    @keyword pipe_from:         The data pipe to copy the sequence data from.  This defaults to the
                                current data pipe.
    @type pipe_from:            str
    @keyword pipe_to:           The data pipe to copy the sequence data to.  This defaults to the
                                current data pipe.
    @type pipe_to:              str
    @keyword preserve_select:   A flag which if True will cause spin selections to be preserved.
                                If False, all copied spins are selected.
    @type preserve_select:      bool
    @keyword verbose:           A flag which if True will cause info about each spin to be printed
                                out as the sequence is generated.  It is passed on unchanged to
                                generate().
    @type verbose:              bool
    @raises RelaxError:             If both pipe_from and pipe_to are None.
    @raises RelaxNoSequenceError:   If pipe_from contains no sequence data.
    @raises RelaxSequenceError:     If pipe_to already contains sequence data.
    """

    # Defaults (identity tests are used for the None singleton).
    if pipe_from is None and pipe_to is None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
    elif pipe_from is None:
        pipe_from = pipes.cdp_name()
    elif pipe_to is None:
        pipe_to = pipes.cdp_name()

    # Test if the pipe_from and pipe_to data pipes exist.
    pipes.test(pipe_from)
    pipes.test(pipe_to)

    # Test if pipe_from contains sequence data.
    if not exists_mol_res_spin_data(pipe_from):
        raise RelaxNoSequenceError

    # Test if pipe_to contains sequence data (existing data is never overwritten).
    if exists_mol_res_spin_data(pipe_to):
        raise RelaxSequenceError

    # Loop over the spins of the pipe_from data pipe.
    for spin, mol_name, res_num, res_name in spin_loop(pipe=pipe_from, full_info=True):
        # Preserve selection.
        if preserve_select:
            select = spin.select
        else:
            select = True

        # Generate the new sequence.
        generate(mol_name, res_num, res_name, spin.num, spin.name, pipe_to, select=select, verbose=verbose)

84 85

def compare_sequence(pipe1=None, pipe2=None, fail=True):
    """Compare the sequence in two data pipes.

    The molecule, residue, and spin counts are compared first, followed by the spin ID strings
    of every spin in each pipe.

    @keyword pipe1: The name of the first data pipe.
    @type pipe1:    str
    @keyword pipe2: The name of the second data pipe.
    @type pipe2:    str
    @keyword fail:  A flag which if True causes a RelaxError to be raised at the first difference
                    found.
    @type fail:     bool
    @return:        1 if the sequence is the same, 0 if different.
    @rtype:         int
    @raises RelaxError: If the sequence is different and the fail flag is True.
    """

    # Assume identical sequences until a difference is found.
    status = 1

    # The container counting functions paired with the error for a mismatch, in checking order.
    count_checks = (
        (count_molecules, RelaxDiffMolNumError),
        (count_residues, RelaxDiffResNumError),
        (count_spins, RelaxDiffSpinNumError),
    )

    # Molecule, residue, then spin number.
    for counter, mismatch_error in count_checks:
        if counter(pipe=pipe1) != counter(pipe=pipe2):
            status = 0
            if fail:
                raise mismatch_error(pipe1, pipe2)

    # Create a string representation of the 2 sequences (one spin ID per line).
    ids_first = ''.join([spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe1)])
    ids_second = ''.join([spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe2)])

    # Sequence check.
    if ids_first != ids_second:
        status = 0
        if fail:
            raise RelaxDiffSeqError(pipe1, pipe2)

    # Return the status.
    return status

137 138

def display(sep=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, spin_num_flag=False, spin_name_flag=False):
    """Display the molecule, residue, and/or spin sequence data.

    This calls the write() function, with sys.stdout as the file, to do most of the work.


    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be
                                written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be
                                written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be
                                written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be
                                written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @raises RelaxNoSequenceError:   If count_spins() reports no spins.
    """

    # Nothing can be shown without sequence data.
    if not count_spins():
        raise RelaxNoSequenceError

    # The column flags, handed on to write() unchanged.
    column_flags = {
        'mol_name_flag': mol_name_flag,
        'res_num_flag': res_num_flag,
        'res_name_flag': res_name_flag,
        'spin_num_flag': spin_num_flag,
        'spin_name_flag': spin_name_flag,
    }

    # Send the data to standard output.
    write(file=sys.stdout, sep=sep, **column_flags)

170 171

def generate(mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, pipe=None, select=True, verbose=True):
    """Generate the sequence item-by-item by adding a single molecule/residue/spin container as necessary.

    Each of the molecule, residue, and spin containers is looked up first and only added if the
    look up returns nothing.  The selection flag of the spin is always set, whether the spin is
    new or not.

    @keyword mol_name:  The molecule name.
    @type mol_name:     str or None
    @keyword res_num:   The residue number.
    @type res_num:      int or None
    @keyword res_name:  The residue name.
    @type res_name:     str or None
    @keyword spin_num:  The spin number.
    @type spin_num:     int or None
    @keyword spin_name: The spin name.
    @type spin_name:    str or None
    @keyword pipe:      The data pipe in which to generate the sequence.  This defaults to the
                        current data pipe.
    @type pipe:         str
    @keyword select:    The spin selection flag.
    @type select:       bool
    @keyword verbose:   A flag which if True will cause info about each spin to be printed out as
                        the sequence is generated.  This function itself does not use the flag, it
                        only returns the data of new spins to the caller.
    @type verbose:      bool
    @return:            The molecule name, residue number, residue name, spin number, and spin name
                        if a new spin container was added, otherwise None.
    @rtype:             tuple or None
    """

    # The current data pipe.
    if pipe is None:
        pipe = pipes.cdp_name()

    # Get the data pipe.
    dp = pipes.get_pipe(pipe)

    # Get the molecule.
    curr_mol = return_molecule(generate_spin_id(mol_name=mol_name), pipe=pipe)

    # A new molecule.
    if not curr_mol:
        # Add the molecule (and store it in the 'curr_mol' object).
        dp.mol.add_item(mol_name=mol_name)
        curr_mol = dp.mol[-1]

    # Get the residue.
    curr_res = return_residue(generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name), pipe=pipe)

    # A new residue.
    if not curr_res:
        # Add the residue (and store it in the 'curr_res' object).
        curr_mol.res.add_item(res_name=res_name, res_num=res_num)
        curr_res = curr_mol.res[-1]

    # The spin ID, used for the look up both before and after the spin is added.
    spin_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)

    # Get the spin.
    curr_spin = return_spin(spin_id, pipe=pipe)

    # A new spin.
    new_data = False
    if not curr_spin:
        # Add the spin.
        curr_res.spin.add_item(spin_name=spin_name, spin_num=spin_num)

        # Get the spin.
        curr_spin = return_spin(spin_id, pipe=pipe)

        # New data.
        new_data = True

    # Set the selection flag.
    curr_spin.select = select

    # Hand the data of a newly added spin back to the caller (nothing is printed here).
    if new_data:
        return mol_name, res_num, res_name, spin_num, spin_name

    # The spin already existed.
    return None

241 242

def read(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None):
    """Read the molecule, residue, and/or spin sequence data from file.

    The spins are added to the current data pipe one by one with generate(), and the data of all
    newly added spins is then written to standard output.

    @param file:            The name of the file to open.
    @type file:             str
    @param dir:             The directory containing the file (defaults to the current directory if
                            None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the
                            correct format.  The format is a list of lists where the first index
                            corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the
                            mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col
                            arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied,
                            spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied,
                            spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied,
                            spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied,
                            spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied,
                            spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all
                            spins.
    @type spin_id:          None or str
    @raises RelaxSequenceError: If sequence data already exists in the current data pipe.
    """

    # The current data pipe must exist.
    pipes.test()

    # Existing sequence data is never overwritten.
    if exists_mol_res_spin_data():
        raise RelaxSequenceError

    # One record per newly created spin.
    added_spins = []

    # Build up the sequence from the spin IDs found in the file.
    for spin_ident in read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id):
        # Add the spin, keeping the record only if generate() reports new data.
        record = generate(*spin_id_to_data_list(spin_ident))
        if record:
            added_spins.append(record)

    # Split the records into the individual columns.
    mol_names = [record[0] for record in added_spins]
    res_nums = [record[1] for record in added_spins]
    res_names = [record[2] for record in added_spins]
    spin_nums = [record[3] for record in added_spins]
    spin_names = [record[4] for record in added_spins]

    # Show what was added.
    write_spin_data(sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

310 311

def _is_int_or_none(text):
    """Determine if a column entry evaluates to None or an integer.

    @param text:    The raw column entry, which is passed to eval().
    @type text:     str
    @return:        True if evaluating the entry gives None or an int, False if it gives anything
                    else or if it cannot be evaluated.
    @rtype:         bool
    """

    # NOTE(review): eval() executes arbitrary expressions, so sequence data from an untrusted
    # source should not reach this point.  It is kept so that exactly the same entries as before
    # are accepted - ast.literal_eval() would be a safer replacement.
    try:
        value = eval(text)
    except Exception:
        return False

    # Only None and integers are valid residue or spin numbers.
    return value is None or isinstance(value, int)


def validate_sequence(data, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None):
    """Test if the sequence data is valid.

    The only function this performs is to raise a RelaxInvalidSeqError if the data is invalid.
    Column numbers start at 1, and any column argument set to None (or 0) is not checked.


    @param data:            The sequence data, indexed by column number.  The residue and spin
                            number entries are evaluated and must give either None or an integer.
    @type data:             list
    @keyword spin_id_col:   The column containing the spin ID strings.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.
    @type spin_name_col:    int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @raises RelaxInvalidSeqError:   If a requested column is missing, or if the residue or spin
                                    number is neither None nor an integer.
    """

    # Spin ID.
    if spin_id_col:
        if len(data) < spin_id_col:
            raise RelaxInvalidSeqError(data, "the Spin ID data is missing")

    # Molecule name data.
    if mol_name_col:
        if len(data) < mol_name_col:
            raise RelaxInvalidSeqError(data, "the molecule name data is missing")

    # Residue number data.
    if res_num_col:
        # No data in column.
        if len(data) < res_num_col:
            raise RelaxInvalidSeqError(data, "the residue number data is missing")

        # Bad data in column.
        if not _is_int_or_none(data[res_num_col-1]):
            raise RelaxInvalidSeqError(data, "the residue number data '%s' is invalid" % data[res_num_col-1])

    # Residue name data.
    if res_name_col:
        if len(data) < res_name_col:
            raise RelaxInvalidSeqError(data, "the residue name data is missing")

    # Spin number data.
    if spin_num_col:
        # No data in column.
        if len(data) < spin_num_col:
            raise RelaxInvalidSeqError(data, "the spin number data is missing")

        # Bad data in column (the spin number column itself, not the residue number column).
        if not _is_int_or_none(data[spin_num_col-1]):
            raise RelaxInvalidSeqError(data, "the spin number data '%s' is invalid" % data[spin_num_col-1])

    # Spin name data.
    if spin_name_col:
        if len(data) < spin_name_col:
            raise RelaxInvalidSeqError(data, "the spin name data is missing")

    # Data.
    if data_col:
        if len(data) < data_col:
            raise RelaxInvalidSeqError(data, "the data is missing")

    # Errors.
    if error_col:
        if len(data) < error_col:
            raise RelaxInvalidSeqError(data, "the error data is missing")

391 392

def write(file, dir=None, sep=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, spin_num_flag=False, spin_name_flag=False, force=False):
    """Write the molecule, residue, and/or sequence data.

    This calls the relax_io.write_spin_data() function to do most of the work.


    @param file:                The name of the file to write the data to.  The display() function
                                of this module passes sys.stdout here instead of a file name.
    @type file:                 str
    @keyword dir:               The directory to contain the file (defaults to the current directory
                                if None).
    @type dir:                  str or None
    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be
                                written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be
                                written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be
                                written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be
                                written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @keyword force:             A flag which if True will cause an existing file to be overwritten.
    @type force:                bool
    @raises RelaxNoSequenceError:   If count_spins() reports no spins.
    """

    # Nothing can be written without sequence data.
    if not count_spins():
        raise RelaxNoSequenceError

    # Gather one record per spin of the current data pipe.
    records = []
    for spin, mol_name, res_num, res_name in spin_loop(full_info=True):
        records.append((mol_name, res_num, res_name, spin.num, spin.name))

    # All columns start out as unwanted.
    mol_names = None
    res_nums = None
    res_names = None
    spin_nums = None
    spin_names = None

    # Build only the requested columns.
    if mol_name_flag:
        mol_names = [record[0] for record in records]
    if res_num_flag:
        res_nums = [record[1] for record in records]
    if res_name_flag:
        res_names = [record[2] for record in records]
    if spin_num_flag:
        spin_nums = [record[3] for record in records]
    if spin_name_flag:
        spin_names = [record[4] for record in records]

    # Write the data.
    write_spin_data(file=file, dir=dir, sep=sep, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, force=force)

456

Source Code for Module generic_fns.sequence