generic_fns.sequence

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2012 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax. # 6 # # 7 # relax is free software; you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation; either version 2 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # relax is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with relax; if not, write to the Free Software # 19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # 20 # # 21 ############################################################################### 22 23 # Module docstring. 24 """Module for handling the molecule, residue, and spin sequence.""" 25 26 # Python module imports 27 from types import IntType, NoneType 28 29 # relax module imports. 30 from generic_fns.mol_res_spin import count_molecules, count_residues, count_spins, exists_mol_res_spin_data, generate_spin_id, return_molecule, return_residue, return_spin, spin_id_to_data_list, spin_loop 31 import pipes 32 from relax_errors import RelaxError, RelaxDiffMolNumError, RelaxDiffResNumError, RelaxDiffSeqError, RelaxDiffSpinNumError, RelaxFileEmptyError, RelaxInvalidSeqError, RelaxNoSequenceError, RelaxSequenceError 33 from relax_io import open_write_file, read_spin_data, write_spin_data 34 import sys 35 36 37

def copy(pipe_from=None, pipe_to=None, preserve_select=False, verbose=True):
    """Duplicate the molecule, residue, and spin sequence of one data pipe in another.

    At least one of the two data pipe names has to be supplied.  The one left as None is replaced
    by the name of the current data pipe.


    @keyword pipe_from:         The data pipe to copy the sequence data from.  This defaults to the
                                current data pipe.
    @type pipe_from:            str
    @keyword pipe_to:           The data pipe to copy the sequence data to.  This defaults to the
                                current data pipe.
    @type pipe_to:              str
    @keyword preserve_select:   A flag which if True will cause the selection flag of each source
                                spin to be carried over.  Otherwise all new spins are selected.
    @type preserve_select:      bool
    @keyword verbose:           A flag which is passed unchanged to generate() for each spin.
    @type verbose:              bool
    @raises RelaxError:             If both pipe_from and pipe_to are None.
    @raises RelaxNoSequenceError:   If pipe_from contains no sequence data.
    @raises RelaxSequenceError:     If pipe_to already contains sequence data.
    """

    # Resolve the defaults (a single missing pipe name falls back to the current data pipe).
    if pipe_from is None:
        if pipe_to is None:
            raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
        pipe_from = pipes.cdp_name()
    elif pipe_to is None:
        pipe_to = pipes.cdp_name()

    # Both data pipes must exist.
    pipes.test(pipe_from)
    pipes.test(pipe_to)

    # The source must hold a sequence.
    if not exists_mol_res_spin_data(pipe_from):
        raise RelaxNoSequenceError

    # The destination must not hold a sequence yet.
    if exists_mol_res_spin_data(pipe_to):
        raise RelaxSequenceError

    # Recreate each source spin, one at a time, in the destination data pipe.
    for spin, mol_name, res_num, res_name in spin_loop(pipe=pipe_from, full_info=True):
        # The selection flag is only read from the source spin when it is to be preserved.
        select = spin.select if preserve_select else True

        # Add the spin (and its molecule and residue containers as needed).
        generate(mol_name, res_num, res_name, spin.num, spin.name, pipe_to, select=select, verbose=verbose)

84 85

def compare_sequence(pipe1=None, pipe2=None, fail=True):
    """Check whether two data pipes hold the same sequence.

    The molecule, residue and spin counts are compared first, in that order, followed by the
    spin ID strings of all spins.  With the fail flag set, the first mismatch raises the matching
    error.  Without it, all checks are run and only the status is returned.


    @keyword pipe1: The name of the first data pipe.
    @type pipe1:    str
    @keyword pipe2: The name of the second data pipe.
    @type pipe2:    str
    @keyword fail:  A flag which if True causes a RelaxError to be raised.
    @type fail:     bool
    @return:        1 if the sequence is the same, 0 if different.
    @rtype:         int
    @raises RelaxError: If the sequence is different and the fail flag is True.
    """

    # Assume identical sequences until a check fails.
    status = 1

    # The counting functions together with the error reporting a mismatch, in checking order.
    count_checks = (
        (count_molecules, RelaxDiffMolNumError),
        (count_residues, RelaxDiffResNumError),
        (count_spins, RelaxDiffSpinNumError),
    )

    # Compare the molecule, residue, and spin numbers.
    for count, error in count_checks:
        if count(pipe=pipe1) != count(pipe=pipe2):
            status = 0
            if fail:
                raise error(pipe1, pipe2)

    # Build one newline-terminated spin ID per spin for each of the two sequences.
    seq1 = ''.join([spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe1)])
    seq2 = ''.join([spin_id + '\n' for spin, spin_id in spin_loop(return_id=True, pipe=pipe2)])

    # The sequences themselves.
    if seq1 != seq2:
        status = 0
        if fail:
            raise RelaxDiffSeqError(pipe1, pipe2)

    # The final status.
    return status

137 138

def display(sep=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, spin_num_flag=False, spin_name_flag=False):
    """Print the molecule, residue, and/or spin sequence data to standard output.

    This is a thin wrapper which hands sys.stdout and all of the arguments over to write().


    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be
                                written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be
                                written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be
                                written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be
                                written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @raises RelaxNoSequenceError:   If count_spins() reports no spins.
    """

    # Nothing to show without sequence data.
    if not count_spins():
        raise RelaxNoSequenceError

    # The column flags, forwarded unchanged.
    column_flags = {
        'mol_name_flag': mol_name_flag,
        'res_num_flag': res_num_flag,
        'res_name_flag': res_name_flag,
        'spin_num_flag': spin_num_flag,
        'spin_name_flag': spin_name_flag,
    }

    # Send the sequence to STDOUT.
    write(file=sys.stdout, sep=sep, **column_flags)

170 171

def generate(mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, pipe=None, select=True, verbose=True):
    """Add a single spin to the sequence, creating the molecule and residue containers on demand.

    The molecule, residue and spin are each looked up by spin ID and only created when the lookup
    returns nothing.  The selection flag is set on the spin whether or not it was newly created.


    @keyword mol_name:  The molecule name.
    @type mol_name:     str or None
    @keyword res_num:   The residue number.
    @type res_num:      int or None
    @keyword res_name:  The residue name.
    @type res_name:     str or None
    @keyword spin_num:  The spin number.
    @type spin_num:     int or None
    @keyword spin_name: The spin name.
    @type spin_name:    str or None
    @keyword pipe:      The data pipe in which to generate the sequence.  This defaults to the current data pipe.
    @type pipe:         str
    @keyword select:    The spin selection flag.
    @type select:       bool
    @keyword verbose:   A flag intended to control the printing of info about each spin.  It is not referenced in the body of this function.
    @type verbose:      bool
    @return:            True if a new spin was created, False otherwise.
    @rtype:             bool
    """

    # Default to the current data pipe.
    if pipe is None:
        pipe = pipes.cdp_name()

    # The data pipe object.
    dp = pipes.get_pipe(pipe)

    # Find the molecule container, appending a new one if it is missing.
    mol_cont = return_molecule(generate_spin_id(mol_name=mol_name), pipe=pipe)
    if not mol_cont:
        dp.mol.add_item(mol_name=mol_name)
        mol_cont = dp.mol[-1]

    # Find the residue container, appending a new one to the molecule if it is missing.
    res_cont = return_residue(generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name), pipe=pipe)
    if not res_cont:
        mol_cont.res.add_item(res_name=res_name, res_num=res_num)
        res_cont = mol_cont.res[-1]

    # The keyword arguments identifying the spin (used for the lookup before and after creation).
    spin_keys = dict(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)

    # Find the spin container.
    spin_cont = return_spin(generate_spin_id(**spin_keys), pipe=pipe)

    # Append a new spin to the residue if it is missing, then fetch it again via its ID.
    created = not spin_cont
    if created:
        res_cont.spin.add_item(spin_name=spin_name, spin_num=spin_num)
        spin_cont = return_spin(generate_spin_id(**spin_keys), pipe=pipe)

    # The selection flag (set for new and pre-existing spins alike).
    spin_cont.select = select

    # Report whether a spin was created.
    return created

240 241

def read(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None):
    """Load the molecule, residue, and/or spin sequence from a file into the current data pipe.

    Each record produced by read_spin_data() is passed to generate().  The records which resulted
    in a new spin are then printed to STDOUT via write_spin_data().


    @param file:            The name of the file to open.
    @type file:             str
    @param dir:             The directory containing the file (defaults to the current directory if
                            None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the
                            correct format.  The format is a list of lists where the first index
                            corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the
                            mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col
                            arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied,
                            spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied,
                            spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied,
                            spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied,
                            spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied,
                            spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all
                            spins.
    @type spin_id:          None or str
    @raises RelaxSequenceError: If sequence data is already present.
    """

    # The current data pipe must exist.
    pipes.test()

    # Refuse to load on top of an existing sequence.
    if exists_mol_res_spin_data():
        raise RelaxSequenceError

    # One list per output column, in the order molecule name, residue number, residue name, spin number, spin name.
    columns = ([], [], [], [], [])

    # Walk over the records of the file.
    records = read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)
    for mol_name, res_num, res_name, spin_num, spin_name in records:
        # Create the spin, skipping the bookkeeping when it already existed.
        if not generate(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name):
            continue

        # Record the new spin.
        for column, value in zip(columns, (mol_name, res_num, res_name, spin_num, spin_name)):
            column.append(value)

    # Print out the newly created spins.
    mol_names, res_nums, res_names, spin_nums, spin_names = columns
    write_spin_data(sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

309 310

def _validate_int_column(data, col, label):
    """Check that a 1-based column of the row exists and evaluates to an int or None.

    @param data:    The row of sequence data, one element per column.
    @type data:     list
    @param col:     The column number, counting from 1.
    @type col:      int
    @param label:   The description of the column used in the error messages.
    @type label:    str
    @raises RelaxInvalidSeqError:   If the column is missing or its value is invalid.
    """

    # No data in column.
    if len(data) < col:
        raise RelaxInvalidSeqError(data, "the %s data is missing" % label)

    # Bad data in column.
    # NOTE(review):  eval() executes arbitrary expressions found in the input.  It is kept here
    # to preserve the set of accepted values, but should be replaced by a restricted parser if
    # the data can come from an untrusted source.
    try:
        value = eval(data[col-1])
        if not (value is None or isinstance(value, int)):
            raise ValueError
    except Exception:
        raise RelaxInvalidSeqError(data, "the %s data '%s' is invalid" % (label, data[col-1]))


def validate_sequence(data, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None):
    """Test if the sequence data is valid.

    The only function this performs is to raise a RelaxError if the data is invalid.  For each
    column number supplied, the row must be long enough to contain that column.  The residue and
    spin number columns must additionally evaluate to an integer or None.  Column numbers count
    from 1, and a column argument of None (or 0) disables the corresponding check.


    @param data:            The sequence data.  It is indexed here as a single row, with one
                            element per column.
    @type data:             list
    @keyword spin_id_col:   The column containing the spin ID strings.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.
    @type spin_name_col:    int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @raises RelaxInvalidSeqError:   If a requested column is missing or holds an invalid value.
    """

    # Spin ID.
    if spin_id_col:
        if len(data) < spin_id_col:
            raise RelaxInvalidSeqError(data, "the Spin ID data is missing")

    # Molecule name data.
    if mol_name_col:
        if len(data) < mol_name_col:
            raise RelaxInvalidSeqError(data, "the molecule name data is missing")

    # Residue number data.
    if res_num_col:
        _validate_int_column(data, res_num_col, "residue number")

    # Residue name data.
    if res_name_col:
        if len(data) < res_name_col:
            raise RelaxInvalidSeqError(data, "the residue name data is missing")

    # Spin number data (checked against the spin number column itself, not the residue number column).
    if spin_num_col:
        _validate_int_column(data, spin_num_col, "spin number")

    # Spin name data.
    if spin_name_col:
        if len(data) < spin_name_col:
            raise RelaxInvalidSeqError(data, "the spin name data is missing")

    # Data.
    if data_col:
        if len(data) < data_col:
            raise RelaxInvalidSeqError(data, "the data is missing")

    # Errors.
    if error_col:
        if len(data) < error_col:
            raise RelaxInvalidSeqError(data, "the error data is missing")

390 391

def write(file, dir=None, sep=None, mol_name_flag=True, res_num_flag=True, res_name_flag=True, spin_num_flag=True, spin_name_flag=True, force=False):
    """Write out the molecule, residue, and/or spin sequence data.

    The sequence is gathered from spin_loop() and then handed to relax_io.write_spin_data(),
    which does most of the work.  Columns whose flag is False are passed on as None.


    @param file:                The file to write the data to.  This is passed unchanged to
                                write_spin_data() (display() supplies sys.stdout here).
    @type file:                 str or file object
    @keyword dir:               The directory to contain the file (defaults to the current directory if None).
    @type dir:                  str or None
    @keyword sep:               The column separator which, if None, defaults to whitespace.
    @type sep:                  str or None
    @keyword mol_name_flag:     A flag which if True will cause the molecule name column to be written.
    @type mol_name_flag:        bool
    @keyword res_num_flag:      A flag which if True will cause the residue number column to be written.
    @type res_num_flag:         bool
    @keyword res_name_flag:     A flag which if True will cause the residue name column to be written.
    @type res_name_flag:        bool
    @keyword spin_num_flag:     A flag which if True will cause the spin number column to be written.
    @type spin_num_flag:        bool
    @keyword spin_name_flag:    A flag which if True will cause the spin name column to be written.
    @type spin_name_flag:       bool
    @keyword force:             A flag which if True will cause an existing file to be overwritten.
    @type force:                bool
    @raises RelaxNoSequenceError:   If count_spins() reports no spins.
    """

    # Nothing to write without sequence data.
    if not count_spins():
        raise RelaxNoSequenceError

    # One (molecule name, residue number, residue name, spin number, spin name) row per spin.
    rows = [(mol_name, res_num, res_name, spin.num, spin.name) for spin, mol_name, res_num, res_name in spin_loop(full_info=True)]

    # The column flags, in the same order as the row elements.
    flags = (mol_name_flag, res_num_flag, res_name_flag, spin_num_flag, spin_name_flag)

    # Split the rows into columns, replacing the unwanted columns with None.
    columns = []
    for index, wanted in enumerate(flags):
        if wanted:
            columns.append([row[index] for row in rows])
        else:
            columns.append(None)
    mol_names, res_nums, res_names, spin_nums, spin_names = columns

    # Write the data.
    write_spin_data(file=file, dir=dir, sep=sep, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, force=force)

450

Source Code for Module generic_fns.sequence