Package generic_fns :: Module dasha
[hide private]
[frames] | [no frames]

Source Code for Module generic_fns.dasha

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2005-2012 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Module for interfacing with Dasha.""" 
 24   
# Dependencies.
import dep_check

# Python module imports.
from math import pi
from os import F_OK, access, chdir, getcwd, sep
PIPE, Popen = None, None
if dep_check.subprocess_module:
    from subprocess import PIPE, Popen
import sys

# relax module imports.
from generic_fns import angles, diffusion_tensor, pipes, relax_data, value
from generic_fns.interatomic import return_interatom_list
from generic_fns.mol_res_spin import exists_mol_res_spin_data, first_residue_num, last_residue_num, residue_loop, return_spin, spin_loop
from relax_errors import RelaxDirError, RelaxError, RelaxFileError, RelaxNoInteratomError, RelaxNoPdbError, RelaxNoSequenceError, RelaxNoTensorError
from relax_io import extract_data, mkdir_nofail, open_write_file, strip, test_binary
from specific_fns.setup import model_free_obj
 43   
 44   
def __deselect_spins():
    """Deselect spins with no or too little data, that are overfitting, etc.

    Spins are deselected (``spin.select = False``) when they have no relaxation
    data, fewer than 3 relaxation data points, or more model parameters than
    data points (over-fitting).

    @raises RelaxNoSequenceError:   If no molecule, residue, or spin sequence
                                    data exists in the current data pipe.
    """

    # Test if sequence data exists.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Note: the original code computed a 'need_vect' flag from the diffusion
    # tensor type here, but the value was never used - the dead code has been
    # removed.

    # Loop over the sequence.
    for spin in spin_loop():
        # Relaxation data must exist!
        if not hasattr(spin, 'ri_data'):
            spin.select = False

        # Require 3 or more relaxation data points.
        elif len(spin.ri_data) < 3:
            spin.select = False

        # Require at least as many data points as params to prevent over-fitting.
        elif hasattr(spin, 'params') and spin.params and len(spin.params) > len(spin.ri_data):
            spin.select = False
70 71
def create(algor='LM', dir=None, force=False):
    """Create the Dasha script file 'dasha_script' for controlling the program.

    @keyword algor:     The optimisation algorithm to use.  This can be the Levenberg-Marquardt algorithm 'LM' or the Newton-Raphson algorithm 'NR'.
    @type algor:        str
    @keyword dir:       The optional directory to place the script into.  Defaults to the current data pipe name if None.
    @type dir:          str or None
    @keyword force:     A flag which if True will cause any pre-existing file to be overwritten.
    @type force:        bool
    @raises RelaxNoSequenceError:   If no spin sequence data exists.
    @raises RelaxNoTensorError:     If a non-local_tm model has no diffusion tensor.
    @raises RelaxNoPdbError:        If structural data is required but missing.
    @raises RelaxError:             If the optimisation algorithm is unknown.
    """

    # Test if the current pipe exists.
    pipes.test()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Determine the parameter set.
    model_type = model_free_obj._determine_model_type()

    # Test if diffusion tensor data for the data_pipe exists.
    if model_type != 'local_tm' and not hasattr(cdp, 'diff_tensor'):
        raise RelaxNoTensorError('diffusion')

    # Test if the PDB file has been loaded (for the spheroid and ellipsoid).
    if model_type != 'local_tm' and cdp.diff_tensor.type != 'sphere' and not hasattr(cdp, 'structure'):
        raise RelaxNoPdbError

    # Test the optimisation algorithm.
    if algor not in ['LM', 'NR']:
        raise RelaxError("The Dasha optimisation algorithm '%s' is unknown, it should either be 'LM' or 'NR'." % algor)

    # Deselect certain spins.
    __deselect_spins()

    # Directory creation (PEP 8: identity comparison with None).
    if dir is None:
        dir = pipes.cdp_name()
    mkdir_nofail(dir, verbosity=0)

    # Calculate the angle alpha of the XH vector in the spheroid diffusion frame.
    if cdp.diff_tensor.type == 'spheroid':
        angles.spheroid_frame()

    # Calculate the angles theta and phi of the XH vector in the ellipsoid diffusion frame.
    elif cdp.diff_tensor.type == 'ellipsoid':
        angles.ellipsoid_frame()

    # The 'dasha_script' file.
    script = open_write_file(file_name='dasha_script', dir=dir, force=force)
    create_script(script, model_type, algor)
    script.close()
125 126
def create_script(file, model_type, algor):
    """Create the Dasha script file.

    @param file:        The opened file descriptor to write the script to.
    @type file:         file object
    @param model_type:  The model-free model type.
    @type model_type:   str
    @param algor:       The optimisation algorithm to use.  This can be the Levenberg-Marquardt algorithm 'LM' or the Newton-Raphson algorithm 'NR'.
    @type algor:        str
    @raises RelaxError: If more than one nucleus type is present, the nucleus type is unsupported, a relaxation data type is unknown, or the model type is unsupported.
    @raises RelaxNoInteratomError:  If a spin has no interatomic data container.
    """

    # Delete all data.
    file.write("# Delete all data.\n")
    file.write("del 1 10000\n")

    # Nucleus type.
    file.write("\n# Nucleus type.\n")
    nucleus = None
    for spin in spin_loop():
        # Skip protons.
        if spin.isotope == '1H':
            continue

        # Can only handle one spin type.
        if nucleus and spin.isotope != nucleus:
            raise RelaxError("The nuclei '%s' and '%s' do not match, relax can only handle one nucleus type in Dasha." % (nucleus, spin.isotope))

        # Set the nucleus.
        if not nucleus:
            nucleus = spin.isotope

    # Convert the name to Dasha's notation and write it.
    if nucleus == '15N':
        nucleus = 'N15'
    elif nucleus == '13C':
        nucleus = 'C13'
    else:
        raise RelaxError("Cannot handle the nucleus type '%s' within Dasha." % nucleus)
    file.write("set nucl %s\n" % nucleus)

    # Number of frequencies.
    file.write("\n# Number of frequencies.\n")
    file.write("set n_freq %s\n" % relax_data.num_frq())

    # Frequency values (converted from Hz to MHz for Dasha).
    file.write("\n# Frequency values.\n")
    count = 1
    for frq in relax_data.frq_loop():
        file.write("set H1_freq %s %s\n" % (frq / 1e6, count))
        count += 1

    # Set the diffusion tensor (tm values converted from s to ns).
    file.write("\n# Set the diffusion tensor.\n")
    if model_type != 'local_tm':
        # Sphere.
        if cdp.diff_tensor.type == 'sphere':
            file.write("set tr %s\n" % (cdp.diff_tensor.tm / 1e-9))

        # Spheroid.
        elif cdp.diff_tensor.type == 'spheroid':
            file.write('set tr %s\n' % (cdp.diff_tensor.tm / 1e-9))

        # Ellipsoid.
        elif cdp.diff_tensor.type == 'ellipsoid':
            # Get the eigenvalues.
            Dx, Dy, Dz = diffusion_tensor.return_eigenvalues()

            # Geometric parameters.
            file.write("set tr %s\n" % (cdp.diff_tensor.tm / 1e-9))
            file.write("set D1/D3 %s\n" % (Dx / Dz))
            file.write("set D2/D3 %s\n" % (Dy / Dz))

            # Orientational parameters (Euler angles converted from rad to deg).
            file.write("set alfa %s\n" % (cdp.diff_tensor.alpha / (2.0 * pi) * 360.0))
            file.write("set betta %s\n" % (cdp.diff_tensor.beta / (2.0 * pi) * 360.0))
            file.write("set gamma %s\n" % (cdp.diff_tensor.gamma / (2.0 * pi) * 360.0))

    # Reading the relaxation data.
    file.write("\n# Reading the relaxation data.\n")
    file.write("echo Reading the relaxation data.\n")
    noe_index = 1
    r1_index = 1
    r2_index = 1
    for ri_id in cdp.ri_ids:
        # NOE.
        if cdp.ri_type[ri_id] == 'NOE':
            # Data set number.
            number = noe_index

            # Data type.
            data_type = 'noe'

            # Increment the data set index.
            noe_index = noe_index + 1

        # R1.
        elif cdp.ri_type[ri_id] == 'R1':
            # Data set number.
            number = r1_index

            # Data type.
            data_type = '1/T1'

            # Increment the data set index.
            r1_index = r1_index + 1

        # R2.
        elif cdp.ri_type[ri_id] == 'R2':
            # Data set number.
            number = r2_index

            # Data type.
            data_type = '1/T2'

            # Increment the data set index.
            r2_index = r2_index + 1

        # Unknown data type - previously this fell through and died with an
        # unbound 'number' NameError, so raise an explicit error instead.
        else:
            raise RelaxError("The relaxation data type '%s' for the ID '%s' is not supported by the Dasha interface." % (cdp.ri_type[ri_id], ri_id))

        # Set the data type.
        if number == 1:
            file.write("\nread < %s\n" % data_type)
        else:
            file.write("\nread < %s %s\n" % (data_type, number))

        # The relaxation data.
        for residue in residue_loop():
            # Alias the spin.
            spin = residue.spin[0]

            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins for which relaxation data is missing.
            if len(spin.ri_data) != len(cdp.ri_ids) or spin.ri_data[ri_id] is None:
                spin.select = False
                continue

            # Data and errors.
            file.write("%s %s %s\n" % (residue.num, spin.ri_data[ri_id], spin.ri_data_err[ri_id]))

        # Terminate the reading.
        file.write("exit\n")

    # Individual residue optimisation.
    if model_type == 'mf':
        # Loop over the residues.
        for residue in residue_loop():
            # Alias the spin.
            spin = residue.spin[0]

            # Skip deselected spins.
            if not spin.select:
                continue

            # Get the interatomic data containers (RelaxNoInteratomError is now
            # properly imported at the top of the module).
            interatoms = return_interatom_list(spin._spin_ids[0])
            if len(interatoms) == 0:
                raise RelaxNoInteratomError
            elif len(interatoms) > 1:
                raise RelaxError("Only one interatomic data container, hence dipole-dipole interaction, is supported per spin.")

            # Comment.
            file.write("\n\n\n# Residue %s\n\n" % residue.num)

            # Echo.
            file.write("echo Optimisation of residue %s\n" % residue.num)

            # Select the spin.
            file.write("\n# Select the residue.\n")
            file.write("set cres %s\n" % residue.num)

            # The angle alpha of the XH vector in the spheroid diffusion frame.
            if cdp.diff_tensor.type == 'spheroid':
                file.write("set teta %s\n" % spin.alpha)

            # The angles theta and phi of the XH vector in the ellipsoid diffusion frame.
            elif cdp.diff_tensor.type == 'ellipsoid':
                file.write("\n# Setting the spherical angles of the XH vector in the ellipsoid diffusion frame.\n")
                file.write("set teta %s\n" % spin.theta)
                file.write("set fi %s\n" % spin.phi)

            # The 'jmode' (Dasha spectral density function mode).
            if 'ts' in spin.params:
                jmode = 3
            elif 'te' in spin.params:
                jmode = 2
            elif 's2' in spin.params:
                jmode = 1

            # Chemical exchange.
            if 'rex' in spin.params:
                exch = True
            else:
                exch = False

            # Anisotropic diffusion.
            if cdp.diff_tensor.type == 'sphere':
                anis = False
            else:
                anis = True

            # Axial symmetry.
            if cdp.diff_tensor.type == 'spheroid':
                sym = True
            else:
                sym = False

            # Set the jmode.
            file.write("\n# Set the jmode.\n")
            file.write("set def jmode %s" % jmode)
            if exch:
                file.write(" exch")
            if anis:
                file.write(" anis")
            if sym:
                file.write(" sym")
            file.write("\n")

            # Parameter default values.
            file.write("\n# Parameter default values.\n")
            file.write("reset jmode %s\n" % residue.num)

            # Bond length (converted from m to Angstrom).
            file.write("\n# Bond length.\n")
            file.write("set r_hx %s\n" % (interatoms[0].r / 1e-10))

            # CSA value (converted to ppm).
            file.write("\n# CSA value.\n")
            file.write("set csa %s\n" % (spin.csa / 1e-6))

            # Fix the tf parameter if it isn't in the model.
            if not 'tf' in spin.params and jmode == 3:
                file.write("\n# Fix the tf parameter.\n")
                file.write("fix tf 0\n")

        # Optimisation of all residues.
        file.write("\n\n\n# Optimisation of all residues.\n")
        if algor == 'LM':
            file.write("lmin %s %s" % (first_residue_num(), last_residue_num()))
        elif algor == 'NR':
            file.write("min %s %s" % (first_residue_num(), last_residue_num()))

        # Show the results.
        file.write("\n# Show the results.\n")
        file.write("echo\n")
        file.write("show all\n")

        # Write the results.
        file.write("\n# Write the results.\n")
        file.write("write s2.out S\n")
        file.write("write s2f.out Sf\n")
        file.write("write s2s.out Ss\n")
        file.write("write te.out te\n")
        file.write("write tf.out tf\n")
        file.write("write ts.out ts\n")
        file.write("write rex.out rex\n")
        file.write("write chi2.out F\n")

    else:
        raise RelaxError("Optimisation of the parameter set '%s' currently not supported." % model_type)
387 388
def execute(dir, force, binary):
    """Execute Dasha.

    @param dir:     The optional directory where the script is located.  Defaults to the current data pipe name if None.
    @type dir:      str or None
    @param force:   A flag which if True will cause any pre-existing files to be overwritten by Dasha.  (Not used directly here - presumably consumed by Dasha itself; kept for interface compatibility.)
    @type force:    bool
    @param binary:  The name of the Dasha binary file.  This can include the path to the binary.
    @type binary:   str
    @raises RelaxDirError:  If the directory does not exist.
    @raises RelaxFileError: If the 'dasha_script' file is missing.
    @raises RelaxError:     If the subprocess module is unavailable.
    """

    # Test the binary file string corresponds to a valid executable.
    test_binary(binary)

    # The current directory, so it can be restored afterwards.
    orig_dir = getcwd()

    # The directory.
    if dir is None:
        dir = pipes.cdp_name()
    if not access(dir, F_OK):
        raise RelaxDirError('Dasha', dir)

    # Change to this directory.
    chdir(dir)

    # A try/finally guarantees the original directory is restored on both the
    # success and failure paths (replaces the original bare 'except' plus
    # duplicated chdir calls).
    try:
        # Test if the 'dasha_script' script file exists.
        if not access('dasha_script', F_OK):
            raise RelaxFileError('dasha script', 'dasha_script')

        # Python 2.3 and earlier have no subprocess module.
        if Popen is None:
            raise RelaxError("The subprocess module is not available in this version of Python.")

        # Execute Dasha.
        pipe = Popen(binary, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)

        # Get the contents of the script, making sure the file is closed even
        # if reading fails, and pump it into Dasha.
        script = open('dasha_script')
        try:
            lines = script.readlines()
        finally:
            script.close()
        for line in lines:
            # Encode to a Python 3 byte array.
            if hasattr(line, 'encode'):
                line = line.encode()

            # Write out.
            pipe.stdin.write(line)

        # Close the pipe.
        pipe.stdin.close()

        # Relay Dasha's stdout to the user.
        for line in pipe.stdout.readlines():
            # Decode Python 3 byte arrays.
            if hasattr(line, 'decode'):
                line = line.decode()

            # Write.
            sys.stdout.write(line)

        # Relay Dasha's stderr to the user.
        for line in pipe.stderr.readlines():
            # Decode Python 3 byte arrays.
            if hasattr(line, 'decode'):
                line = line.decode()

            # Write.
            sys.stderr.write(line)

    finally:
        # Change back to the original directory.
        chdir(orig_dir)

    # Print some blank lines (aesthetics).
    sys.stdout.write("\n\n")
474 475
def extract(dir):
    """Extract the data from the Dasha results files.

    @param dir:     The optional directory where the results file is located.  Defaults to the current data pipe name if None.
    @type dir:      str or None
    @raises RelaxNoSequenceError:   If no spin sequence data exists.
    @raises RelaxDirError:          If the directory does not exist.
    @raises RelaxFileError:         If a results file is missing.
    """

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # The directory.
    if dir is None:
        dir = pipes.cdp_name()
    if not access(dir, F_OK):
        raise RelaxDirError('Dasha', dir)

    # Loop over the parameters.
    for param in ['s2', 's2f', 's2s', 'te', 'tf', 'ts', 'rex']:
        # The file name.
        file_name = dir + sep + param + '.out'

        # Test if the file exists.
        if not access(file_name, F_OK):
            raise RelaxFileError('Dasha', file_name)

        # Scaling.
        if param in ['te', 'tf', 'ts']:
            scaling = 1e-9
        elif param == 'rex':
            scaling = 1.0 / (2.0 * pi * cdp.frq[cdp.ri_ids[0]]) ** 2
        else:
            scaling = 1.0

        # Read the values (each row is [spin_id, value, error]).
        data = read_results(file=file_name, scaling=scaling)

        # Set the values.  BUG FIX: the error is element [2] of each row - the
        # original code passed the spin ID string (element [0]) as the error.
        for i in range(len(data)):
            value.set(val=data[i][1], error=data[i][2], param=param, spin_id=data[i][0])

        # Clean up of non-existent parameters (set the parameter to None!).
        for spin in spin_loop():
            # Skip the spin (don't set the parameter to None) if the parameter exists in the model.
            if param in spin.params:
                continue

            # Set the parameter to None.
            setattr(spin, param.lower(), None)

    # Extract the chi-squared values.
    file_name = dir + sep + 'chi2.out'

    # Test if the file exists.
    if not access(file_name, F_OK):
        raise RelaxFileError('Dasha', file_name)

    # Read the values.
    data = read_results(file=file_name)

    # Set the values.
    for i in range(len(data)):
        spin = return_spin(data[i][0])
        spin.chi2 = data[i][1]
540 541
def read_results(file=None, dir=None, scaling=1.0):
    """Extract the data from the Dasha results file.

    @keyword file:      The name of the file to open.
    @type file:         str
    @keyword dir:       The directory containing the file (defaults to the current directory if None).
    @type dir:          str or None
    @keyword scaling:   The parameter scaling factor.
    @type scaling:      float
    @return:            The repackaged rows, each as a list of spin ID string, scaled value, and scaled error.
    @rtype:             list of [str, float, float]
    """

    # Pull out the file contents and drop the comment lines.
    rows = strip(extract_data(file=file, dir=dir))

    # Repackage each row as [spin ID, value, error], applying the scaling.
    repackaged = []
    for row in rows:
        spin_id = ':%s@N' % row[0]
        repackaged.append([spin_id, float(row[1]) * scaling, float(row[2]) * scaling])

    # Hand back the repackaged data.
    return repackaged
569