Package pipe_control :: Module dasha
[hide private]
[frames] | [no frames]

Source Code for Module pipe_control.dasha

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2005-2014 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Module for interfacing with Dasha.""" 
 24   
 25  # Dependencies. 
 26  import dep_check 
 27   
 28  # Python module imports. 
 29  from math import pi 
 30  from os import F_OK, access, chdir, getcwd, sep 
 31  PIPE, Popen = None, None 
 32  if dep_check.subprocess_module: 
 33      from subprocess import PIPE, Popen 
 34  import sys 
 35   
# relax module imports.
from lib.errors import RelaxDirError, RelaxError, RelaxFileError, RelaxNoInteratomError, RelaxNoPdbError, RelaxNoSequenceError, RelaxNoTensorError
from lib.io import extract_data, mkdir_nofail, open_write_file, strip, test_binary
from pipe_control import angles, diffusion_tensor, pipes, value
from pipe_control.interatomic import return_interatom_list
from pipe_control.mol_res_spin import exists_mol_res_spin_data, first_residue_num, last_residue_num, residue_loop, return_spin, spin_loop
from pipe_control.spectrometer import loop_frequencies
from specific_analyses.model_free.model import determine_model_type
 44   
 45   
def __deselect_spins():
    """Deselect spins with no or too little data, that are overfitting, etc.

    A spin is deselected (spin.select set to False) when any of the following holds:
        - it has no relaxation data at all,
        - it has fewer than 3 relaxation data points,
        - it has more model parameters than data points (over-fitting).

    @raises RelaxNoSequenceError:   If no molecule/residue/spin sequence data exists in the current data pipe.
    """

    # Test if sequence data exists.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # NOTE(review): the original also computed a 'need_vect' flag from the diffusion
    # tensor type, but never used it - that dead code has been removed.

    # Loop over the sequence.
    for spin in spin_loop():
        # Relaxation data must exist!
        if not hasattr(spin, 'ri_data'):
            spin.select = False

        # Require 3 or more relaxation data points.
        elif len(spin.ri_data) < 3:
            spin.select = False

        # Require at least as many data points as params to prevent over-fitting.
        elif hasattr(spin, 'params') and spin.params and len(spin.params) > len(spin.ri_data):
            spin.select = False
71 72
def create(algor='LM', dir=None, force=False):
    """Create the Dasha script file 'dasha_script' for controlling the program.

    @keyword algor: The optimisation algorithm to use.  This can be the Levenberg-Marquardt algorithm 'LM' or the Newton-Raphson algorithm 'NR'.
    @type algor:    str
    @keyword dir:   The optional directory to place the script into.
    @type dir:      str or None
    @keyword force: A flag which if True will cause any pre-existing file to be overwritten.
    @type force:    bool
    """

    # Test if the current pipe exists.
    pipes.test()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Determine the parameter set.
    model_type = determine_model_type()

    # Test if diffusion tensor data for the data_pipe exists.
    if model_type != 'local_tm' and not hasattr(cdp, 'diff_tensor'):
        raise RelaxNoTensorError('diffusion')

    # Test if the PDB file has been loaded (for the spheroid and ellipsoid).
    if model_type != 'local_tm' and cdp.diff_tensor.type != 'sphere' and not hasattr(cdp, 'structure'):
        raise RelaxNoPdbError

    # Test the optimisation algorithm.
    if algor not in ['LM', 'NR']:
        raise RelaxError("The Dasha optimisation algorithm '%s' is unknown, it should either be 'LM' or 'NR'." % algor)

    # Deselect certain spins.
    __deselect_spins()

    # Directory creation.
    if dir == None:
        dir = pipes.cdp_name()
    mkdir_nofail(dir, verbosity=0)

    # XH vector angle calculations in the diffusion frame.  Guarded with hasattr()
    # as the 'local_tm' model type is allowed through the tests above without any
    # diffusion tensor being present (the original code would raise AttributeError).
    if hasattr(cdp, 'diff_tensor'):
        # Calculate the angle alpha of the XH vector in the spheroid diffusion frame.
        if cdp.diff_tensor.type == 'spheroid':
            angles.spheroid_frame()

        # Calculate the angles theta and phi of the XH vector in the ellipsoid diffusion frame.
        elif cdp.diff_tensor.type == 'ellipsoid':
            angles.ellipsoid_frame()

    # The 'dasha_script' file.
    script = open_write_file(file_name='dasha_script', dir=dir, force=force)
    create_script(script, model_type, algor)
    script.close()
126 127
def create_script(file, model_type, algor):
    """Create the Dasha script file.

    @param file:        The opened file descriptor.
    @type file:         file object
    @param model_type:  The model-free model type.
    @type model_type:   str
    @param algor:       The optimisation algorithm to use.  This can be the Levenberg-Marquardt algorithm 'LM' or the Newton-Raphson algorithm 'NR'.
    @type algor:        str
    @raises RelaxError: If more than one heteronucleus type is present, the nucleus type is unsupported, a relaxation data type is unsupported, or the model type cannot be optimised by Dasha.
    """

    # Delete all data.
    file.write("# Delete all data.\n")
    file.write("del 1 10000\n")

    # Nucleus type.
    file.write("\n# Nucleus type.\n")
    nucleus = None
    for spin in spin_loop():
        # Skip protons.
        if spin.isotope == '1H':
            continue

        # Can only handle one spin type.
        if nucleus and spin.isotope != nucleus:
            raise RelaxError("The nuclei '%s' and '%s' do not match, relax can only handle one nucleus type in Dasha." % (nucleus, spin.isotope))

        # Set the nucleus.
        if not nucleus:
            nucleus = spin.isotope

    # Convert the name and write it.
    if nucleus == '15N':
        nucleus = 'N15'
    elif nucleus == '13C':
        nucleus = 'C13'
    else:
        raise RelaxError("Cannot handle the nucleus type '%s' within Dasha." % nucleus)
    file.write("set nucl %s\n" % nucleus)

    # Number of frequencies.
    file.write("\n# Number of frequencies.\n")
    file.write("set n_freq %s\n" % cdp.spectrometer_frq_count)

    # Frequency values (converted to MHz for Dasha).
    file.write("\n# Frequency values.\n")
    count = 1
    for frq in loop_frequencies():
        file.write("set H1_freq %s %s\n" % (frq / 1e6, count))
        count += 1

    # Set the diffusion tensor.
    file.write("\n# Set the diffusion tensor.\n")
    if model_type != 'local_tm':
        # Sphere.
        if cdp.diff_tensor.type == 'sphere':
            file.write("set tr %s\n" % (cdp.diff_tensor.tm / 1e-9))

        # Spheroid.
        elif cdp.diff_tensor.type == 'spheroid':
            file.write('set tr %s\n' % (cdp.diff_tensor.tm / 1e-9))

        # Ellipsoid.
        elif cdp.diff_tensor.type == 'ellipsoid':
            # Get the eigenvales.
            Dx, Dy, Dz = diffusion_tensor.return_eigenvalues()

            # Geometric parameters (tm in ns, eigenvalue ratios).
            file.write("set tr %s\n" % (cdp.diff_tensor.tm / 1e-9))
            file.write("set D1/D3 %s\n" % (Dx / Dz))
            file.write("set D2/D3 %s\n" % (Dy / Dz))

            # Orientational parameters (radians converted to degrees).
            file.write("set alfa %s\n" % (cdp.diff_tensor.alpha / (2.0 * pi) * 360.0))
            file.write("set betta %s\n" % (cdp.diff_tensor.beta / (2.0 * pi) * 360.0))
            file.write("set gamma %s\n" % (cdp.diff_tensor.gamma / (2.0 * pi) * 360.0))

    # Reading the relaxation data.
    file.write("\n# Reading the relaxation data.\n")
    file.write("echo Reading the relaxation data.\n")
    noe_index = 1
    r1_index = 1
    r2_index = 1
    for ri_id in cdp.ri_ids:
        # NOE.
        if cdp.ri_type[ri_id] == 'NOE':
            # Data set number.
            number = noe_index

            # Data type.
            data_type = 'noe'

            # Increment the data set index.
            noe_index = noe_index + 1

        # R1.
        elif cdp.ri_type[ri_id] == 'R1':
            # Data set number.
            number = r1_index

            # Data type.
            data_type = '1/T1'

            # Increment the data set index.
            r1_index = r1_index + 1

        # R2.
        elif cdp.ri_type[ri_id] == 'R2':
            # Data set number.
            number = r2_index

            # Data type.
            data_type = '1/T2'

            # Increment the data set index.
            r2_index = r2_index + 1

        # Unknown data type - fail explicitly rather than with an UnboundLocalError
        # on 'number' below (the original code had no else branch).
        else:
            raise RelaxError("The relaxation data type '%s' is not supported by the Dasha interface." % cdp.ri_type[ri_id])

        # Set the data type.
        if number == 1:
            file.write("\nread < %s\n" % data_type)
        else:
            file.write("\nread < %s %s\n" % (data_type, number))

        # The relaxation data.
        for residue in residue_loop():
            # Alias the spin.
            spin = residue.spin[0]

            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins for which relaxation data is missing.
            if len(spin.ri_data) != len(cdp.ri_ids) or spin.ri_data[ri_id] == None:
                spin.select = False
                continue

            # Data and errors.
            file.write("%s %s %s\n" % (residue.num, spin.ri_data[ri_id], spin.ri_data_err[ri_id]))

        # Terminate the reading.
        file.write("exit\n")

    # Individual residue optimisation.
    if model_type == 'mf':
        # Loop over the residues.
        for residue in residue_loop():
            # Alias the spin.
            spin = residue.spin[0]

            # Skip deselected spins.
            if not spin.select:
                continue

            # Get the interatomic data containers.
            interatoms = return_interatom_list(spin._spin_ids[0])
            if len(interatoms) == 0:
                raise RelaxNoInteratomError
            elif len(interatoms) > 1:
                raise RelaxError("Only one interatomic data container, hence dipole-dipole interaction, is supported per spin.")

            # Comment.
            file.write("\n\n\n# Residue %s\n\n" % residue.num)

            # Echo.
            file.write("echo Optimisation of residue %s\n" % residue.num)

            # Select the spin.
            file.write("\n# Select the residue.\n")
            file.write("set cres %s\n" % residue.num)

            # The angle alpha of the XH vector in the spheroid diffusion frame.
            if cdp.diff_tensor.type == 'spheroid':
                file.write("set teta %s\n" % spin.alpha)

            # The angles theta and phi of the XH vector in the ellipsoid diffusion frame.
            elif cdp.diff_tensor.type == 'ellipsoid':
                file.write("\n# Setting the spherical angles of the XH vector in the ellipsoid diffusion frame.\n")
                file.write("set teta %s\n" % spin.theta)
                file.write("set fi %s\n" % spin.phi)

            # The 'jmode' (spectral density function complexity level).
            if 'ts' in spin.params:
                jmode = 3
            elif 'te' in spin.params:
                jmode = 2
            elif 's2' in spin.params:
                jmode = 1

            # Chemical exchange.
            if 'rex' in spin.params:
                exch = True
            else:
                exch = False

            # Anisotropic diffusion.
            if cdp.diff_tensor.type == 'sphere':
                anis = False
            else:
                anis = True

            # Axial symmetry.
            if cdp.diff_tensor.type == 'spheroid':
                sym = True
            else:
                sym = False

            # Set the jmode.
            file.write("\n# Set the jmode.\n")
            file.write("set def jmode %s" % jmode)
            if exch:
                file.write(" exch")
            if anis:
                file.write(" anis")
            if sym:
                file.write(" sym")
            file.write("\n")

            # Parameter default values.
            file.write("\n# Parameter default values.\n")
            file.write("reset jmode %s\n" % residue.num)

            # Bond length (converted to Angstrom).
            file.write("\n# Bond length.\n")
            file.write("set r_hx %s\n" % (interatoms[0].r / 1e-10))

            # CSA value (converted to ppm).
            file.write("\n# CSA value.\n")
            file.write("set csa %s\n" % (spin.csa / 1e-6))

            # Fix the tf parameter if it isn't in the model.
            if not 'tf' in spin.params and jmode == 3:
                file.write("\n# Fix the tf parameter.\n")
                file.write("fix tf 0\n")

        # Optimisation of all residues.
        file.write("\n\n\n# Optimisation of all residues.\n")
        if algor == 'LM':
            file.write("lmin %s %s" % (first_residue_num(), last_residue_num()))
        elif algor == 'NR':
            file.write("min %s %s" % (first_residue_num(), last_residue_num()))

        # Show the results.
        file.write("\n# Show the results.\n")
        file.write("echo\n")
        file.write("show all\n")

        # Write the results.
        file.write("\n# Write the results.\n")
        file.write("write s2.out S\n")
        file.write("write s2f.out Sf\n")
        file.write("write s2s.out Ss\n")
        file.write("write te.out te\n")
        file.write("write tf.out tf\n")
        file.write("write ts.out ts\n")
        file.write("write rex.out rex\n")
        file.write("write chi2.out F\n")

    else:
        raise RelaxError("Optimisation of the parameter set '%s' currently not supported." % model_type)
388 389
def execute(dir, force, binary):
    """Execute Dasha.

    @param dir:     The optional directory where the script is located.
    @type dir:      str or None
    @param force:   A flag which if True will cause any pre-existing files to be overwritten by Dasha.
    @type force:    bool
    @param binary:  The name of the Dasha binary file.  This can include the path to the binary.
    @type binary:   str
    """

    # Test the binary file string corresponds to a valid executable.
    test_binary(binary)

    # The current directory.
    orig_dir = getcwd()

    # The directory.
    if dir == None:
        dir = pipes.cdp_name()
    if not access(dir, F_OK):
        raise RelaxDirError('Dasha', dir)

    # Change to this directory.
    chdir(dir)

    # A try/finally rather than the original bare 'except:' plus duplicated
    # chdir() calls - the original directory is restored on every exit path.
    try:
        # Test if the 'dasha_script' script file exists.
        if not access('dasha_script', F_OK):
            raise RelaxFileError('dasha script', 'dasha_script')

        # Python 2.3 and earlier.
        if Popen == None:
            raise RelaxError("The subprocess module is not available in this version of Python.")

        # Execute Dasha.
        pipe = Popen(binary, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)

        # Get the contents of the script, guaranteeing closure of the file.
        script = open('dasha_script')
        try:
            lines = script.readlines()
        finally:
            script.close()

        # Pump the script into Dasha.
        for line in lines:
            # Encode to a Python 3 byte array.
            if hasattr(line, 'encode'):
                line = line.encode()

            # Write out.
            pipe.stdin.write(line)

        # Close the pipe.
        pipe.stdin.close()

        # Write to stdout.
        for line in pipe.stdout.readlines():
            # Decode Python 3 byte arrays.
            if hasattr(line, 'decode'):
                line = line.decode()

            # Write.
            sys.stdout.write(line)

        # Write to stderr.
        for line in pipe.stderr.readlines():
            # Decode Python 3 byte arrays.
            if hasattr(line, 'decode'):
                line = line.decode()

            # Write.
            sys.stderr.write(line)

    # Change back to the original directory on both success and failure.
    finally:
        chdir(orig_dir)

    # Print some blank lines (aesthetics)
    sys.stdout.write("\n\n")
475 476
def extract(dir):
    """Extract the data from the Dasha results files.

    @param dir:     The optional directory where the results file is located.
    @type dir:      str or None
    """

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # The directory.
    if dir == None:
        dir = pipes.cdp_name()
    if not access(dir, F_OK):
        raise RelaxDirError('Dasha', dir)

    # Loop over the parameters.
    for param in ['s2', 's2f', 's2s', 'te', 'tf', 'ts', 'rex']:
        # The file name.
        file_name = dir + sep + param + '.out'

        # Test if the file exists.
        if not access(file_name, F_OK):
            raise RelaxFileError('Dasha', file_name)

        # Scaling (times in ns; Rex converted from the field-dependent Dasha units).
        if param in ['te', 'tf', 'ts']:
            scaling = 1e-9
        elif param == 'rex':
            scaling = 1.0 / (2.0 * pi * cdp.spectrometer_frq[cdp.ri_ids[0]]) ** 2
        else:
            scaling = 1.0

        # Read the values as [spin_id, value, error] rows.
        data = read_results(file=file_name, scaling=scaling)

        # Set the values.
        for i in range(len(data)):
            value.set(val=data[i][1], param=param, spin_id=data[i][0])
            # Bug fix:  the error value is the third column of the results row.
            # The original passed data[i][0] (the spin ID string) as the error value.
            value.set(val=data[i][2], param=param, spin_id=data[i][0], error=True)

        # Clean up of non-existent parameters (set the parameter to None!).
        for spin in spin_loop():
            # Skip the spin (don't set the parameter to None) if the parameter exists in the model.
            if param in spin.params:
                continue

            # Set the parameter to None.
            setattr(spin, param.lower(), None)

    # Extract the chi-squared values.
    file_name = dir + sep+'chi2.out'

    # Test if the file exists.
    if not access(file_name, F_OK):
        raise RelaxFileError('Dasha', file_name)

    # Read the values.
    data = read_results(file=file_name)

    # Set the values.
    for i in range(len(data)):
        spin = return_spin(data[i][0])
        spin.chi2 = data[i][1]
542 543
def read_results(file=None, dir=None, scaling=1.0):
    """Extract the data from the Dasha results file.

    @keyword file:      The name of the file to open.
    @type file:         str
    @keyword dir:       The directory containing the file (defaults to the current directory if None).
    @type dir:          str or None
    @keyword scaling:   The parameter scaling factor.
    @type scaling:      float
    @return:            The repackaged data as a list of [spin_id, value, error] lists, with the value and error scaled.
    @rtype:             list of lists
    """

    # Read the file contents and strip out all comment lines.
    rows = strip(extract_data(file=file, dir=dir))

    # Repackage each row as [spin ID string, scaled value, scaled error].
    # NOTE(review): the spin ID is hard-coded to the backbone '@N' atom.
    return [
        [':%s@N' % row[0], float(row[1]) * scaling, float(row[2]) * scaling]
        for row in rows
    ]
571