generic_fns.rdc

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2012 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax. # 6 # # 7 # relax is free software; you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation; either version 2 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # relax is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with relax; if not, write to the Free Software # 19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # 20 # # 21 ############################################################################### 22 23 # Module docstring. 24 """Module for the manipulation of RDC data.""" 25 26 # Python module imports. 27 from copy import deepcopy 28 from math import pi, sqrt 29 from numpy import float64, ones, zeros 30 from numpy.linalg import norm 31 import sys 32 from warnings import warn 33 34 # relax module imports. 35 from float import nan 36 from generic_fns import grace, pipes 37 from generic_fns.align_tensor import get_tensor_index 38 from generic_fns.mol_res_spin import exists_mol_res_spin_data, return_spin, spin_loop 39 from maths_fns.rdc import ave_rdc_tensor 40 from physical_constants import dipolar_constant, return_gyromagnetic_ratio 41 from relax_errors import RelaxError, RelaxNoRDCError, RelaxNoSequenceError, RelaxSpinTypeError 42 from relax_io import open_write_file, read_spin_data, write_spin_data 43 from relax_warnings import RelaxWarning, RelaxNoSpinWarning 44 45

46 -def back_calc(align_id=None):

47 """Back calculate the RDC from the alignment tensor and unit bond vectors. 48 49 @keyword align_id: The alignment tensor ID string. 50 @type align_id: str 51 """ 52 53 # Arg check. 54 if align_id and align_id not in cdp.align_ids: 55 raise RelaxError, "The alignment ID '%s' is not in the alignment ID list %s." % (align_id, cdp.align_ids) 56 57 # Convert the align IDs to an array, or take all IDs. 58 if align_id: 59 align_ids = [align_id] 60 else: 61 align_ids = cdp.align_ids 62 63 # Add the ID to the RDC IDs, if needed. 64 for align_id in align_ids: 65 # Init. 66 if not hasattr(cdp, 'rdc_ids'): 67 cdp.rdc_ids = [] 68 69 # Add the ID. 70 if align_id not in cdp.rdc_ids: 71 cdp.rdc_ids.append(align_id) 72 73 # The weights. 74 weights = ones(cdp.N, float64) / cdp.N 75 76 # Unit vector data structure init. 77 unit_vect = zeros((cdp.N, 3), float64) 78 79 # Loop over the spins. 80 for spin in spin_loop(): 81 # Skip spins with no bond vectors. 82 if not hasattr(spin, 'bond_vect') and not hasattr(spin, 'xh_vect'): 83 continue 84 85 # Check. 86 if not hasattr(spin, 'heteronuc_type'): 87 raise RelaxSpinTypeError 88 89 # Alias. 90 if hasattr(spin, 'bond_vect'): 91 vectors = spin.bond_vect 92 else: 93 vectors = spin.xh_vect 94 95 # Single vector. 96 if type(vectors[0]) in [float, float64]: 97 vectors = [vectors] 98 99 # Gyromagnetic ratios. 100 gx = return_gyromagnetic_ratio(spin.heteronuc_type) 101 gh = return_gyromagnetic_ratio(spin.proton_type) 102 103 # Calculate the RDC dipolar constant (in Hertz, and the 3 comes from the alignment tensor), and append it to the list. 104 dj = 3.0/(2.0*pi) * dipolar_constant(gx, gh, spin.r) 105 106 # Unit vectors. 107 for c in range(cdp.N): 108 unit_vect[c] = vectors[c] / norm(vectors[c]) 109 110 # Initialise if necessary. 111 if not hasattr(spin, 'rdc_bc'): 112 spin.rdc_bc = {} 113 114 # Calculate the RDCs. 115 for id in align_ids: 116 spin.rdc_bc[id] = ave_rdc_tensor(dj, unit_vect, cdp.N, cdp.align_tensors[get_tensor_index(id)].A, weights=weights)

117 118

119 -def convert(value, align_id, to_intern=False):

120 """Convert the RDC based on the 'D' or '2D' data type. 121 122 @param value: The value or error to convert. 123 @type value: float or None 124 @param align_id: The alignment tensor ID string. 125 @type align_id: str 126 @keyword to_intern: A flag which if True will convert to the internal D notation if needed, or if False will convert from the internal D notation to the external D or 2D format. 127 @type to_intern: bool 128 @return: The converted value. 129 @rtype: float or None 130 """ 131 132 # Handle values of None. 133 if value == None: 134 return None 135 136 # The conversion factor. 137 factor = 1.0 138 if hasattr(cdp, 'rdc_data_types') and cdp.rdc_data_types.has_key(align_id) and cdp.rdc_data_types[align_id] == '2D': 139 # Convert from 2D to D. 140 if to_intern: 141 factor = 2.0 142 143 # Convert from D to 2D. 144 else: 145 factor = 0.5 146 147 # Return the converted value. 148 return value * factor

149 150

151 -def corr_plot(format=None, file=None, dir=None, force=False):

152 """Generate a correlation plot of the measured vs. back-calculated RDCs. 153 154 @keyword format: The format for the plot file. The following values are accepted: 'grace', a Grace plot; None, a plain text file. 155 @type format: str or None 156 @keyword file: The file name or object to write to. 157 @type file: str or file object 158 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 159 @type dir: str 160 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 161 @type force: bool 162 """ 163 164 # Test if the current pipe exists. 165 pipes.test() 166 167 # Test if the sequence data is loaded. 168 if not exists_mol_res_spin_data(): 169 raise RelaxNoSequenceError 170 171 # Does RDC data exist? 172 if not hasattr(cdp, 'rdc_ids') or not cdp.rdc_ids: 173 warn(RelaxWarning("No RDC data exists, skipping file creation.")) 174 return 175 176 # Open the file for writing. 177 file = open_write_file(file, dir, force) 178 179 # Init. 180 data = [] 181 182 # The diagonal. 183 data.append([[-100, -100, 0], [100, 100, 0]]) 184 185 # Loop over the RDC data. 186 for align_id in cdp.rdc_ids: 187 # Append a new list for this alignment. 188 data.append([]) 189 190 # Errors present? 191 err_flag = False 192 for spin in spin_loop(): 193 # Skip deselected spins. 194 if not spin.select: 195 continue 196 197 # Error present. 198 if hasattr(spin, 'rdc_err') and align_id in spin.rdc_err.keys(): 199 err_flag = True 200 break 201 202 # Loop over the spins. 203 for spin, spin_id in spin_loop(return_id=True): 204 # Skip deselected spins. 205 if not spin.select: 206 continue 207 208 # Skip if data is missing. 209 if not hasattr(spin, 'rdc') or not hasattr(spin, 'rdc_bc') or not align_id in spin.rdc.keys() or not align_id in spin.rdc_bc.keys(): 210 continue 211 212 # Append the data. 213 data[-1].append([convert(spin.rdc_bc[align_id], align_id), convert(spin.rdc[align_id], align_id)]) 214 215 # Errors. 216 if err_flag: 217 if hasattr(spin, 'rdc_err') and align_id in spin.rdc_err.keys(): 218 data[-1][-1].append(convert(spin.rdc_err[align_id], align_id)) 219 else: 220 data[-1][-1].append(None) 221 222 # Label. 223 data[-1][-1].append(spin_id) 224 225 # The data size. 226 size = len(data) 227 228 # Only one data set. 229 data = [data] 230 231 # Graph type. 232 if err_flag: 233 graph_type = 'xydy' 234 else: 235 graph_type = 'xy' 236 237 # Grace file. 238 if format == 'grace': 239 # The header. 240 grace.write_xy_header(file=file, title="RDC correlation plot", sets=size, set_names=[None]+cdp.rdc_ids, linestyle=[2]+[0]*size, data_type=['rdc_bc', 'rdc'], axis_min=[-10, -10], axis_max=[10, 10], legend_pos=[1, 0.5]) 241 242 # The main data. 243 grace.write_xy_data(data=data, file=file, graph_type=graph_type)

244 245

246 -def delete(align_id=None):

247 """Delete the RDC data corresponding to the alignment ID. 248 249 @keyword align_id: The alignment tensor ID string. If not specified, all data will be deleted. 250 @type align_id: str or None 251 """ 252 253 # Test if the current data pipe exists. 254 pipes.test() 255 256 # Test if sequence data exists. 257 if not exists_mol_res_spin_data(): 258 raise RelaxNoSequenceError 259 260 # Check that the ID exists. 261 if align_id and not align_id in cdp.rdc_ids: 262 raise RelaxError("The RDC alignment id '%s' does not exist" % align_id) 263 264 # The IDs. 265 if not align_id: 266 ids = deepcopy(cdp.rdc_ids) 267 else: 268 ids = [align_id] 269 270 # Loop over the alignments, removing all the corresponding data. 271 for id in ids: 272 # The RDC ID. 273 cdp.rdc_ids.pop(cdp.rdc_ids.index(id)) 274 275 # The data type. 276 if hasattr(cdp, 'rdc_data_types') and cdp.rdc_data_types.has_key(id): 277 cdp.rdc_data_types.pop(id) 278 279 # The spin data. 280 for spin in spin_loop(): 281 # The data. 282 if hasattr(spin, 'rdc') and spin.rdc.has_key(id): 283 spin.rdc.pop(id) 284 285 # The error. 286 if hasattr(spin, 'rdc_err') and spin.rdc_err.has_key(id): 287 spin.rdc_err.pop(id) 288 289 # Clean the global data. 290 if not hasattr(cdp, 'pcs_ids') or id not in cdp.pcs_ids: 291 cdp.align_ids.pop(id)

292 293

294 -def display(align_id=None, bc=False):

295 """Display the RDC data corresponding to the alignment ID. 296 297 @keyword align_id: The alignment tensor ID string. 298 @type align_id: str 299 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be displayed. 300 @type bc: bool 301 """ 302 303 # Call the write method with sys.stdout as the file. 304 write(align_id=align_id, file=sys.stdout, bc=bc)

305 306

307 -def q_factors(spin_id=None):

308 """Calculate the Q-factors for the RDC data. 309 310 @keyword spin_id: The spin ID string used to restrict the Q-factor calculation to a subset of all spins. 311 @type spin_id: None or str 312 """ 313 314 # No RDCs, so no Q factors can be calculated. 315 if not hasattr(cdp, 'rdc_ids') or not len(cdp.rdc_ids): 316 warn(RelaxWarning("No RDC data exists, Q factors cannot be calculated.")) 317 return 318 319 # Q-factor dictonaries. 320 cdp.q_factors_rdc = {} 321 cdp.q_factors_rdc_norm2 = {} 322 323 # Loop over the alignments. 324 for align_id in cdp.rdc_ids: 325 # Init. 326 D2_sum = 0.0 327 sse = 0.0 328 329 # Spin loop. 330 dj = None 331 N = 0 332 spin_count = 0 333 rdc_data = False 334 rdc_bc_data = False 335 for spin in spin_loop(spin_id): 336 # Skip deselected spins. 337 if not spin.select: 338 continue 339 340 # Increment the spin counter. 341 spin_count += 1 342 343 # Data checks. 344 if hasattr(spin, 'rdc') and spin.rdc.has_key(align_id): 345 rdc_data = True 346 if hasattr(spin, 'rdc_bc') and spin.rdc_bc.has_key(align_id): 347 rdc_bc_data = True 348 349 # Skip spins without RDC data. 350 if not hasattr(spin, 'rdc') or not hasattr(spin, 'rdc_bc') or not spin.rdc.has_key(align_id) or spin.rdc[align_id] == None or not spin.rdc_bc.has_key(align_id) or spin.rdc_bc[align_id] == None: 351 continue 352 353 # Sum of squares. 354 sse = sse + (spin.rdc[align_id] - spin.rdc_bc[align_id])**2 355 356 # Sum the RDCs squared (for one type of normalisation). 357 D2_sum = D2_sum + spin.rdc[align_id]**2 358 359 # Gyromagnetic ratios. 360 gx = return_gyromagnetic_ratio(spin.heteronuc_type) 361 gh = return_gyromagnetic_ratio(spin.proton_type) 362 363 # Calculate the RDC dipolar constant (in Hertz, and the 3 comes from the alignment tensor), and append it to the list. 364 dj_new = 3.0/(2.0*pi) * dipolar_constant(gx, gh, spin.r) 365 if dj and dj_new != dj: 366 raise RelaxError("All the RDCs must come from the same nucleus type.") 367 else: 368 dj = dj_new 369 370 # Increment the number of data sets. 371 N = N + 1 372 373 # Warnings (and then exit). 374 if not spin_count: 375 warn(RelaxWarning("No spins have been used in the calculation.")) 376 return 377 if not rdc_data: 378 warn(RelaxWarning("No RDC data can be found.")) 379 return 380 if not rdc_bc_data: 381 warn(RelaxWarning("No back-calculated RDC data can be found.")) 382 return 383 384 # Normalisation factor of 2Da^2(4 + 3R)/5. 385 D = dj * cdp.align_tensors[cdp.align_ids.index(align_id)].A_diag 386 Da = 1.0/3.0 * (D[2, 2] - (D[0, 0]+D[1, 1])/2.0) 387 Dr = 1.0/3.0 * (D[0, 0] - D[1, 1]) 388 if Da == 0: 389 R = nan 390 else: 391 R = Dr / Da 392 norm = 2.0 * (Da)**2 * (4.0 + 3.0*R**2)/5.0 393 if Da == 0.0: 394 norm = 1e-15 395 396 # The Q-factor for the alignment. 397 Q = sqrt(sse / N / norm) 398 cdp.q_factors_rdc[align_id] = Q 399 cdp.q_factors_rdc_norm2[align_id] = sqrt(sse / D2_sum) 400 401 # The total Q-factor. 402 cdp.q_rdc = 0.0 403 cdp.q_rdc_norm2 = 0.0 404 for id in cdp.q_factors_rdc: 405 cdp.q_rdc = cdp.q_rdc + cdp.q_factors_rdc[id]**2 406 for id in cdp.q_factors_rdc_norm2: 407 cdp.q_rdc_norm2 = cdp.q_rdc_norm2 + cdp.q_factors_rdc_norm2[id]**2 408 cdp.q_rdc = sqrt(cdp.q_rdc / len(cdp.q_factors_rdc)) 409 cdp.q_rdc_norm2 = sqrt(cdp.q_rdc_norm2 / len(cdp.q_factors_rdc_norm2))

410 411

412 -def read(align_id=None, file=None, dir=None, file_data=None, data_type='D', spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None, neg_g_corr=False):

413 """Read the RDC data from file. 414 415 @keyword align_id: The alignment tensor ID string. 416 @type align_id: str 417 @keyword file: The name of the file to open. 418 @type file: str 419 @keyword dir: The directory containing the file (defaults to the current directory if None). 420 @type dir: str or None 421 @keyword file_data: An alternative to opening a file, if the data already exists in the correct format. The format is a list of lists where the first index corresponds to the row and the second the column. 422 @type file_data: list of lists 423 @keyword data_type: A string which is set to 'D' means that the splitting in the aligned sample was assumed to be J + D, or if set to '2D' then the splitting was taken as J + 2D. 424 @keyword spin_id_col: The column containing the spin ID strings. If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none. 425 @type spin_id_col: int or None 426 @keyword mol_name_col: The column containing the molecule name information. If supplied, spin_id_col must be None. 427 @type mol_name_col: int or None 428 @keyword res_name_col: The column containing the residue name information. If supplied, spin_id_col must be None. 429 @type res_name_col: int or None 430 @keyword res_num_col: The column containing the residue number information. If supplied, spin_id_col must be None. 431 @type res_num_col: int or None 432 @keyword spin_name_col: The column containing the spin name information. If supplied, spin_id_col must be None. 433 @type spin_name_col: int or None 434 @keyword spin_num_col: The column containing the spin number information. If supplied, spin_id_col must be None. 435 @type spin_num_col: int or None 436 @keyword data_col: The column containing the RDC data in Hz. 437 @type data_col: int or None 438 @keyword error_col: The column containing the RDC errors. 439 @type error_col: int or None 440 @keyword sep: The column separator which, if None, defaults to whitespace. 441 @type sep: str or None 442 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. 443 @type spin_id: None or str 444 @keyword neg_g_corr: A flag which is used to correct for the negative gyromagnetic ratio of 15N. If True, a sign inversion will be applied to all RDC values to be loaded. 445 @type neg_g_corr: bool 446 """ 447 448 # Test if the current data pipe exists. 449 pipes.test() 450 451 # Test if sequence data exists. 452 if not exists_mol_res_spin_data(): 453 raise RelaxNoSequenceError 454 455 # Either the data or error column must be supplied. 456 if data_col == None and error_col == None: 457 raise RelaxError("One of either the data or error column must be supplied.") 458 459 # Store the data type as global data (need for the conversion of spin data). 460 if not hasattr(cdp, 'rdc_data_types'): 461 cdp.rdc_data_types = {} 462 if not cdp.rdc_data_types.has_key(align_id): 463 cdp.rdc_data_types[align_id] = data_type 464 465 # Spin specific data. 466 ##################### 467 468 # Loop over the RDC data. 469 spin_ids = [] 470 values = [] 471 errors = [] 472 for data in read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, sep=sep, spin_id=spin_id): 473 # Unpack. 474 if data_col and error_col: 475 id, value, error = data 476 elif data_col: 477 id, value = data 478 error = None 479 else: 480 id, error = data 481 value = None 482 483 # Test the error value (cannot be 0.0). 484 if error == 0.0: 485 raise RelaxError("An invalid error value of zero has been encountered.") 486 487 # Get the corresponding spin container. 488 spin = return_spin(id) 489 if spin == None: 490 warn(RelaxNoSpinWarning(id)) 491 continue 492 493 # Add the data. 494 if data_col: 495 # Initialise. 496 if not hasattr(spin, 'rdc'): 497 spin.rdc = {} 498 499 # Data conversion. 500 value = convert(value, align_id, to_intern=True) 501 502 # Correction for the negative gyromagnetic ratio of 15N. 503 if neg_g_corr and value != None: 504 value = -value 505 506 # Append the value. 507 spin.rdc[align_id] = value 508 509 # Add the error. 510 if error_col: 511 # Initialise. 512 if not hasattr(spin, 'rdc_err'): 513 spin.rdc_err = {} 514 515 # Data conversion. 516 error = convert(error, align_id, to_intern=True) 517 518 # Append the error. 519 spin.rdc_err[align_id] = error 520 521 # Append the data for print out. 522 spin_ids.append(id) 523 values.append(value) 524 errors.append(error) 525 526 # Print out. 527 write_spin_data(file=sys.stdout, spin_ids=spin_ids, data=values, data_name='RDCs', error=errors, error_name='RDC_error') 528 529 530 # Global (non-spin specific) data. 531 ################################## 532 533 # No data, so return. 534 if not len(values): 535 return 536 537 # Initialise. 538 if not hasattr(cdp, 'align_ids'): 539 cdp.align_ids = [] 540 if not hasattr(cdp, 'rdc_ids'): 541 cdp.rdc_ids = [] 542 543 # Add the RDC id string. 544 if align_id not in cdp.align_ids: 545 cdp.align_ids.append(align_id) 546 if align_id not in cdp.rdc_ids: 547 cdp.rdc_ids.append(align_id)

548 549

550 -def weight(align_id=None, spin_id=None, weight=1.0):

551 """Set optimisation weights on the RDC data. 552 553 @keyword align_id: The alignment tensor ID string. 554 @type align_id: str 555 @keyword spin_id: The spin ID string. 556 @type spin_id: None or str 557 @keyword weight: The optimisation weight. The higher the value, the more importance the RDC will have. 558 @type weight: float or int. 559 """ 560 561 # Test if sequence data exists. 562 if not exists_mol_res_spin_data(): 563 raise RelaxNoSequenceError 564 565 # Test if data corresponding to 'align_id' exists. 566 if not hasattr(cdp, 'rdc_ids') or align_id not in cdp.rdc_ids: 567 raise RelaxNoRDCError(align_id) 568 569 # Loop over the spins. 570 for spin in spin_loop(spin_id): 571 # No data structure. 572 if not hasattr(spin, 'rdc_weight'): 573 spin.rdc_weight = {} 574 575 # Set the weight. 576 spin.rdc_weight[align_id] = weight

577 578

579 -def write(align_id=None, file=None, dir=None, bc=False, force=False):

580 """Display the RDC data corresponding to the alignment ID. 581 582 @keyword align_id: The alignment tensor ID string. 583 @type align_id: str 584 @keyword file: The file name or object to write to. 585 @type file: str or file object 586 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 587 @type dir: str 588 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be written. 589 @type bc: bool 590 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 591 @type force: bool 592 """ 593 594 # Test if the current pipe exists. 595 pipes.test() 596 597 # Test if the sequence data is loaded. 598 if not exists_mol_res_spin_data(): 599 raise RelaxNoSequenceError 600 601 # Test if data corresponding to 'align_id' exists. 602 if not hasattr(cdp, 'rdc_ids') or align_id not in cdp.rdc_ids: 603 raise RelaxNoRDCError(align_id) 604 605 # Open the file for writing. 606 file = open_write_file(file, dir, force) 607 608 # Loop over the spins and collect the data. 609 mol_names = [] 610 res_nums = [] 611 res_names = [] 612 spin_nums = [] 613 spin_names = [] 614 values = [] 615 errors = [] 616 for spin, mol_name, res_num, res_name in spin_loop(full_info=True): 617 # Skip spins with no RDCs. 618 if not bc and (not hasattr(spin, 'rdc') or align_id not in spin.rdc.keys()): 619 continue 620 elif bc and (not hasattr(spin, 'rdc_bc') or align_id not in spin.rdc_bc.keys()): 621 continue 622 623 # Append the spin data. 624 mol_names.append(mol_name) 625 res_nums.append(res_num) 626 res_names.append(res_name) 627 spin_nums.append(spin.num) 628 spin_names.append(spin.name) 629 630 # The value. 631 if bc: 632 values.append(convert(spin.rdc_bc[align_id], align_id)) 633 else: 634 values.append(convert(spin.rdc[align_id], align_id)) 635 636 # The error. 637 if hasattr(spin, 'rdc_err') and align_id in spin.rdc_err.keys(): 638 errors.append(convert(spin.rdc_err[align_id], align_id)) 639 else: 640 errors.append(None) 641 642 # Write out. 643 write_spin_data(file=file, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='RDCs', error=errors, error_name='RDC_error')

644

Source Code for Module generic_fns.rdc