relax_io

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2012 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module containing advanced IO functions for relax. 24 25 This includes IO redirection, automatic loading and writing of compressed files (both Gzip and BZ2 26 compression), reading and writing of files, processing of the contents of files, etc. 27 """ 28 29 # Dependency check module. 30 import dep_check 31 32 # Python module imports. 33 if dep_check.bz2_module: 34 import bz2 35 from bz2 import BZ2File 36 else: 37 BZ2File = object 38 if dep_check.gzip_module: 39 import gzip 40 if dep_check.devnull_import: 41 from os import devnull 42 if dep_check.io_module: 43 import io 44 from os import F_OK, X_OK, access, altsep, getenv, makedirs, pathsep, remove, sep 45 from os.path import expanduser, basename, splitext 46 from re import match, search 47 import sys 48 from sys import stdin, stdout, stderr 49 from warnings import warn 50 51 # relax module imports. 
52 from check_types import is_filetype 53 import generic_fns 54 from relax_errors import RelaxError, RelaxFileError, RelaxFileOverwriteError, RelaxInvalidSeqError, RelaxMissingBinaryError, RelaxNoInPathError, RelaxNonExecError 55 from relax_warnings import RelaxWarning, RelaxFileEmptyWarning 56 57 58

def delete(file_name, dir=None, fail=True):
    """Remove a file from disk, also trying the '.bz2' and '.gz' variants of its name.

    @param file_name:       The name of the file to delete.
    @type file_name:        str
    @keyword dir:           The directory containing the file.
    @type dir:              None or str
    @keyword fail:          A flag which if True will cause RelaxFileError to be raised when no
                            matching file is found.
    @type fail:             bool
    @raises RelaxFileError: If neither the file nor a compressed variant exists, and fail is True.
    """

    # The expanded path of the file, as requested.
    base_path = get_file_path(file_name, dir)

    # Try the name as given first, then the Bzip2 and Gzip compressed names.
    for extension in ('', '.bz2', '.gz'):
        candidate = base_path + extension
        if access(candidate, F_OK):
            remove(candidate)
            return

    # Nothing was found.
    if fail:
        raise RelaxFileError(base_path)

88 89

def determine_compression(file_path):
    """Locate the file on disk and determine its compression type.

    The path is tried as given first, then with '.bz2' appended, then with '.gz' appended.  The
    compression type is decided by the literal extension of the file which was found.

    @param file_path:       The full file path of the file.
    @type file_path:        str
    @return:                A tuple of the compression type and full path of the file (including
                            its extension).  A value of 0 corresponds to no compression.  Bzip2
                            compression corresponds to a value of 1.  Gzip compression corresponds
                            to a value of 2.
    @rtype:                 (int, str)
    @raises RelaxFileError: If none of the three candidate paths exist.
    """

    # Find the first candidate which exists.
    for candidate in (file_path, file_path + '.bz2', file_path + '.gz'):
        if access(candidate, F_OK):
            break

    # The file doesn't exist.
    else:
        raise RelaxFileError(file_path)

    # Literal suffix tests.  A regular expression such as '.bz2$' has an unescaped dot and would
    # wrongly match names like 'databz2'.
    if candidate.endswith('.bz2'):
        compress_type = 1
    elif candidate.endswith('.gz'):
        compress_type = 2
    else:
        compress_type = 0

    # Return the compression type and the path found.
    return compress_type, candidate

125 126

def extract_data(file=None, dir=None, file_data=None, sep=None):
    """Return all data in the file as a list of lines where each line is a list of line elements.

    A file opened by this function (i.e. when the file argument is a string) is always closed
    again before returning.  A file object supplied by the caller is left open, as the caller owns
    it.

    @param file:        The file to extract the data from.
    @type file:         str or file object
    @param dir:         The path where the file is located.  If None and the file argument is a
                        string, then the current directory is assumed.
    @type dir:          str or None
    @param file_data:   If the file data has already been extracted from the file, it can be
                        passed into this function using this argument.  If data is supplied
                        here, then the file and dir args are ignored.
    @type file_data:    list of str
    @param sep:         The character separating the columns in the file data.  If None, then
                        whitespace is assumed.
    @type sep:          str
    @return:            The file data.
    @rtype:             list of lists of str
    """

    # Data not already extracted from the file.
    if not file_data:
        # Open the file, remembering if it was opened here.
        opened_here = isinstance(file, str)
        if opened_here:
            file = open_read_file(file_name=file, dir=dir)

        # Read the lines, releasing the file handle even if the read fails.
        try:
            file_data = file.readlines()
        finally:
            if opened_here:
                file.close()

    # A false separator (None or the empty string) means whitespace splitting.
    delimiter = sep if sep else None

    # Split each line into its columns and return the data.
    return [line.split(delimiter) for line in file_data]

170 171

def file_root(file_path):
    """Return the root file name, stripped of path and extension details.

    @param file_path:   The full path to the file.
    @type file_path:    str
    @return:            The file root (with all directories and the final extension removed).
    @rtype:             str
    """

    # Drop the final extension first, then the leading directories.
    without_ext = splitext(file_path)[0]
    return basename(without_ext)

183 184

def get_file_path(file_name=None, dir=None):
    """Build the full file path from the directory and file name, expanding any '~'.

    @param file_name:   The name of the file.
    @type file_name:    str
    @param dir:         The path where the file is located.  If None, then the current directory
                        is assumed.
    @type dir:          str
    @return:            The full file path.
    @rtype:             str
    """

    # Prefix the directory, if one was given.
    path = dir + sep + file_name if dir else file_name

    # An empty path or None is returned untouched, as expanduser() cannot handle None.
    if not path:
        return path

    # Expand any ~ characters.
    return expanduser(path)

210 211

def io_streams_restore(verbosity=1):
    """Reset sys.stdin, sys.stdout and sys.stderr to the original Python streams.

    @param verbosity:   The verbosity level.  If non-zero, a message is printed for each stream.
    @type verbosity:    int
    """

    # Announce what is about to happen (printed through the current sys.stdout).
    if verbosity:
        messages = (
            "Restoring the sys.stdin IO stream to the Python STDIN IO stream.",
            "Restoring the sys.stdout IO stream to the Python STDOUT IO stream.",
            "Restoring the sys.stderr IO stream to the Python STDERR IO stream.",
        )
        for message in messages:
            print(message)

    # Restore the streams.
    sys.stdin, sys.stdout, sys.stderr = sys.__stdin__, sys.__stdout__, sys.__stderr__

229 230

def io_streams_log(file_name=None, dir=None, verbosity=1):
    """Turn on logging, sending both STDOUT and STDERR streams to a file.

    STDOUT is sent to the log file only, whereas STDERR is split between the Python stderr stream
    and the log file.  An existing log file is overwritten.

    @param file_name:   The name of the file.
    @type file_name:    str
    @param dir:         The path where the file is located.  If None, then the current directory
                        is assumed.
    @type dir:          str
    @param verbosity:   The verbosity level.
    @type verbosity:    int
    """

    # Open the log file, always overwriting.
    log_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, verbosity=verbosity, return_path=True)

    # The splitter for STDERR.
    stderr_splitter = SplitIO()

    # Print out (this still goes through the old sys.stdout).
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the python stderr IO stream and the log file '%s'." % file_path)

    # Connect the splitter to the module level stderr stream and the log file.
    stderr_splitter.split(stderr, log_file)

    # IO stream redirection.
    sys.stdin = stdin
    sys.stdout = log_file
    sys.stderr = stderr_splitter

265 266

def io_streams_tee(file_name=None, dir=None, compress_type=0, verbosity=1):
    """Turn on teeing, duplicating both the STDOUT and STDERR streams into a file.

    @param file_name:       The name of the file.
    @type file_name:        str
    @param dir:             The path where the file is located.  If None, then the current
                            directory is assumed.
    @type dir:              str
    @param compress_type:   The compression type.  The integer values correspond to the
                            compression type:  0, no compression; 1, Bzip2 compression; 2, Gzip
                            compression.
    @type compress_type:    int
    @param verbosity:       The verbosity level.
    @type verbosity:        int
    """

    # Open the tee file, always overwriting.
    tee_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, compress_type=compress_type, verbosity=verbosity, return_path=1)

    # One splitter per output stream.
    out_splitter = SplitIO()
    err_splitter = SplitIO()

    # Print out (this still goes through the old sys.stdout).
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to both the python stdout IO stream and the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the python stderr IO stream and the log file '%s'." % file_path)

    # Connect each splitter to the module level stream and the tee file.
    out_splitter.split(stdout, tee_file)
    err_splitter.split(stderr, tee_file)

    # IO stream redirection.
    sys.stdin = stdin
    sys.stdout = out_splitter
    sys.stderr = err_splitter

304 305

def mkdir_nofail(dir=None, verbosity=1):
    """Create the given directory, or exit without raising an error if this is not possible.

    This function never raises on a failed directory creation.  If the verbosity is non-zero, the
    reason is printed:  either that the directory already exists, or the error reported by the
    operating system.

    @param dir:         The directory to create.  If None, nothing is done.
    @type dir:          str
    @param verbosity:   The verbosity level.
    @type verbosity:    int
    """

    # Local import, as isdir is only needed here.
    from os.path import isdir

    # No directory given.
    if dir is None:
        return

    # Make the directory.
    try:
        makedirs(dir)
    except OSError:
        # Silent mode.
        if not verbosity:
            return

        # The directory really is there.
        if isdir(dir):
            print("Directory ." + sep + dir + " already exists.\n")

        # Some other failure (permissions, a file in the way, etc.), so report it accurately
        # rather than claiming that the directory exists.
        else:
            err = sys.exc_info()[1]
            print("Directory ." + sep + dir + " could not be created:  " + str(err) + "\n")

325 326

def open_read_file(file_name=None, dir=None, verbosity=1):
    """Open the file for reading, transparently handling Bzip2 and Gzip compressed files.

    If the file name is already an open file object, it is returned untouched.  Otherwise the file
    is located (also trying the '.bz2' and '.gz' extensions) and opened in text mode.

    @param file_name:   The name of the file to open, or an already open file object.
    @type file_name:    str or file object
    @param dir:         The path where the file is located.  If None, then the current directory
                        is assumed.
    @type dir:          str
    @param verbosity:   The verbosity level.
    @type verbosity:    int
    @return:            The open file object.
    @rtype:             file object
    @raises RelaxError: If the file name is invalid, the file cannot be opened, or the file is
                        Bzip2 compressed but the bz2 module is missing.
    """

    # A file descriptor object.
    if is_filetype(file_name):
        # Nothing to do here!
        return file_name

    # Invalid file name.
    if not file_name and not isinstance(file_name, str):
        raise RelaxError("The file name " + repr(file_name) + " " + repr(type(file_name)) + " is invalid and cannot be opened.")

    # File path.
    file_path = get_file_path(file_name, dir)

    # Test if the file exists and determine the compression type.
    compress_type, file_path = determine_compression(file_path)

    # The Python version switches for text mode handling of compressed files.
    py3 = sys.version_info[0] == 3
    py33_plus = py3 and sys.version_info[1] >= 3

    # Open the file for reading.
    try:
        # Print out.
        if verbosity:
            print("Opening the file " + repr(file_path) + " for reading.")

        # Uncompressed text.
        if compress_type == 0:
            file_obj = open(file_path, 'r')

        # Bzip2 compressed text.
        elif compress_type == 1:
            # The bz2 module is missing.
            if not dep_check.bz2_module:
                raise RelaxError("Cannot open the file " + repr(file_path) + ", try uncompressing first. " + dep_check.bz2_module_message + ".")

            # Python 3.3 text mode (explicit 'rt', matching the gzip branch).
            if py33_plus:
                file_obj = bz2.open(file_path, 'rt')

            # Python 3.0, 3.1 and 3.2 text mode.
            elif py3:
                file_obj = io.TextIOWrapper(Bzip2Fixed(file_path, 'r'))

            # Python 2 text mode.
            else:
                file_obj = bz2.BZ2File(file_path, 'r')

        # Gzipped compressed text.
        elif compress_type == 2:
            # Python 3.3 text mode.
            if py33_plus:
                file_obj = gzip.open(file_path, 'rt')

            # Python 3.0, 3.1 and 3.2 text mode.
            elif py3:
                file_obj = io.TextIOWrapper(GzipFixed(file_path, 'r'))

            # Python 2 text mode.
            else:
                file_obj = gzip.GzipFile(file_path, 'r')

    # Cannot open.
    except IOError:
        err = sys.exc_info()[1]

        # The system error text if present, otherwise the full error.  Indexing args[1] would fail
        # with an IndexError for single argument IOErrors.
        reason = getattr(err, 'strerror', None) or str(err)
        raise RelaxError("Cannot open the file " + repr(file_path) + ". " + reason + ".")

    # Return the opened file.
    return file_obj

404 405

def open_write_file(file_name=None, dir=None, force=False, compress_type=0, verbosity=1, return_path=False):
    """Function for opening a file for writing and creating directories if necessary.

    If the file name is already a file object (or anything with a write() method), it is returned
    untouched.  A file name containing the text 'devnull' opens the null device.

    @param file_name:       The name of the file to write to, or an already open writable object.
    @type file_name:        str or file object
    @param dir:             The path where the file is located.  If None, then the current
                            directory is assumed.
    @type dir:              str
    @param force:           Boolean argument which if True causes the file to be overwritten if
                            it already exists.
    @type force:            bool
    @param compress_type:   The compression type.  The integer values correspond to the
                            compression type:  0, no compression; 1, Bzip2 compression; 2, Gzip
                            compression.  If Bzip2 is requested but the bz2 module is missing,
                            Gzip compression is used instead and a warning is given.
    @type compress_type:    int
    @param verbosity:       The verbosity level.
    @type verbosity:        int
    @param return_path:     If True, the function will return a tuple of the file object and the
                            full file path (the path is None for the null device).
    @type return_path:      bool
    @return:                The open, writable file object and, if the return_path is True, then
                            the full file path is returned as well.
    @rtype:                 writable file object (if return_path, then a tuple of the writable
                            file and the full file path)
    @raises RelaxFileOverwriteError:    If the file exists and force is not set.
    @raises RelaxError:     If the file cannot be opened, or the null device is unavailable.
    """

    # A file descriptor object.
    # NOTE(review): the return_path flag is ignored for pre-opened objects - confirm no caller
    # combines the two.
    if is_filetype(file_name):
        # Nothing to do here!
        return file_name

    # Something pretending to be a file object.
    if hasattr(file_name, 'write'):
        # Nothing to do here!
        return file_name

    # The null device.
    if search('devnull', file_name):
        # Devnull could not be imported!
        if not dep_check.devnull_import:
            raise RelaxError(dep_check.devnull_import_message + ". To use devnull, please upgrade to Python >= 2.4.")

        # Print out.
        if verbosity:
            print("Opening the null device file for writing.")

        # Open the null device.
        file_obj = open(devnull, 'w')

        # Return the file.
        if return_path:
            return file_obj, None
        else:
            return file_obj

    # Create the directories.
    mkdir_nofail(dir, verbosity=0)

    # File path.
    file_path = get_file_path(file_name, dir)

    # Bzip2 compression.
    if compress_type == 1:
        # Switch to gzip compression if the bz2 module is missing.  This is checked first so that
        # the switch also happens for names which already end in '.bz2'.
        if not dep_check.bz2_module:
            warn(RelaxWarning("Cannot use Bzip2 compression, using gzip compression instead. " + dep_check.bz2_module_message + "."))
            compress_type = 2

        # Add the extension if missing (literal suffix test).
        elif not file_path.endswith('.bz2'):
            file_path = file_path + '.bz2'

    # Gzip compression.
    if compress_type == 2 and not file_path.endswith('.gz'):
        file_path = file_path + '.gz'

    # Fail if the file already exists and the force flag is not set.
    if access(file_path, F_OK) and not force:
        raise RelaxFileOverwriteError(file_path, 'force flag')

    # The Python version switches for text mode handling of compressed files.
    py3 = sys.version_info[0] == 3
    py33_plus = py3 and sys.version_info[1] >= 3

    # Open the file for writing.
    try:
        # Print out.
        if verbosity:
            print("Opening the file " + repr(file_path) + " for writing.")

        # Uncompressed text.
        if compress_type == 0:
            file_obj = open(file_path, 'w')

        # Bzip2 compressed text.
        elif compress_type == 1:
            # Python 3.3 text mode.
            if py33_plus:
                file_obj = bz2.open(file_path, 'wt')

            # Python 3.0, 3.1 and 3.2 text mode.
            elif py3:
                file_obj = io.TextIOWrapper(Bzip2Fixed(file_path, 'w'))

            # Python 2 text mode.
            else:
                file_obj = bz2.BZ2File(file_path, 'w')

        # Gzipped compressed text.
        elif compress_type == 2:
            # Python 3.3 text mode.
            if py33_plus:
                file_obj = gzip.open(file_path, 'wt')

            # Python 3.0, 3.1 and 3.2 text mode.
            elif py3:
                file_obj = io.TextIOWrapper(GzipFixed(file_path, 'w'))

            # Python 2 text mode.
            else:
                file_obj = gzip.GzipFile(file_path, 'w')

    # Cannot open.
    except IOError:
        err = sys.exc_info()[1]

        # The system error text if present, otherwise the full error.  Indexing args[1] would fail
        # with an IndexError for single argument IOErrors.
        reason = getattr(err, 'strerror', None) or str(err)
        raise RelaxError("Cannot open the file " + repr(file_path) + ". " + reason + ".")

    # Return the opened file.
    if return_path:
        return file_obj, file_path
    else:
        return file_obj

533 534

def read_spin_data(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None):
    """Generator function for reading the spin specific data from file.

    Description
    ===========

    This function reads a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.  Lines which fail validation or contain unconvertible values are skipped with a warning.


    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  NOTE(review): this argument is currently not used by this function.
    @type spin_id:          None or str
    @return:                For each valid line, a tuple of the molecule name, residue number, residue name, spin number and spin name is yielded, followed by the data value (if data_col is given) and then the error value (if error_col is given).
    @rtype:                 tuple of (str, int, str, int, str), optionally extended by one or two floats
    @raises RelaxError:     If a column argument is zero, if spin_id_col is mixed with the other spin columns, or if no line of the file yields data.
    """

    # Argument tests.
    col_args = [spin_id_col, mol_name_col, res_name_col, res_num_col, spin_name_col, spin_num_col, data_col, error_col]
    col_arg_names = ['spin_id_col', 'mol_name_col', 'res_name_col', 'res_num_col', 'spin_name_col', 'spin_num_col', 'data_col', 'error_col']
    for col, col_name in zip(col_args, col_arg_names):
        if col == 0:
            raise RelaxError("The '%s' argument cannot be zero, column numbering starts at one." % col_name)
    if spin_id_col and (mol_name_col or res_name_col or res_num_col or spin_name_col or spin_num_col):
        raise RelaxError("If the 'spin_id_col' argument has been supplied, then the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col must all be set to None.")

    # Extract the data from the file.
    if not file_data:
        # Extract, honouring the requested column separator.
        file_data = extract_data(file, dir, sep=sep)

        # Strip the data of all comments and empty lines (comments are kept when spin IDs are used, as these contain the '#' character).
        if spin_id_col != None:
            file_data = strip(file_data, comments=False)
        else:
            file_data = strip(file_data)

    # No data!
    if not file_data:
        warn(RelaxFileEmptyWarning(file))
        return

    # Yield the data, spin by spin.
    missing_data = True
    for line in file_data:
        # Convert quoted spin IDs.  The length and slice guards avoid an IndexError on short lines or empty columns - such lines are left for the sequence validation below to handle.
        if spin_id_col != None and len(line) >= spin_id_col and line[spin_id_col-1][:1] in ("\"", "\'"):
            # SECURITY NOTE: eval() is executed on text read from the file.  Only load files from trusted sources, or replace this with a safe literal parser.
            line[spin_id_col-1] = eval(line[spin_id_col-1])

        # Validate the sequence.
        try:
            generic_fns.sequence.validate_sequence(line, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col)
        except RelaxInvalidSeqError:
            # Extract the message string, without the RelaxError bit.
            msg = sys.exc_info()[1]
            string = msg.__str__()[12:-1]

            # Give a warning.
            warn(RelaxWarning(string))

            # Skip the line.
            continue

        # Get the spin data from the ID.
        if spin_id_col:
            # Invalid spin ID.
            if line[spin_id_col-1] == '#':
                warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                continue

            mol_name, res_num, res_name, spin_num, spin_name = generic_fns.mol_res_spin.spin_id_to_data_list(line[spin_id_col-1])

        # Convert the spin data.
        else:
            # The molecule.
            mol_name = None
            if mol_name_col != None and line[mol_name_col-1] != 'None':
                mol_name = line[mol_name_col-1]

            # The residue number, catching bad values.
            res_num = None
            if res_num_col != None:
                try:
                    if line[res_num_col-1] != 'None':
                        res_num = int(line[res_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid residue number, skipping the line %s" % line))
                    continue

            # The residue name.
            res_name = None
            if res_name_col != None and line[res_name_col-1] != 'None':
                res_name = line[res_name_col-1]

            # The spin number, catching bad values.
            spin_num = None
            if spin_num_col != None:
                try:
                    if line[spin_num_col-1] != 'None':
                        spin_num = int(line[spin_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid spin number, skipping the line %s" % line))
                    continue

            # The spin name.
            spin_name = None
            if spin_name_col != None and line[spin_name_col-1] != 'None':
                spin_name = line[spin_name_col-1]

        # Convert the data (the text 'None' maps to the value None).
        value = None
        if data_col != None:
            try:
                if line[data_col-1] != 'None':
                    value = float(line[data_col-1])

            # Bad data.
            except ValueError:
                warn(RelaxWarning("Invalid data, skipping the line %s" % line))
                continue

        # Convert the errors (the text 'None' maps to the value None).
        error = None
        if error_col != None:
            try:
                if line[error_col-1] != 'None':
                    error = float(line[error_col-1])

            # Bad data.
            except ValueError:
                warn(RelaxWarning("Invalid errors, skipping the line %s" % line))
                continue

        # Right, data is OK and exists.
        missing_data = False

        # Yield the data.
        if data_col and error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value, error
        elif data_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value
        elif error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, error
        else:
            yield mol_name, res_num, res_name, spin_num, spin_name

    # Hmmm, no data!
    if missing_data:
        raise RelaxError("No corresponding data could be found within the file.")

724 725

def strip(data, comments=True):
    """Filter the file data structure, dropping empty lines and, optionally, comment lines.

    A comment line is one whose first element starts with the '#' character.

    @param data:        The file data to clean up.
    @type data:         list of lists of str
    @keyword comments:  A flag which if True will cause comments to be deleted.
    @type comments:     bool
    @return:            A new list with the empty and comment lines removed.
    @rtype:             list of lists of str
    """

    # Keep the non-empty rows which are not comments (when comment removal is on).
    return [row for row in data if len(row) != 0 and not (comments and search("^#", row[0]))]

755 756

def test_binary(binary):
    """Function for testing that the binary string corresponds to a valid executable file.

    If the binary contains a directory separator, it is treated as a path which must exist and be
    executable.  Otherwise the directories of the PATH environmental variable are searched for a
    file of that name.

    @param binary:                      The name or path of the binary executable file.
    @type binary:                       str
    @raises RelaxMissingBinaryError:    If a path was given and the file does not exist.
    @raises RelaxNonExecError:          If a path was given and the file is not executable.
    @raises RelaxNoInPathError:         If a bare name was given and it is not found in the PATH.
    """

    # Plain substring tests for the directory separators.  Building a regular expression from the
    # separators breaks on systems where the separator is a backslash.
    has_directory = sep in binary or (altsep is not None and altsep in binary)

    # The full path of the program has been given (if a directory separator has been supplied).
    if has_directory:
        # Test that the binary exists.
        if not access(binary, F_OK):
            raise RelaxMissingBinaryError(binary)

        # Test that the binary is executable.
        if not access(binary, X_OK):
            raise RelaxNonExecError(binary)

        # All is good.
        return

    # Get the PATH environmental variable, which may be unset.
    path_var = getenv('PATH')

    # Split PATH by the path separator.
    path_list = path_var.split(pathsep) if path_var else []

    # Test that the binary exists within the system path (and exit this function instantly once it has been found).
    for directory in path_list:
        if access(directory + sep + binary, F_OK):
            return

    # The binary is not located in the system path!
    raise RelaxNoInPathError(binary)

795 796

def write_data(out=None, headings=None, data=None, sep=None):
    """Write out a table of the data to the given file handle.

    Without a separator, the columns are left justified and padded with whitespace to the width of
    the widest element plus four spaces, and the heading line starts with '# '.  With a separator,
    the elements are simply joined by it and the heading line starts with '#'.  The number of
    columns is taken from the first data row, or from the headings if there is no data.  If there
    is neither data nor headings, nothing is written.

    @keyword out:       The file handle to write to.
    @type out:          file handle
    @keyword headings:  The optional headings to print out.
    @type headings:     list of str or None
    @keyword data:      The data to print out.
    @type data:         list of list of str
    @keyword sep:       The column separator which, if None, defaults to whitespace.
    @type sep:          str or None
    """

    # The rows (missing data is treated as an empty table rather than failing).
    rows = data if data else []

    # The number of columns.
    if rows:
        num_cols = len(rows[0])
    elif headings is not None:
        num_cols = len(headings)
    else:
        return
    columns = range(num_cols)

    # The heading cells, limited to the number of columns.
    heading_cells = None
    if headings is not None:
        heading_cells = [headings[j] for j in columns]

    # Pretty whitespace formatting.
    if sep is None:
        # The comment marker is part of the first heading, so that it counts towards the column width.
        if heading_cells:
            heading_cells[0] = "# " + heading_cells[0]

        # The starting widths from the headings.
        if heading_cells is not None:
            widths = [len(cell) for cell in heading_cells]
        else:
            widths = [0] * num_cols

        # Determine the maximum column widths for nice whitespace formatting.
        for row in rows:
            for j in columns:
                widths[j] = max(widths[j], len(row[j]))

        # The headings, written as a single line.
        if heading_cells is not None:
            out.write("".join([heading_cells[j].ljust(widths[j] + 4) for j in columns]) + '\n')

        # The data, one write per row.
        for row in rows:
            out.write("".join([row[j].ljust(widths[j] + 4) for j in columns]) + '\n')

    # Non-whitespace formatting.
    else:
        # The headings.
        if heading_cells is not None:
            out.write('#' + sep.join(heading_cells) + '\n')

        # The data.
        for row in rows:
            out.write(sep.join([row[j] for j in columns]) + '\n')

880 881

def write_spin_data(file, dir=None, sep=None, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, force=False, data=None, data_name=None, error=None, error_name=None):
    """Function for writing the spin specific data to file.

    Description
    ===========

    This function writes a columnar formatted file where each line corresponds to a spin system.
    Spin identification is either through a spin ID string or through columns containing the
    molecule name, residue name and number, and/or spin name and number.  If the file argument is
    a file name, the file is opened and closed again by this function.  An already open file
    object is written to but left open.


    @param file:            The name of the file to write the data to (or alternatively an already opened file object).
    @type file:             str or file object
    @keyword dir:           The directory to place the file into (defaults to the current directory if None and the file argument is not a file object).
    @type dir:              str or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_ids:      The list of spin ID strings.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers.
    @type res_nums:         None or list of int
    @keyword res_names:     The list of residue names.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers.
    @type spin_nums:        None or list of int
    @keyword spin_names:    The list of spin names.
    @type spin_names:       None or list of str
    @keyword force:         A flag which if True will cause an existing file to be overwritten.
    @type force:            bool
    @keyword data:          A list of the data to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.
    @type data:             list or list of lists
    @keyword data_name:     A name corresponding to the data argument.  If the data argument is a list of lists, then this must also be a list with the same length as the second dimension of the data arg.
    @type data_name:        str or list of str
    @keyword error:         A list of the errors to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.  These will be interspersed between the data columns, if the data is given.  If the data arg is not None, then this must have the same dimensions as that object.
    @type error:            list or list of lists
    @keyword error_name:    A name corresponding to the error argument.  If the error argument is a list of lists, then this must also be a list with the same length as the second dimension of the error arg.
    @type error_name:       str or list of str
    @raises RelaxError:     If the arguments are inconsistent with each other, or if no spin identification lists are given.
    """

    # The dimensionality of the data and errors.
    data_multi = bool(data) and isinstance(data[0], list)
    error_multi = bool(error) and isinstance(error[0], list)

    # Data argument tests.
    if data:
        # Data is a list of lists.
        if data_multi:
            # Data and data_name don't match.
            if not isinstance(data_name, list):
                raise RelaxError("The data_name arg '%s' must be a list as the data argument is a list of lists." % data_name)

            # Error doesn't match.
            if error and (len(data) != len(error) or len(data[0]) != len(error[0])):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

        # Data is a simple list.
        else:
            # Data and data_name don't match.
            if not isinstance(data_name, str):
                raise RelaxError("The data_name arg '%s' must be a string as the data argument is a simple list." % data_name)

            # Error doesn't match.
            if error and len(data) != len(error):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

    # Error argument tests.
    if error:
        # Error is a list of lists, but error_name is not.
        if error_multi:
            if not isinstance(error_name, list):
                raise RelaxError("The error_name arg '%s' must be a list as the error argument is a list of lists." % error_name)

        # Error is a simple list, but error_name is not a string.
        else:
            if not isinstance(error_name, str):
                raise RelaxError("The error_name arg '%s' must be a string as the error argument is a simple list." % error_name)

    # Number of spins check.
    args = [spin_ids, mol_names, res_nums, res_names, spin_nums, spin_names]
    arg_names = ['spin_ids', 'mol_names', 'res_nums', 'res_names', 'spin_nums', 'spin_names']
    N = None
    first_arg = None
    first_arg_name = None
    for arg, arg_name in zip(args, arg_names):
        if isinstance(arg, list):
            # First list match.
            if N is None:
                N = len(arg)
                first_arg = arg
                first_arg_name = arg_name

            # Length check.
            if len(arg) != N:
                raise RelaxError("The %s and %s arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, arg_name, len(first_arg), len(arg)))

    # Nothing?!?
    if N is None:
        raise RelaxError("No spin ID data is present.")

    # Data and error length check.
    if data and len(data) != N:
        raise RelaxError("The %s and data arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(data)))
    if error and len(error) != N:
        raise RelaxError("The %s and error arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(error)))

    # The column headings of the spin arguments (singular forms of the argument names).
    arg_names = ['spin_id', 'mol_name', 'res_num', 'res_name', 'spin_num', 'spin_name']

    # Headers - the spin ID info (only non-empty lists produce a column).
    headings = [arg_name for arg, arg_name in zip(args, arg_names) if arg]

    # Headers - the data, with the errors interleaved.
    if data:
        if data_multi:
            for i in range(len(data[0])):
                headings.append(data_name[i])
                if error:
                    headings.append(error_name[i])
        else:
            headings.append(data_name)
            if error:
                headings.append(error_name)

    # Headers - only errors.
    elif error:
        if error_multi:
            for i in range(len(error[0])):
                headings.append(error_name[i])
        else:
            headings.append(error_name)

    # No headings.
    if headings == []:
        headings = None

    # Spin specific data.
    file_data = []
    for spin_index in range(N):
        # A new data row.
        row = []
        file_data.append(row)

        # The spin ID info (non-string values are written as their repr).
        for arg in args:
            if arg:
                value = arg[spin_index]
                if not isinstance(value, str):
                    value = repr(value)
                row.append(value)

        # The data, with the errors interleaved.
        if data:
            if data_multi:
                for i in range(len(data[0])):
                    row.append(repr(data[spin_index][i]))
                    if error:
                        row.append(repr(error[spin_index][i]))
            else:
                row.append(repr(data[spin_index]))
                if error:
                    row.append(repr(error[spin_index]))

        # Only errors.
        elif error:
            if error_multi:
                for i in range(len(error[0])):
                    row.append(repr(error[spin_index][i]))
            else:
                row.append(repr(error[spin_index]))

    # No data to write, so do nothing!
    if file_data == [] or file_data == [[]]:
        return

    # Open the file for writing, remembering if the file is owned by this function.
    opened_here = isinstance(file, str)
    out = open_write_file(file_name=file, dir=dir, force=force)

    # Write out the file data, closing files opened here so that all data is flushed to disk.
    try:
        write_data(out=out, headings=headings, data=file_data, sep=sep)
    finally:
        if opened_here:
            out.close()

1093 1094 1095

class DummyFileObject:
    """In-memory stand-in for a writable file object.

    All text passed to write() is accumulated in the 'data' string and can be recovered as a list of lines via readlines().
    """

    def __init__(self):
        """Create an empty dummy file which starts out open."""

        # The object is open until close() is called.
        self.closed = False

        # Storage for the text of all write() calls.
        self.data = ''


    def close(self):
        """Mark this object as closed, after which write() calls are refused."""

        self.closed = True


    def write(self, str):
        """Append text to the stored data, mimicking the write() method of a file object.

        @param str:         The text to append.
        @type str:          str
        @raise ValueError:  If the object has already been closed.
        """

        # Refuse to write once closed.
        if self.closed:
            raise ValueError('I/O operation on closed file')

        # Tack the new text onto the end of what is already stored.
        self.data = self.data + str


    def readlines(self):
        """Return the stored text as a list of lines, mimicking the readlines() method of a file object.

        The closed flag is not checked, so this can be called after close().  Every returned line ends with a newline character, including the final one when the stored text does not itself end with a newline.


        @return:    The stored text split at the newline characters, each element terminated by a newline.
        @rtype:     list of str
        """

        # Break the stored text up at the newline characters.
        pieces = self.data.split('\n')

        # Text ending in a newline (or no text at all) leaves an empty final piece, which is not a line.
        if pieces[-1] == '':
            del pieces[-1]

        # Restore the newline removed by the split.
        return [piece + '\n' for piece in pieces]

1152 1153 1154

class Bzip2Fixed(BZ2File):
    """Incredibly nasty hack for bzip2 files support in Python 3.0, 3.1 and 3.2.

    The base class is extended with a do-nothing flush(), a read1() which simply defers to read(), and readable(), seekable() and writable() methods which unconditionally return True.
    """

    def flush(self):
        """Do nothing, this method exists only so that flush() can be called on the object."""

        return None


    def read1(self, n):
        """Read from the file by handing the request straight to read().

        @param n:   The size argument passed on to read().
        @type n:    int
        @return:    Whatever read() returns.
        """

        return self.read(n)


    def readable(self):
        """Report the file as readable, without checking anything.

        @return:    Always True.
        @rtype:     bool
        """

        return True


    def seekable(self):
        """Report the file as seekable, without checking anything.

        @return:    Always True.
        @rtype:     bool
        """

        return True


    def writable(self):
        """Report the file as writable, without checking anything.

        @return:    Always True.
        @rtype:     bool
        """

        return True

1172 1173 1174

class GzipFixed(gzip.GzipFile):
    """Incredibly nasty hack for gzipped files support in Python 3.0, 3.1 and 3.2.

    The base class is extended with a read1() which simply defers to read(), with readable(), seekable() and writable() methods which unconditionally return True, and with a class level 'closed' attribute fixed to False.
    """

    # Class level flag, never updated by any of the methods defined here.
    closed = False


    def read1(self, n):
        """Read from the file by handing the request straight to read().

        @param n:   The size argument passed on to read().
        @type n:    int
        @return:    Whatever read() returns.
        """

        return self.read(n)


    def readable(self):
        """Report the file as readable, without checking anything.

        @return:    Always True.
        @rtype:     bool
        """

        return True


    def seekable(self):
        """Report the file as seekable, without checking anything.

        @return:    Always True.
        @rtype:     bool
        """

        return True


    def writable(self):
        """Report the file as writable, without checking anything.

        @return:    Always True.
        @rtype:     bool
        """

        return True

1191 1192 1193

class SplitIO:
    """Class for splitting an IO stream to two outputs.

    After the two target streams have been set with split(), every write() and flush() call is forwarded to both of them, first to stream1 and then to stream2.
    """

    def __init__(self):
        """Set up the object without any target streams, these are supplied later via split()."""

        # Define the attributes up front so that they always exist on the instance.
        self.stream1 = None
        self.stream2 = None


    def flush(self):
        """Flush all streams."""

        # Call the streams' methods.
        self.stream1.flush()
        self.stream2.flush()


    def isatty(self):
        """Check that both streams are TTYs.

        @return:    True, only if both streams are TTYs.
        @rtype:     bool
        """

        # Query both streams, so that neither call is skipped.
        tty1 = self.stream1.isatty()
        tty2 = self.stream2.isatty()

        # Logical rather than bitwise conjunction, so that non-bool return values (e.g. None or 1) cannot raise a TypeError or leak out as a non-bool result.
        return bool(tty1 and tty2)


    def split(self, stream1, stream2):
        """Set the two streams to which all output is sent.

        @param stream1: The first output stream.
        @type stream1:  file-like object
        @param stream2: The second output stream.
        @type stream2:  file-like object
        """

        # Store the streams.
        self.stream1 = stream1
        self.stream2 = stream2


    def write(self, text):
        """Replacement write function, sending the text to both streams.

        @param text:    The text to write.
        @type text:     str
        """

        # Write to stream1.
        self.stream1.write(text)

        # Write to stream2.
        self.stream2.write(text)

1234

Source Code for Module relax_io