lib.io

1 from __future__ import absolute_import 2 ############################################################################### 3 # # 4 # Copyright (C) 2003-2014 Edward d'Auvergne # 5 # Copyright (C) 2014 Troels E. Linnet # 6 # # 7 # This file is part of the program relax (http://www.nmr-relax.com). # 8 # # 9 # This program is free software: you can redistribute it and/or modify # 10 # it under the terms of the GNU General Public License as published by # 11 # the Free Software Foundation, either version 3 of the License, or # 12 # (at your option) any later version. # 13 # # 14 # This program is distributed in the hope that it will be useful, # 15 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 # GNU General Public License for more details. # 18 # # 19 # You should have received a copy of the GNU General Public License # 20 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 21 # # 22 ############################################################################### 23 24 # Module docstring. 25 """Module containing advanced IO functions for relax. 26 27 This includes IO redirection, automatic loading and writing of compressed files (both Gzip and BZ2 compression), reading and writing of files, processing of the contents of files, etc. 28 """ 29 30 # Python module imports. 31 import sys 32 try: 33 import bz2 34 except ImportError: 35 bz2 = None 36 message = sys.exc_info()[1] 37 bz2_module_message = message.args[0] 38 from os import devnull 39 from os import F_OK, X_OK, access, altsep, getenv, makedirs, pathsep, remove, sep 40 from os.path import expanduser, basename, splitext, isfile 41 from re import search, split 42 from sys import stdin, stdout, stderr 43 from warnings import warn 44 45 # relax module imports. 
46 from lib.check_types import is_filetype 47 from lib.compat import bz2_open, gz_open 48 from lib.errors import RelaxError, RelaxFileError, RelaxFileOverwriteError, RelaxMissingBinaryError, RelaxNoInPathError, RelaxNonExecError 49 from lib.warnings import RelaxWarning 50 51 52

def delete(file_name, dir=None, fail=True):
    """Delete a file, also trying its Bzip2 and Gzip compressed names.

    The path is tried exactly as given first, then with '.bz2' appended, then with '.gz' appended.  The first one found is removed.

    @param file_name:       The name of the file to delete.
    @type file_name:        str
    @keyword dir:           The directory containing the file.
    @type dir:              None or str
    @keyword fail:          A flag which if True will cause RelaxFileError to be raised when no matching file is found.
    @type fail:             bool
    @raises RelaxFileError: If none of the candidate files exist and fail is True.
    """

    # The path without any compression extension.
    base_path = get_file_path(file_name, dir)

    # Try the bare path and each compressed variant in turn, removing the first hit.
    for candidate in (base_path, base_path + '.bz2', base_path + '.gz'):
        if access(candidate, F_OK):
            remove(candidate)
            return

    # Nothing to delete.
    if fail:
        raise RelaxFileError(base_path)

82 83

def determine_compression(file_path):
    """Determine the compression type of a file, checking at the same time that the file exists.

    If the path exists as given, the compression type is taken from its literal '.bz2' or '.gz' extension.  Otherwise the path is retried with '.bz2' and then '.gz' appended.

    @param file_path:       The full file path of the file.
    @type file_path:        str
    @return:                A tuple of the compression type and full path of the file (including its extension).  A value of 0 corresponds to no compression.  Bzip2 compression corresponds to a value of 1.  Gzip compression corresponds to a value of 2.
    @rtype:                 (int, str)
    @raises RelaxFileError: If neither the path nor its '.bz2' or '.gz' variants exist.
    """

    # The file exists under the supplied name, so classify it by its extension.
    # A literal suffix test is used, as in a regular expression an unescaped '.' matches any character (so 'data_bz2' would be mistaken for a Bzip2 file).
    if access(file_path, F_OK):
        if file_path.endswith('.bz2'):
            return 1, file_path
        if file_path.endswith('.gz'):
            return 2, file_path
        return 0, file_path

    # The file has been supplied without its compression extension.
    for compress_type, extension in ((1, '.bz2'), (2, '.gz')):
        compressed_path = file_path + extension
        if access(compressed_path, F_OK):
            return compress_type, compressed_path

    # The file doesn't exist.
    raise RelaxFileError(file_path)

117 118

def extract_data(file=None, dir=None, file_data=None, sep=None):
    """Return all data in the file as a list of lines where each line is a list of line elements.

    @keyword file:      The file to extract the data from.
    @type file:         str or file object
    @keyword dir:       The path where the file is located.  If None and the file argument is a string, then the current directory is assumed.
    @type dir:          str or None
    @keyword file_data: If the file data has already been extracted from the file, it can be passed into this function using this argument.  If data is supplied here, then the file and dir args are ignored.
    @type file_data:    list of str
    @keyword sep:       The character separating the columns in the file data.  If None, then whitespace is assumed.
    @type sep:          str
    @return:            The file data.
    @rtype:             list of lists of str
    """

    # Data not already extracted from the file.
    if not file_data:
        # Only a file opened here (from a file name) is owned by this function - a file object supplied by the caller is left open.
        opened_here = isinstance(file, str)
        if opened_here:
            file = open_read_file(file_name=file, dir=dir)

        # Read the lines, always releasing a handle that was opened here.
        try:
            file_data = file.readlines()
        finally:
            if opened_here:
                file.close()

    # An empty or missing separator means splitting on runs of whitespace.
    delimiter = sep if sep else None

    # Create a data structure from the contents of the file split by either whitespace or the separator.
    return [line.split(delimiter) for line in file_data]

158 159

def file_root(file_path):
    """Return the root file name, stripped of all directory and extension details.

    Every extension is removed, so 'dir/data.tar.gz' gives 'data'.

    @param file_path:   The full path to the file.
    @type file_path:    str
    @return:            The file root (with all directories and the extension stripped away).
    @rtype:             str
    """

    # Peel off one extension at a time until splitext() finds nothing more to remove.
    root = file_path
    while True:
        root, suffix = splitext(root)
        if suffix == '':
            break

    # Drop the directories.
    return basename(root)

176 177

def get_file_path(file_name=None, dir=None):
    """Build the full file path from the directory and file name, expanding any '~'.

    @keyword file_name: The name of the file to extract the data from.
    @type file_name:    str
    @keyword dir:       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:          str
    @return:            The full file path.
    @rtype:             str
    """

    # Prefix the directory, when one is given.
    full_path = dir + sep + file_name if dir else file_name

    # Expand any ~ characters (an empty or None path is returned untouched, as expanduser() can't handle None).
    return expanduser(full_path) if full_path else full_path

202 203

def io_streams_restore(verbosity=1):
    """Point sys.stdin, sys.stdout and sys.stderr back at the original Python streams.

    @keyword verbosity: The verbosity level.  If non-zero, a message is printed for each stream.
    @type verbosity:    int
    """

    # Announce what is about to happen (printed through the current sys.stdout, before it is replaced).
    if verbosity:
        for message in (
            "Restoring the sys.stdin IO stream to the Python STDIN IO stream.",
            "Restoring the sys.stdout IO stream to the Python STDOUT IO stream.",
            "Restoring the sys.stderr IO stream to the Python STDERR IO stream.",
        ):
            print(message)

    # Restore all three streams.
    sys.stdin, sys.stdout, sys.stderr = sys.__stdin__, sys.__stdout__, sys.__stderr__

221 222

def io_streams_log(file_name=None, dir=None, verbosity=1):
    """Turn on logging: STDOUT goes to the log file only, STDERR goes to both the terminal and the log file.

    @keyword file_name: The name of the file.
    @type file_name:    str
    @keyword dir:       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:          str
    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    """

    # Open the log file, overwriting any existing file.
    log_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, verbosity=verbosity, return_path=True)

    # Print out (this must happen before sys.stdout is redirected).
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)

    # STDERR is duplicated to the stderr stream captured at import time and to the log file.
    stderr_splitter = SplitIO()
    stderr_splitter.split(stderr, log_file)

    # IO stream redirection.
    sys.stdin = stdin
    sys.stdout = log_file
    sys.stderr = stderr_splitter

256 257

def io_streams_tee(file_name=None, dir=None, compress_type=0, verbosity=1):
    """Turn on teeing: both STDOUT and STDERR keep going to the terminal and are also copied to a file.

    @keyword file_name:     The name of the file.
    @type file_name:        str
    @keyword dir:           The path where the file is located.  If None, then the current directory is assumed.
    @type dir:              str
    @keyword compress_type: The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.
    @type compress_type:    int
    @keyword verbosity:     The verbosity level.
    @type verbosity:        int
    """

    # Open the tee file, overwriting any existing file.
    tee_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, compress_type=compress_type, verbosity=verbosity, return_path=True)

    # Print out (this must happen before sys.stdout is redirected).
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to both the Python stdout IO stream and the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)

    # One splitter per output stream, each feeding the stream captured at import time plus the tee file.
    stdout_splitter = SplitIO()
    stdout_splitter.split(stdout, tee_file)
    stderr_splitter = SplitIO()
    stderr_splitter.split(stderr, tee_file)

    # IO stream redirection.
    sys.stdin = stdin
    sys.stdout = stdout_splitter
    sys.stderr = stderr_splitter

293 294

def mkdir_nofail(dir=None, verbosity=1):
    """Create the given directory (and any missing parents) without raising an error if this fails.

    @keyword dir:       The directory to create.  If None, nothing is done.
    @type dir:          str
    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    """

    # No directory given.
    if dir is None:
        return

    # Expand any ~ characters.
    target = expanduser(dir)

    # Make the directory.
    try:
        makedirs(target)

    # NOTE: every OSError is caught here, so the message below is also printed for failures other than the directory already existing.
    except OSError:
        if not verbosity:
            return
        print("Directory ." + sep + target + " already exists.\n")

317 318

def open_read_file(file_name=None, dir=None, verbosity=1):
    """Open a file for reading, transparently handling Bzip2 and Gzip compressed files.

    @keyword file_name: The name of the file to extract the data from.  An already open file object is returned unchanged.
    @type file_name:    str
    @keyword dir:       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:          str
    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    @return:            The open file object.
    @rtype:             file object
    @raises RelaxError: If the file name is invalid or the file cannot be opened.
    """

    # A file descriptor object - nothing to do here!
    if is_filetype(file_name):
        return file_name

    # Invalid file name (an empty value which is not a string, such as None).
    if not (file_name or isinstance(file_name, str)):
        raise RelaxError("The file name " + repr(file_name) + " " + repr(type(file_name)) + " is invalid and cannot be opened.")

    # Build the path, test that the file exists and determine the compression type.
    compress_type, file_path = determine_compression(get_file_path(file_name, dir))

    # Open the file for reading.
    try:
        # Print out.
        if verbosity:
            print("Opening the file " + repr(file_path) + " for reading.")

        # Uncompressed text.
        if compress_type == 0:
            file_obj = open(file_path, 'r')

        # Bzip2 compressed text.
        elif compress_type == 1:
            file_obj = bz2_open(file=file_path, mode='r')

        # Gzipped compressed text.
        elif compress_type == 2:
            file_obj = gz_open(file=file_path, mode='r')

    # Cannot open - convert to a RelaxError carrying the second element of the IOError arguments.
    except IOError:
        message = sys.exc_info()[1]
        raise RelaxError("Cannot open the file " + repr(file_path) + ".  " + message.args[1] + ".")

    # Return the opened file.
    return file_obj

372 373

def open_write_file(file_name=None, dir=None, force=False, compress_type=0, verbosity=1, return_path=False):
    """Open a file for writing, creating directories if necessary.

    @keyword file_name:                 The name of the file to write to.  An already open file object, or any object with a write() method, is passed straight through.
    @type file_name:                    str or file object
    @keyword dir:                       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:                          str
    @keyword force:                     Boolean argument which if True causes the file to be overwritten if it already exists.
    @type force:                        bool
    @keyword compress_type:             The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.  If no compression is given but the file name ends in '.gz' or '.bz2', then the compression will be automatically set.
    @type compress_type:                int
    @keyword verbosity:                 The verbosity level.
    @type verbosity:                    int
    @keyword return_path:               If True, the function will return a tuple of the file object and the full file path.  The path is None for the null device and for file objects that are passed through.
    @type return_path:                  bool
    @return:                            The open, writable file object and, if the return_path is True, then the full file path is returned as well.
    @rtype:                             writable file object (if return_path, then a tuple of the writable file and the full file path)
    @raises RelaxError:                 If no file name is supplied or the file cannot be opened.
    @raises RelaxFileOverwriteError:    If the file already exists and force is not set.
    """

    # No file name?
    if file_name is None:
        raise RelaxError("The name of the file must be supplied.")

    # A file descriptor object, or something pretending to be a file object - nothing to open.
    # The return_path flag is honoured here so that callers unpacking a (file, path) tuple work for these inputs as well.
    if is_filetype(file_name) or hasattr(file_name, 'write'):
        if return_path:
            return file_name, None
        return file_name

    # The null device (NOTE: this matches any file name containing the text 'devnull').
    if search('devnull', file_name):
        # Print out.
        if verbosity:
            print("Opening the null device file for writing.")

        # Open the null device.
        file_obj = open(devnull, 'w')

        # Return the file.
        if return_path:
            return file_obj, None
        return file_obj

    # Create the directories.
    mkdir_nofail(dir, verbosity=0)

    # File path.
    file_path = get_file_path(file_name, dir)

    # If no compression is supplied, determine the compression to be used from the file extension.
    # Literal suffix tests are used, as in a regular expression an unescaped '.' matches any character.
    if compress_type == 0:
        if file_path.endswith('.bz2'):
            compress_type = 1
        elif file_path.endswith('.gz'):
            compress_type = 2

    # Bzip2 compression.
    if compress_type == 1 and not file_path.endswith('.bz2'):
        # Bz2 module exists.
        if bz2:
            file_path = file_path + '.bz2'

        # Switch to gzip compression.
        else:
            warn(RelaxWarning("Cannot use Bzip2 compression, using gzip compression instead.  " + bz2_module_message + "."))
            compress_type = 2

    # Gzip compression.
    if compress_type == 2 and not file_path.endswith('.gz'):
        file_path = file_path + '.gz'

    # Fail if the file already exists and the force flag is not set.
    if access(file_path, F_OK) and not force:
        raise RelaxFileOverwriteError(file_path, 'force flag')

    # Open the file for writing.
    try:
        # Print out.
        if verbosity:
            print("Opening the file " + repr(file_path) + " for writing.")

        # Uncompressed text.
        if compress_type == 0:
            file_obj = open(file_path, 'w')

        # Bzip2 compressed text.
        elif compress_type == 1:
            file_obj = bz2_open(file=file_path, mode='w')

        # Gzipped compressed text.
        elif compress_type == 2:
            file_obj = gz_open(file=file_path, mode='w')

    # Cannot open.
    except IOError:
        message = sys.exc_info()[1]
        raise RelaxError("Cannot open the file " + repr(file_path) + ".  " + message.args[1] + ".")

    # Return the opened file.
    if return_path:
        return file_obj, file_path
    return file_obj

482 483

def sort_filenames(filenames=None, rev=False):
    """Sort the given list in alphanumeric (natural) order, so that 'f2' comes before 'f10'.

    The supplied list is sorted in place in ascending order.

    @keyword filenames: The list of file names to sort.
    @type filenames:    list of str
    @keyword rev:       Flag which if True will cause a reversed copy of the sorted list to be returned.
    @type rev:          bool
    @return:            The sorted list of file names.
    @rtype:             list of str
    """

    def alphanum_key(name):
        """Split the name into alternating text and integer parts for comparison."""

        # With a single capturing group, split() alternates text (even indices) and digit runs (odd indices).
        # The index is used rather than str.isdigit(), as isdigit() also accepts characters such as superscripts which int() rejects.
        parts = split('([0-9]+)', name)
        return [int(part) if index % 2 else part for index, part in enumerate(parts)]

    # Now sort according to the key.
    filenames.sort(key=alphanum_key)

    # Return a real list in both cases (reversed() alone only gives a one-shot iterator).
    if rev:
        return list(reversed(filenames))
    return filenames

509 510

def strip(data, comments=True):
    """Filter the empty lines and, optionally, the comment lines out of the file data structure.

    A comment line is one whose first element starts with the '#' character.

    @param data:        The file data to clean up.
    @type data:         list of lists of str
    @keyword comments:  A flag which if True will cause comments to be deleted.
    @type comments:     bool
    @return:            The input data with the empty and comment lines removed.
    @rtype:             list of lists of str
    """

    def is_discarded(row):
        """Decide if the row is an empty line or a comment line to be dropped."""

        # Empty lines.
        if len(row) == 0:
            return True

        # Comment lines.
        return bool(comments and search("^#", row[0]))

    # Keep only the data lines, in their original order.
    return [row for row in data if not is_discarded(row)]

540 541

def swap_extension(file=None, ext=None):
    """Replace the extension(s) of a file name with a new one.

    The result is built from the file root, so any directory part of the original name is dropped as well.

    @keyword file:  The name of the original file.
    @type file:     str
    @keyword ext:   The new file name extension to use (without the leading dot).
    @type ext:      str
    @return:        The name of the file with the new file name extension.
    @rtype:         str
    """

    # The file root, followed by a dot and the new extension.
    return file_root(file) + '.' + ext

562 563

def test_binary(binary):
    """Test that the binary string corresponds to a valid executable file.

    If the binary is an existing file, it must be executable.  Otherwise it is searched for in the directories of the PATH environmental variable, both under its own name and with '.exe' appended.

    @param binary:                      The name or path of the binary executable file.
    @type binary:                       str
    @raises RelaxMissingBinaryError:    If the binary is a file path which cannot be accessed.
    @raises RelaxNonExecError:          If the binary file exists but is not executable.
    @raises RelaxNoInPathError:         If the binary cannot be found in the system path.
    """

    # The path of the program has been given.
    if isfile(binary):
        # Test that the binary exists.
        if not access(binary, F_OK):
            raise RelaxMissingBinaryError(binary)

        # Test that the binary is executable.
        if not access(binary, X_OK):
            raise RelaxNonExecError(binary)

        return

    # Get the PATH environmental variable, which is None when the variable is unset.
    path = getenv('PATH')

    # Split PATH by the path separator (an unset PATH gives no directories to search, rather than an AttributeError).
    if path is None:
        path_list = []
    else:
        path_list = path.split(pathsep)

    # Test that the binary exists within the system path (and exit this function instantly once it has been found).
    for directory in path_list:
        candidate = directory + sep + binary
        if access(candidate, F_OK) or access(candidate + ".exe", F_OK):
            return

    # The binary is not located in the system path!
    raise RelaxNoInPathError(binary)

602 603

def write_data(out=None, headings=None, data=None, sep=None):
    """Write a table of the data to the given file handle.

    Without a separator, the columns are left-aligned and padded to the widest entry plus four spaces, and the heading line starts with '# '.  With a separator, the elements are joined by it and the heading line starts with '#'.

    @keyword out:       The file handle to write to.
    @type out:          file handle
    @keyword headings:  The optional headings to print out.
    @type headings:     list of str or None
    @keyword data:      The data to print out.
    @type data:         list of list of str
    @keyword sep:       The column separator which, if None, defaults to whitespace.
    @type sep:          str or None
    """

    # No data to print out.
    if data is None or data == []:
        return

    # The column count is fixed by the first row.
    num_cols = len(data[0])
    columns = range(num_cols)

    # Pretty whitespace formatting.
    if sep is None:
        # Start from the heading widths (the first heading is two characters wider because of its '# ' prefix).
        if headings is not None:
            widths = [len(headings[j]) + (2 if j == 0 else 0) for j in columns]
        else:
            widths = [0 for j in columns]

        # Widen each column to fit its longest data element.
        for row in data:
            for j in columns:
                widths[j] = max(widths[j], len(row[j]))

        # Convert to left-aligned format strings, with four characters of padding.
        formats = ["%%-%ss" % (width + 4) for width in widths]

        # The headings.
        if headings is not None:
            out.write(formats[0] % ("# " + headings[0]))
            for j in range(1, num_cols):
                out.write(formats[j] % headings[j])
            out.write('\n')

        # The data.
        for row in data:
            for j in columns:
                out.write(formats[j] % row[j])
            out.write('\n')

        return

    def write_delimited(cells):
        """Write one line of elements joined by the column separator."""

        for j in columns:
            # The column separator.
            if j > 0:
                out.write(sep)

            # The element.
            out.write(cells[j])
        out.write('\n')

    # Non-whitespace formatting - the headings.
    if headings is not None:
        out.write('#')
        write_delimited(headings)

    # The data.
    for row in data:
        write_delimited(row)

691 692 693

class DummyFileObject:
    """An in-memory stand-in for a file object, collecting all written text in a single string."""

    def __init__(self):
        """Set up the dummy object to act as a file object."""

        # The object starts out open.
        self.closed = False

        # The accumulated text from all write() calls.
        self.data = ''


    def close(self):
        """Mark this object as 'closed'."""

        self.closed = True


    def write(self, str):
        """Mimic the file object write() method by appending the text to the stored data.

        @param str:         The string to be written.
        @type str:          str
        @raises ValueError: If this object has been closed.
        """

        # Writing to a closed file is an error.
        if self.closed:
            raise ValueError('I/O operation on closed file')

        # Append the string to the data object.
        self.data += str


    def readlines(self):
        """Mimic the file object readlines() method.

        This method works even if this dummy file object is closed!  Every returned line ends with a newline character, including a final line which was written without one.

        @return:    The contents of the file object separated by newline characters.
        @rtype:     list of str
        """

        # Split up the string.
        pieces = self.data.split('\n')

        # Text ending in a newline (or no text at all) leaves an empty last piece, which is dropped.
        if pieces[-1] == '':
            del pieces[-1]

        # Re-add the newline character to each line.
        return [piece + '\n' for piece in pieces]

750 751 752

class SplitIO:
    """Writable stream duplicating everything to two underlying streams, which are set via split()."""

    def __init__(self):
        """Class for splitting an IO stream to two outputs."""


    def _streams(self):
        """Yield the first and then the second stream, looking each one up only when it is needed."""

        yield self.stream1
        yield self.stream2


    def flush(self):
        """Flush both streams, the first one first."""

        for stream in self._streams():
            stream.flush()


    def isatty(self):
        """Check that both streams are TTYs.

        @return:    True, only if both streams are TTYs.
        @rtype:     bool
        """

        # Both streams are always queried, and the two answers are combined with the & operator.
        first_is_tty = self.stream1.isatty()
        second_is_tty = self.stream2.isatty()
        return first_is_tty & second_is_tty


    def split(self, stream1, stream2):
        """Set the two streams to send the output to.

        @param stream1: The first stream.
        @type stream1:  file object
        @param stream2: The second stream.
        @type stream2:  file object
        """

        self.stream1, self.stream2 = stream1, stream2


    def write(self, text):
        """Replacement write function, sending the text to the first and then to the second stream.

        @param text:    The text to write.
        @type text:     str
        """

        for stream in self._streams():
            stream.write(text)

793

Source Code for Module lib.io