lib.io

1 from __future__ import absolute_import 2 ################################################################################ 3 # # 4 # Copyright (C) 2001,2003-2004,2006-2009,2011-2012,2014,2019 Edward d'Auvergne # 5 # Copyright (C) 2006 Chris MacRaild # 6 # Copyright (C) 2014 Troels E. Linnet # 7 # # 8 # This file is part of the program relax (http://www.nmr-relax.com). # 9 # # 10 # This program is free software: you can redistribute it and/or modify # 11 # it under the terms of the GNU General Public License as published by # 12 # the Free Software Foundation, either version 3 of the License, or # 13 # (at your option) any later version. # 14 # # 15 # This program is distributed in the hope that it will be useful, # 16 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 18 # GNU General Public License for more details. # 19 # # 20 # You should have received a copy of the GNU General Public License # 21 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 22 # # 23 ################################################################################ 24 25 # Module docstring. 26 """Module containing advanced IO functions for relax. 27 28 This includes IO redirection, automatic loading and writing of compressed files (both Gzip and BZ2 compression), reading and writing of files, processing of the contents of files, etc. 29 """ 30 31 # Python module imports. 32 import sys 33 try: 34 import bz2 35 except ImportError: 36 bz2 = None 37 message = sys.exc_info()[1] 38 bz2_module_message = message.args[0] 39 from os import devnull 40 from os import F_OK, X_OK, access, altsep, getenv, makedirs, pathsep, remove, sep 41 from os.path import expanduser, basename, splitext, isfile 42 from re import search, split 43 from sys import stdin, stdout, stderr 44 from warnings import warn 45 46 # relax module imports. 
47 from lib.check_types import is_filetype 48 from lib.compat import bz2_open, gz_open 49 from lib.errors import RelaxError, RelaxFileError, RelaxFileOverwriteError, RelaxMissingBinaryError, RelaxNoInPathError, RelaxNonExecError 50 from lib.warnings import RelaxWarning 51 52 53

def delete(file_name, dir=None, fail=True):
    """Remove a file from disk, also trying the compressed variants of its name.

    The plain file path is tried first, followed by the same path with the '.bz2' and then the '.gz' extension appended.  The first path found to exist is removed.

    @param file_name:       The name of the file to delete.
    @type file_name:        str
    @keyword dir:           The directory containing the file.
    @type dir:              None or str
    @keyword fail:          A flag which if True will cause RelaxFileError to be raised when none of the paths exist.
    @type fail:             bool
    @raises RelaxFileError: If the file does not exist, and fail is set to true.
    """

    # The path without any added compression extension.
    base_path = get_file_path(file_name, dir)

    # Walk through the candidate names in priority order, removing the first one found.
    for extension in ('', '.bz2', '.gz'):
        candidate = base_path + extension
        if access(candidate, F_OK):
            remove(candidate)
            return

    # Nothing matched, so either complain or silently give up.
    if fail:
        raise RelaxFileError(base_path)

83 84

def determine_compression(file_path):
    """Determine the compression type of a file, testing at the same time that the file exists.

    The path is first tested exactly as given.  If nothing exists there, the '.bz2' and then the '.gz' extension is appended and tested.

    The extension tests are literal suffix comparisons, so a name such as 'data_bz2' or 'logxgz' is correctly treated as uncompressed.

    @param file_path:       The full file path of the file.
    @type file_path:        str
    @return:                A tuple of the compression type and full path of the file (including its extension).  A value of 0 corresponds to no compression.  Bzip2 compression corresponds to a value of 1.  Gzip compression corresponds to a value of 2.
    @rtype:                 (int, str)
    @raises RelaxFileError: If neither the path nor its '.bz2' or '.gz' variants exist.
    """

    # The file exists exactly as supplied, so the compression follows from its own extension.
    if access(file_path, F_OK):
        if file_path.endswith('.bz2'):
            return 1, file_path
        if file_path.endswith('.gz'):
            return 2, file_path
        return 0, file_path

    # The file has been supplied without its compression extension, so try adding each one in turn.
    for compress_type, extension in ((1, '.bz2'), (2, '.gz')):
        candidate = file_path + extension
        if access(candidate, F_OK):
            return compress_type, candidate

    # The file doesn't exist.
    raise RelaxFileError(file_path)

118 119

def extract_data(file=None, dir=None, file_data=None, sep=None):
    """Return all data in the file as a list of lines where each line is a list of line elements.

    If the file argument is a string, the file is opened via open_read_file(), read, and then always closed again.  A file object supplied by the caller is read but left open, as it is owned by the caller.

    @keyword file:      The file to extract the data from.
    @type file:         str or file object
    @keyword dir:       The path where the file is located.  If None and the file argument is a string, then the current directory is assumed.
    @type dir:          str or None
    @keyword file_data: If the file data has already been extracted from the file, it can be passed into this function using this argument.  If non-empty data is supplied here, then the file and dir args are ignored.
    @type file_data:    list of str
    @keyword sep:       The character separating the columns in the file data.  If None (or an empty string), then whitespace is assumed.
    @type sep:          str
    @return:            The file data.
    @rtype:             list of lists of str
    """

    # Data not already extracted from the file.
    if not file_data:
        # Open the file, remembering that the handle belongs to this function.
        opened_here = False
        if isinstance(file, str):
            file = open_read_file(file_name=file, dir=dir)
            opened_here = True

        # Read the lines, making sure that a handle opened here is never leaked.
        try:
            file_data = file.readlines()
        finally:
            if opened_here:
                file.close()

    # Split each line by the separator, or by whitespace if no separator is given.
    if sep:
        return [line.split(sep) for line in file_data]
    return [line.split() for line in file_data]

159 160

def file_root(file_path):
    """Return the root file name, stripped of all path and extension details.

    Every extension is removed, so that 'dir/data.tar.gz' gives 'data'.

    @param file_path:   The full path to the file.
    @type file_path:    str
    @return:            The file root (with all directories and the extension stripped away).
    @rtype:             str
    """

    # Peel off one extension at a time until splitext() finds nothing more to remove.
    root = file_path
    while True:
        root, extension = splitext(root)
        if extension == '':
            break

    # Drop the directories.
    return basename(root)

177 178

def get_file_path(file_name=None, dir=None):
    """Build the full file path from the directory and file name, expanding any '~' characters.

    @keyword file_name: The name of the file to extract the data from.
    @type file_name:    str
    @keyword dir:       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:          str
    @return:            The full file path.
    @rtype:             str
    """

    # Prepend the directory, if one is given.
    full_path = dir + sep + file_name if dir else file_name

    # An empty path or None is handed back untouched, as expanduser() can't handle None.
    if not full_path:
        return full_path

    # Expand any ~ characters.
    return expanduser(full_path)

203 204

def io_streams_restore(verbosity=1):
    """Reset sys.stdin, sys.stdout and sys.stderr to the original Python streams.

    @keyword verbosity: The verbosity level.  If non-zero, a message is printed for each stream.
    @type verbosity:    int
    """

    # Announce what is about to happen (while the current sys.stdout is still in place).
    if verbosity:
        announcements = (
            "Restoring the sys.stdin IO stream to the Python STDIN IO stream.",
            "Restoring the sys.stdout IO stream to the Python STDOUT IO stream.",
            "Restoring the sys.stderr IO stream to the Python STDERR IO stream.",
        )
        for announcement in announcements:
            print(announcement)

    # Put the interpreter's original streams back.
    sys.stdin, sys.stdout, sys.stderr = sys.__stdin__, sys.__stdout__, sys.__stderr__

222 223

def io_streams_log(file_name=None, dir=None, verbosity=1):
    """Turn on logging, sending both STDOUT and STDERR streams to a file.

    After this call sys.stdout is the log file itself, whereas sys.stderr is a SplitIO object writing to both the Python stderr stream and the log file.  Any existing log file is overwritten.

    @keyword file_name: The name of the file.
    @type file_name:    str
    @keyword dir:       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:          str
    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    """

    # Open the log file, overwriting any previous one.
    log_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, verbosity=verbosity, return_path=True)

    # The splitter for the error stream.
    stderr_splitter = SplitIO()

    # Print out (before the redirection, so that this still goes to the current STDOUT).
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)

    # Errors go to both the Python stderr stream and the log file.
    stderr_splitter.split(stderr, log_file)

    # IO stream redirection.
    sys.stdin = stdin
    sys.stdout = log_file
    sys.stderr = stderr_splitter

257 258

def io_streams_tee(file_name=None, dir=None, compress_type=0, verbosity=1):
    """Turn on teeing, duplicating both the STDOUT and STDERR streams into a file.

    After this call sys.stdout and sys.stderr are SplitIO objects which write to the corresponding Python stream and to the tee file.  Any existing tee file is overwritten.

    @keyword file_name:     The name of the file.
    @type file_name:        str
    @keyword dir:           The path where the file is located.  If None, then the current directory is assumed.
    @type dir:              str
    @keyword compress_type: The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.
    @type compress_type:    int
    @keyword verbosity:     The verbosity level.
    @type verbosity:        int
    """

    # Open the tee file, overwriting any previous one.
    tee_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, compress_type=compress_type, verbosity=verbosity, return_path=True)

    # One splitter per output stream.
    stdout_splitter = SplitIO()
    stderr_splitter = SplitIO()

    # Print out (before the redirection takes place).
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to both the Python stdout IO stream and the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)

    # Each splitter writes to the Python stream and to the tee file.
    stdout_splitter.split(stdout, tee_file)
    stderr_splitter.split(stderr, tee_file)

    # IO stream redirection.
    sys.stdin = stdin
    sys.stdout = stdout_splitter
    sys.stderr = stderr_splitter

294 295

def mkdir_nofail(dir=None, verbosity=1):
    """Create the given directory, or exit without raising an error if this is not possible.

    All missing parent directories are created as well.  A failure of the directory creation is never propagated to the caller.  If the verbosity is non-zero, a message is printed which distinguishes a directory that already exists from any other failure (a file in the way, missing permissions, etc.).

    @keyword dir:       The directory to create.  If None, nothing is done.
    @type dir:          str or None
    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    """

    # Local import, as isdir is not among the module level imports.
    from os.path import isdir

    # No directory given.
    if dir is None:
        return

    # Expand any ~ characters.
    dir = expanduser(dir)

    # Make the directory.
    try:
        makedirs(dir)
    except OSError:
        # Stay silent if asked to.
        if not verbosity:
            return

        # Only claim that the directory exists when this is really the case.
        if isdir(dir):
            print("Directory ." + sep + dir + " already exists.\n")
        else:
            failure = sys.exc_info()[1]
            print("Cannot create the directory " + repr(dir) + ": " + str(failure) + "\n")

318 319

def open_read_file(file_name=None, dir=None, verbosity=1):
    """Open the given file for reading and return the open file object.

    Files with Bzip2 or Gzip compression are opened transparently, the compression being determined by determine_compression().  If an open file object is supplied instead of a file name, it is returned unmodified.

    @keyword file_name: The name of the file to open, or an already open file object.
    @type file_name:    str or file object
    @keyword dir:       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:          str
    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    @return:            The open file object.
    @rtype:             file object
    @raises RelaxError: If the file name is invalid or the file cannot be opened.
    """

    # A file descriptor object.
    if is_filetype(file_name):
        # Nothing to do here!
        return file_name

    # Invalid file name (only values which are both false and not a string, such as None, are rejected here).
    if not file_name and not isinstance(file_name, str):
        raise RelaxError("The file name " + repr(file_name) + " " + repr(type(file_name)) + " is invalid and cannot be opened.")

    # File path.
    file_path = get_file_path(file_name, dir)

    # Test if the file exists and determine the compression type.
    compress_type, file_path = determine_compression(file_path)

    # Open the file for reading.
    try:
        # Print out.
        if verbosity:
            print("Opening the file " + repr(file_path) + " for reading.")

        # Uncompressed text.
        if compress_type == 0:
            file_obj = open(file_path, 'r')

        # Bzip2 compressed text.
        elif compress_type == 1:
            file_obj = bz2_open(file=file_path, mode='r')

        # Gzipped compressed text.
        elif compress_type == 2:
            file_obj = gz_open(file=file_path, mode='r')

    # Cannot open.
    except IOError:
        message = sys.exc_info()[1]

        # Not all IOError instances carry a second argument, so fall back to the string form of the error.
        reason = getattr(message, 'strerror', None) or str(message)
        raise RelaxError("Cannot open the file " + repr(file_path) + ". " + reason + ".")

    # Return the opened file.
    return file_obj

373 374

def open_write_file(file_name=None, dir=None, force=False, compress_type=0, verbosity=1, return_path=False):
    """Open a file for writing, creating directories if necessary.

    If an open file object or any object with a write() method is supplied as the file name, it is handed back as is.  A file name containing the text 'devnull' causes the null device to be opened.  In both of these cases, the file path returned when return_path is set is None.

    @keyword file_name:                 The name of the file to write to, or a writable file-like object.
    @type file_name:                    str or file object
    @keyword dir:                       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:                          str
    @keyword force:                     Boolean argument which if True causes the file to be overwritten if it already exists.
    @type force:                        bool
    @keyword compress_type:             The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.  If no compression is given but the file name ends in '.gz' or '.bz2', then the compression will be automatically set.
    @type compress_type:                int
    @keyword verbosity:                 The verbosity level.
    @type verbosity:                    int
    @keyword return_path:               If True, the function will return a tuple of the file object and the full file path.
    @type return_path:                  bool
    @return:                            The open, writable file object and, if the return_path is True, then the full file path is returned as well.
    @rtype:                             writable file object (if return_path, then a tuple of the writable file and the full file path)
    @raises RelaxError:                 If no file name is given, the compression type is unknown, or the file cannot be opened.
    @raises RelaxFileOverwriteError:    If the file exists and the force flag is not set.
    """

    # No file name?
    if file_name is None:
        raise RelaxError("The name of the file must be supplied.")

    # A file descriptor object, or something pretending to be a file object.
    if is_filetype(file_name) or hasattr(file_name, 'write'):
        # Nothing to open, but the return_path contract must still be honoured so that callers can unpack the result.
        if return_path:
            return file_name, None
        return file_name

    # The null device.
    if search('devnull', file_name):
        # Print out.
        if verbosity:
            print("Opening the null device file for writing.")

        # Open the null device.
        file_obj = open(devnull, 'w')

        # Return the file.
        if return_path:
            return file_obj, None
        return file_obj

    # Catch unknown compression types early, rather than failing on an unset file object at the end.
    if compress_type not in (0, 1, 2):
        raise RelaxError("The compression type " + repr(compress_type) + " is invalid, it must be one of 0, 1 or 2.")

    # Create the directories.
    mkdir_nofail(dir, verbosity=0)

    # File path.
    file_path = get_file_path(file_name, dir)

    # If no compression is supplied, determine the compression to be used from the file extension (as a literal suffix).
    if compress_type == 0:
        if file_path.endswith('.bz2'):
            compress_type = 1
        elif file_path.endswith('.gz'):
            compress_type = 2

    # Bzip2 compression.
    if compress_type == 1 and not file_path.endswith('.bz2'):
        # Bz2 module exists.
        if bz2:
            file_path = file_path + '.bz2'

        # Switch to gzip compression.
        else:
            warn(RelaxWarning("Cannot use Bzip2 compression, using gzip compression instead. " + bz2_module_message + "."))
            compress_type = 2

    # Gzip compression.
    if compress_type == 2 and not file_path.endswith('.gz'):
        file_path = file_path + '.gz'

    # Fail if the file already exists and the force flag is not set.
    if access(file_path, F_OK) and not force:
        raise RelaxFileOverwriteError(file_path, 'force flag')

    # Open the file for writing.
    try:
        # Print out.
        if verbosity:
            print("Opening the file " + repr(file_path) + " for writing.")

        # Uncompressed text.
        if compress_type == 0:
            file_obj = open(file_path, 'w')

        # Bzip2 compressed text.
        elif compress_type == 1:
            file_obj = bz2_open(file=file_path, mode='w')

        # Gzipped compressed text.
        elif compress_type == 2:
            file_obj = gz_open(file=file_path, mode='w')

    # Cannot open.
    except IOError:
        message = sys.exc_info()[1]

        # Not all IOError instances carry a second argument, so fall back to the string form of the error.
        reason = getattr(message, 'strerror', None) or str(message)
        raise RelaxError("Cannot open the file " + repr(file_path) + ". " + reason + ".")

    # Return the opened file.
    if return_path:
        return file_obj, file_path
    return file_obj

483 484

def sort_filenames(filenames=None, rev=False):
    """Sort the given list in alphanumeric (natural) order.

    Runs of the digits 0-9 are compared as integers, so that 'file2' sorts before 'file10'.  This is similar to a natural or version sort (e.g. as performed by GNU 'ls -v').

    The supplied list is sorted in place.  If rev is False that same list is returned, otherwise a new list in the reversed order is returned and the supplied list is left in ascending order.

    @keyword filenames: The list of file names to sort.
    @type filenames:    list of str
    @keyword rev:       Flag which if True will cause the list to be reversed.
    @type rev:          bool
    @return:            The sorted list of file names.
    @rtype:             list of str
    """

    def alphanum_key(name):
        """Create the sorting key as alternating text and integer elements."""

        # Splitting on a capturing group places the digit runs at the odd indices.
        # Only these are converted, so that the text and integer elements of two keys always line up.
        chunks = split(r'([0-9]+)', name)
        return [int(chunk) if index % 2 else chunk for index, chunk in enumerate(chunks)]

    # Now sort according to the key.
    filenames.sort(key=alphanum_key)

    # Reverse the list if needed, returning a real list rather than an iterator.
    if rev:
        return list(reversed(filenames))
    return filenames

510 511

def strip(data, comments=True):
    """Filter the empty lines and, optionally, the comment lines out of the file data structure.

    A comment line is one whose first element starts with the '#' character.

    @param data:        The file data to clean up.
    @type data:         list of lists of str
    @keyword comments:  A flag which if True will cause comments to be deleted.
    @type comments:     bool
    @return:            The input data with the empty and comment lines removed.
    @rtype:             list of lists of str
    """

    def is_data_line(row):
        """Decide if the row is to be kept."""

        # Empty lines.
        if len(row) == 0:
            return False

        # Comment lines.
        if comments and search("^#", row[0]):
            return False

        # Data lines.
        return True

    # Build the new data structure from the rows to keep.
    return [row for row in data if is_data_line(row)]

541 542

def swap_extension(file=None, ext=None):
    """Replace the extension of a file name with a new one.

    The new name is built from the result of file_root(), followed by a '.' character and the new extension.

    @keyword file:  The name of the original file.
    @type file:     str
    @keyword ext:   The new file name extension to use.
    @type ext:      str
    @return:        The name of the file with the new file name extension.
    @rtype:         str
    """

    # Join the file root and the new extension.
    return file_root(file) + '.' + ext

563 564

def test_binary(binary):
    """Test that the binary string corresponds to a valid executable file.

    If the binary string is the path of an existing file, that file is tested for being executable.  Otherwise each directory of the PATH environmental variable is searched for a file with the name of the binary, or that name followed by '.exe'.

    @param binary:                      The name or path of the binary executable file.
    @type binary:                       str
    @raises RelaxMissingBinaryError:    If the binary is a file path which cannot be accessed.
    @raises RelaxNonExecError:          If the binary is a file path which is not executable.
    @raises RelaxNoInPathError:         If the binary is not an existing file and cannot be found in the system path.
    """

    # The path of an existing file has been given.
    if isfile(binary):
        # Test that the binary exists.
        if not access(binary, F_OK):
            raise RelaxMissingBinaryError(binary)

        # Test that the binary is executable.
        if not access(binary, X_OK):
            raise RelaxNonExecError(binary)

        # All is good.
        return

    # Get the PATH environmental variable, which will be None if it is not set at all.
    search_path = getenv('PATH')

    # Split PATH by the path separator (an unset PATH means there is nothing to search).
    if search_path is None:
        directories = []
    else:
        directories = search_path.split(pathsep)

    # Test that the binary exists within the system path (and exit this function instantly once it has been found).
    for directory in directories:
        candidate = directory + sep + binary
        if access(candidate, F_OK) or access(candidate + ".exe", F_OK):
            return

    # The binary is not located in the system path!
    raise RelaxNoInPathError(binary)

603 604

def write_data(out=None, headings=None, data=None, sep=None):
    """Write out a table of the data to the given file handle.

    Without a column separator, the columns are left aligned and padded with whitespace to a common width, and the heading line starts with '# '.  With a column separator, the elements are simply joined by it, and the heading line starts with '#'.

    @keyword out:       The file handle to write to.
    @type out:          file handle
    @keyword headings:  The optional headings to print out.
    @type headings:     list of str or None
    @keyword data:      The data to print out.
    @type data:         list of list of str
    @keyword sep:       The column separator which, if None, defaults to whitespace.
    @type sep:          str or None
    """

    # No data to print out.
    if data in [None, []]:
        return

    # The column count is taken from the first row.
    num_cols = len(data[0])
    with_headings = headings != None

    # Pretty whitespace formatting.
    if sep == None:
        # The starting widths come from the headings (the first one being prefixed by '# ').
        widths = []
        for col in range(num_cols):
            if not with_headings:
                widths.append(0)
            elif col == 0:
                widths.append(len(headings[col]) + 2)
            else:
                widths.append(len(headings[col]))

        # Grow the widths to fit the widest element of each column.
        for row in data:
            for col in range(num_cols):
                widths[col] = max(widths[col], len(row[col]))

        # Convert to format strings, with 4 characters of padding between the columns.
        formats = ["%%-%ss" % (width + 4) for width in widths]

        # The headings.
        if with_headings:
            out.write(formats[0] % ("# " + headings[0]))
            for col in range(1, num_cols):
                out.write(formats[col] % headings[col])
            out.write('\n')

        # The data.
        for row in data:
            for col in range(num_cols):
                out.write(formats[col] % row[col])
            out.write('\n')

        # All done.
        return

    def write_separated(elements):
        """Write out one line with the elements joined by the column separator."""

        for col in range(num_cols):
            # The column separator.
            if col > 0:
                out.write(sep)

            # The element itself.
            out.write(elements[col])
        out.write('\n')

    # Non-whitespace formatting of the headings.
    if with_headings:
        out.write('#')
        write_separated(headings)

    # Non-whitespace formatting of the data.
    for row in data:
        write_separated(row)

692 693 694

class DummyFileObject:
    """An in-memory object mimicking part of the file object interface.

    All text passed to write() is accumulated in the 'data' attribute and can be obtained as a list of lines via readlines().
    """

    def __init__(self, mode='w'):
        """Set up the dummy object to act as a file object.

        @keyword mode:  Set the read or write mode for object testing.  This can be 'r' or 'w'.
        @type mode:     str
        """

        # The mode, as reported by readable() and writable().
        self.mode = mode

        # The container for the text of all write() calls.
        self.data = ''

        # The object starts off as being open.
        self.closed = False


    def close(self):
        """Mark this object as being closed."""

        self.closed = True


    def readable(self):
        """Mimic the Python 3 readable() method.

        @return:    True if the mode is 'r', False otherwise.
        @rtype:     bool
        """

        return self.mode == 'r'


    def readlines(self):
        """Mimic the file object readlines() method.

        This method works even if this dummy file object is closed!

        @return:    The contents of the file object separated by newline characters, each line ending in a newline character.
        @rtype:     list of str
        """

        # Break the text up at the newline characters.
        pieces = self.data.split('\n')

        # Text ending in a newline (or no text at all) leaves an empty final piece, which is dropped.
        if pieces[-1] == '':
            del pieces[-1]

        # Put the newline character back onto every line, to match the file object readlines() method.
        return [piece + '\n' for piece in pieces]


    def writable(self):
        """Mimic the Python 3 writable() method.

        @return:    True if the mode is 'w', False otherwise.
        @rtype:     bool
        """

        return self.mode == 'w'


    def write(self, str):
        """Mimic the file object write() method so that this class can be used as a file object.

        @param str:         The string to be written.
        @type str:          str
        @raises ValueError: If this object has been closed.
        """

        # Writing to a closed object is not allowed.
        if self.closed:
            raise ValueError('I/O operation on closed file')

        # Add the string to the end of the stored text.
        self.data = self.data + str

776 777 778

class SplitIO:
    """Stream-like object duplicating everything written to it onto two streams.

    The two target streams must be set with the split() method before any of the other methods are used.
    """

    def __init__(self):
        """Class for splitting an IO stream to two outputs."""


    def flush(self):
        """Flush both of the streams, the first one before the second."""

        for stream in (self.stream1, self.stream2):
            stream.flush()


    def isatty(self):
        """Check that both streams are TTYs.

        @return:    True, only if both streams are TTYs.
        @rtype:     bool
        """

        # Query both streams, then combine the two answers.
        first_is_tty = self.stream1.isatty()
        second_is_tty = self.stream2.isatty()
        return first_is_tty & second_is_tty


    def split(self, stream1, stream2):
        """Set the two streams to send all text to.

        @param stream1: The first stream.
        @type stream1:  file object
        @param stream2: The second stream.
        @type stream2:  file object
        """

        self.stream1, self.stream2 = stream1, stream2


    def write(self, text):
        """Replacement write function, sending the text to the first and then the second stream.

        @param text:    The text to write.
        @type text:     str
        """

        for stream in (self.stream1, self.stream2):
            stream.write(text)

819

Source Code for Module lib.io