1 from __future__ import absolute_import
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 """Module containing advanced IO functions for relax.
27
28 This includes IO redirection, automatic loading and writing of compressed files (both Gzip and BZ2 compression), reading and writing of files, processing of the contents of files, etc.
29 """
30
31
32 import sys
33 try:
34 import bz2
35 except ImportError:
36 bz2 = None
37 message = sys.exc_info()[1]
38 bz2_module_message = message.args[0]
39 from os import devnull
40 from os import F_OK, X_OK, access, altsep, getenv, makedirs, pathsep, remove, sep
41 from os.path import expanduser, basename, splitext, isfile
42 from re import search, split
43 from sys import stdin, stdout, stderr
44 from warnings import warn
45
46
47 from lib.check_types import is_filetype
48 from lib.compat import bz2_open, gz_open
49 from lib.errors import RelaxError, RelaxFileError, RelaxFileOverwriteError, RelaxMissingBinaryError, RelaxNoInPathError, RelaxNonExecError
50 from lib.warnings import RelaxWarning
51
52
53
def delete(file_name, dir=None, fail=True):
    """Remove a file from disk, falling back to its compressed variants.

    The plain file path is tried first, followed by the same path with a '.bz2' and then a '.gz' extension appended.  The first one found to exist is removed.

    @param file_name:       The name of the file to delete.
    @type file_name:        str
    @keyword dir:           The directory containing the file.
    @type dir:              None or str
    @keyword fail:          A flag which if True will cause RelaxFileError to be raised when none of the candidate files exist.
    @type fail:             bool
    @raises RelaxFileError: If the file does not exist, and fail is set to true.
    """

    # The path without any compression extension.
    base_path = get_file_path(file_name, dir)

    # Remove the first candidate that exists (uncompressed, Bzip2, then Gzip).
    for candidate in (base_path, base_path + '.bz2', base_path + '.gz'):
        if access(candidate, F_OK):
            remove(candidate)
            return

    # Nothing was found - complain only if asked to.
    if fail:
        raise RelaxFileError(base_path)
83
84
86 """Function for determining the compression type, and for also testing if the file exists.
87
88 @param file_path: The full file path of the file.
89 @type file_path: str
90 @return: A tuple of the compression type and full path of the file (including its extension). A value of 0 corresponds to no compression. Bzip2 compression corresponds to a value of 1. Gzip compression corresponds to a value of 2.
91 @rtype: (int, str)
92 """
93
94
95 if access(file_path, F_OK):
96 compress_type = 0
97 if search('.bz2$', file_path):
98 compress_type = 1
99 elif search('.gz$', file_path):
100 compress_type = 2
101
102
103 elif access(file_path + '.bz2', F_OK):
104 file_path = file_path + '.bz2'
105 compress_type = 1
106
107
108 elif access(file_path + '.gz', F_OK):
109 file_path = file_path + '.gz'
110 compress_type = 2
111
112
113 else:
114 raise RelaxFileError(file_path)
115
116
117 return compress_type, file_path
118
119
121 """Return all data in the file as a list of lines where each line is a list of line elements.
122
123 @keyword file: The file to extract the data from.
124 @type file: str or file object
125 @keyword dir: The path where the file is located. If None and the file argument is a string, then the current directory is assumed.
126 @type dir: str or None
127 @keyword file_data: If the file data has already been extracted from the file, it can be passed into this function using this argument. If data is supplied here, then the file_name and dir args are ignored.
128 @type file_data: list of str
129 @keyword sep: The character separating the columns in the file data. If None, then whitespace is assumed.
130 @type sep: str
131 @return: The file data.
132 @rtype: list of lists of str
133 """
134
135
136 if not file_data:
137
138 if isinstance(file, str):
139 file = open_read_file(file_name=file, dir=dir)
140
141
142 file_data = file.readlines()
143
144
145 data = []
146 for i in range(len(file_data)):
147 if sep:
148 row = file_data[i].split(sep)
149 else:
150 row = file_data[i].split()
151 data.append(row)
152
153
154 if not file_data:
155 file.close()
156
157
158 return data
159
160
162 """Return the root file name, stripped of path and extension details.
163
164 @param file_path: The full path to the file.
165 @type file_path: str
166 @return: The file root (with all directories and the extension stripped away).
167 @rtype: str
168 """
169
170
171 ext = None
172 while ext != '':
173 file_path, ext = splitext(file_path)
174
175
176 return basename(file_path)
177
178
180 """Generate and expand the full file path.
181
182 @keyword file_name: The name of the file to extract the data from.
183 @type file_name: str
184 @keyword dir: The path where the file is located. If None, then the current directory is assumed.
185 @type dir: str
186 @return: The full file path.
187 @rtype: str
188 """
189
190
191 file_path = file_name
192
193
194 if dir:
195 file_path = dir + sep + file_path
196
197
198 if file_path:
199 file_path = expanduser(file_path)
200
201
202 return file_path
203
204
206 """Restore all IO streams to the Python defaults.
207
208 @keyword verbosity: The verbosity level.
209 @type verbosity: int
210 """
211
212
213 if verbosity:
214 print("Restoring the sys.stdin IO stream to the Python STDIN IO stream.")
215 print("Restoring the sys.stdout IO stream to the Python STDOUT IO stream.")
216 print("Restoring the sys.stderr IO stream to the Python STDERR IO stream.")
217
218
219 sys.stdin = sys.__stdin__
220 sys.stdout = sys.__stdout__
221 sys.stderr = sys.__stderr__
222
223
225 """Turn on logging, sending both STDOUT and STDERR streams to a file.
226
227 @keyword file_name: The name of the file.
228 @type file_name: str
229 @keyword dir: The path where the file is located. If None, then the current directory is assumed.
230 @type dir: str
231 @keyword verbosity: The verbosity level.
232 @type verbosity: int
233 """
234
235
236 log_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, verbosity=verbosity, return_path=True)
237
238
239 log_stdin = stdin
240 log_stdout = None
241 log_stderr = SplitIO()
242
243
244 if verbosity:
245 print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
246 print("Redirecting the sys.stdout IO stream to the log file '%s'." % file_path)
247 print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)
248
249
250 log_stdout = log_file
251 log_stderr.split(stderr, log_file)
252
253
254 sys.stdin = log_stdin
255 sys.stdout = log_stdout
256 sys.stderr = log_stderr
257
258
def io_streams_tee(file_name=None, dir=None, compress_type=0, verbosity=1):
    """Tee STDOUT and STDERR so that both also end up in a file.

    The file is opened for writing (overwriting any existing file), then sys.stdout and sys.stderr are each replaced by a SplitIO object which forwards to the original stream and to the file.  sys.stdin is set to the Python stdin stream.

    @keyword file_name:     The name of the file.
    @type file_name:        str
    @keyword dir:           The path where the file is located.  If None, then the current directory is assumed.
    @type dir:              str
    @keyword compress_type: The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.
    @type compress_type:    int
    @keyword verbosity:     The verbosity level.
    @type verbosity:        int
    """

    # Open the shared output file, always overwriting.
    tee_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, compress_type=compress_type, verbosity=verbosity, return_path=1)

    # Announce the redirections while the old streams are still in place.
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to both the Python stdout IO stream and the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)

    # One splitter per output stream, each paired with the file.
    out_splitter = SplitIO()
    out_splitter.split(stdout, tee_file)
    err_splitter = SplitIO()
    err_splitter.split(stderr, tee_file)

    # Install the streams.
    sys.stdin = stdin
    sys.stdout = out_splitter
    sys.stderr = err_splitter
294
295
297 """Create the given directory, or exit without raising an error if the directory exists.
298
299 @keyword dir: The directory to create.
300 @type dir: str
301 @keyword verbosity: The verbosity level.
302 @type verbosity: int
303 """
304
305
306 if dir == None:
307 return
308
309
310 dir = expanduser(dir)
311
312
313 try:
314 makedirs(dir)
315 except OSError:
316 if verbosity:
317 print("Directory ." + sep + dir + " already exists.\n")
318
319
321 """Open the given file for reading and return the open file object.
322
323 @keyword file_name: The name of the file to extract the data from.
324 @type file_name: str
325 @keyword dir: The path where the file is located. If None, then the current directory is assumed.
326 @type dir: str
327 @keyword verbosity: The verbosity level.
328 @type verbosity: int
329 @return: The open file object.
330 @rtype: file object
331 """
332
333
334 if is_filetype(file_name):
335
336 return file_name
337
338
339 if not file_name and not isinstance(file_name, str):
340 raise RelaxError("The file name " + repr(file_name) + " " + repr(type(file_name)) + " is invalid and cannot be opened.")
341
342
343 file_path = get_file_path(file_name, dir)
344
345
346 compress_type, file_path = determine_compression(file_path)
347
348
349 try:
350
351 if verbosity:
352 print("Opening the file " + repr(file_path) + " for reading.")
353
354
355 if compress_type == 0:
356 file_obj = open(file_path, 'r')
357
358
359 elif compress_type == 1:
360 file_obj = bz2_open(file=file_path, mode='r')
361
362
363 elif compress_type == 2:
364 file_obj = gz_open(file=file_path, mode='r')
365
366
367 except IOError:
368 message = sys.exc_info()[1]
369 raise RelaxError("Cannot open the file " + repr(file_path) + ". " + message.args[1] + ".")
370
371
372 return file_obj
373
374
def open_write_file(file_name=None, dir=None, force=False, compress_type=0, verbosity=1, return_path=False):
    """Open a file for writing, creating the directory if necessary.

    If file_name is already a file object, or any object with a write() method, it is handed straight back without touching the file system.  Any file name containing the text 'devnull' is directed to the null device.

    @keyword file_name:                 The name of the file to write to, or an already open writable object.
    @type file_name:                    str or file object
    @keyword dir:                       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:                          str
    @keyword force:                     Boolean argument which if True causes the file to be overwritten if it already exists.
    @type force:                        bool
    @keyword compress_type:             The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.  If no compression is given but the file name ends in '.gz' or '.bz2', then the compression will be automatically set.
    @type compress_type:                int
    @keyword verbosity:                 The verbosity level.
    @type verbosity:                    int
    @keyword return_path:               If True, the function will return a tuple of the file object and the full file path.
    @type return_path:                  bool
    @return:                            The open, writable file object and, if the return_path is True, then the full file path is returned as well.  The path is None for the null device and for file objects passed in by the caller.
    @rtype:                             writable file object (if return_path, then a tuple of the writable file and the full file path)
    @raises RelaxError:                 If no file name is given, the compression type is unknown, or the file cannot be opened.
    @raises RelaxFileOverwriteError:    If the file exists and the force flag is not set.
    """

    # A file name is mandatory.
    if file_name is None:
        raise RelaxError("The name of the file must be supplied.")

    # A real file object, or something pretending to be one - nothing to open.
    # The return_path contract is honoured so that callers unpacking a tuple do not break.
    if is_filetype(file_name) or hasattr(file_name, 'write'):
        if return_path:
            return file_name, None
        return file_name

    # The null device.
    if search('devnull', file_name):
        # Printout.
        if verbosity:
            print("Opening the null device file for writing.")

        # Open the null device.
        file_obj = open(devnull, 'w')

        # Return the file.
        if return_path:
            return file_obj, None
        return file_obj

    # Create the directories (silently, if they already exist).
    mkdir_nofail(dir, verbosity=0)

    # The full, expanded file path.
    file_path = get_file_path(file_name, dir)

    # Automatic compression detection from the literal file extension.
    # Plain string tests are used as the regular expression '.gz$' would also match names such as 'foogz'.
    if compress_type == 0:
        if file_path.endswith('.bz2'):
            compress_type = 1
        elif file_path.endswith('.gz'):
            compress_type = 2

    # Bzip2 compression requested, but the extension is missing.
    if compress_type == 1 and not file_path.endswith('.bz2'):
        # The bz2 module exists.
        if bz2:
            file_path = file_path + '.bz2'

        # Fall back to gzip compression.
        else:
            warn(RelaxWarning("Cannot use Bzip2 compression, using gzip compression instead. " + bz2_module_message + "."))
            compress_type = 2

    # Gzip compression requested, but the extension is missing.
    if compress_type == 2 and not file_path.endswith('.gz'):
        file_path = file_path + '.gz'

    # Refuse to overwrite an existing file unless forced.
    if access(file_path, F_OK) and not force:
        raise RelaxFileOverwriteError(file_path, 'force flag')

    # Open the file for writing.
    try:
        # Printout.
        if verbosity:
            print("Opening the file " + repr(file_path) + " for writing.")

        # Uncompressed text.
        if compress_type == 0:
            file_obj = open(file_path, 'w')

        # Bzip2 compressed text.
        elif compress_type == 1:
            file_obj = bz2_open(file=file_path, mode='w')

        # Gzipped compressed text.
        elif compress_type == 2:
            file_obj = gz_open(file=file_path, mode='w')

        # Anything else would otherwise leave file_obj undefined.
        else:
            raise RelaxError("The compression type " + repr(compress_type) + " is unknown.")

    # Unable to open the file.
    except IOError:
        message = sys.exc_info()[1]
        raise RelaxError("Cannot open the file " + repr(file_path) + ".  " + message.args[1] + ".")

    # Return the opened file.
    if return_path:
        return file_obj, file_path
    return file_obj
483
484
486 """Sort the given list in alphanumeric order. Should be equivalent to unix 'ls -n' command.
487
488 @keyword filenames: The list of file names to sort.
489 @type filenames: list of str
490 @keyword rev: Flag which if True will cause the list to be reversed.
491 @type rev: bool
492 @return: The sorted list of file names.
493 @rtype: list of str
494 """
495
496
497 convert = lambda text: int(text) if text.isdigit() else text
498
499
500 alphanum_key = lambda key: [ convert(c) for c in split('([0-9]+)', key) ]
501
502
503 filenames.sort( key=alphanum_key )
504
505
506 if rev:
507 return reversed(filenames)
508 else:
509 return filenames
510
511
def strip(data, comments=True):
    """Filter out the empty lines, and optionally the comment lines, of the file data.

    A comment line is one whose first element starts with the '#' character.

    @param data:        The file data to clean up.
    @type data:         list of lists of str
    @keyword comments:  A flag which if True will cause comments to be deleted.
    @type comments:     bool
    @return:            A new list holding only the rows which are neither empty nor, if requested, comments.
    @rtype:             list of lists of str
    """

    def keep(row):
        """Decide if a single row survives the filtering."""

        # Empty lines are always dropped.
        if len(row) == 0:
            return False

        # Comment lines are dropped on request.
        return not (comments and search("^#", row[0]))

    # Build the cleaned up structure, preserving the row order.
    return [row for row in data if keep(row)]
541
542
544 """Swap one file name extension for another.
545
546 @keyword file: The name of the original file.
547 @type file: str
548 @keyword ext: The new file name extension to use.
549 @type ext: str
550 @return: The name of the file with the new file name extension.
551 @rtype: str
552 """
553
554
555 new_file = file_root(file)
556
557
558 new_file += '.'
559 new_file += ext
560
561
562 return new_file
563
564
566 """Function for testing that the binary string corresponds to a valid executable file.
567
568 @param binary: The name or path of the binary executable file.
569 @type binary: str
570 """
571
572
573 if altsep:
574 path_sep = '[' + sep + altsep + ']'
575 else:
576 path_sep = sep
577
578
579 if isfile(binary):
580
581 if not access(binary, F_OK):
582 raise RelaxMissingBinaryError(binary)
583
584
585 if not access(binary, X_OK):
586 raise RelaxNonExecError(binary)
587
588
589 else:
590
591 path = getenv('PATH')
592
593
594 path_list = path.split(pathsep)
595
596
597 for path in path_list:
598 if access(path + sep + binary, F_OK) or access(path + sep + binary +".exe", F_OK):
599 return
600
601
602 raise RelaxNoInPathError(binary)
603
604
def write_data(out=None, headings=None, data=None, sep=None):
    """Write a table of text to an open file handle.

    Without a separator, the columns are left-justified and padded with whitespace so that they line up, each column being 4 characters wider than its longest entry.  With a separator, the elements of each row are simply written out with the separator between them.  The optional heading line is started with the '#' character.

    @keyword out:       The file handle to write to.
    @type out:          file handle
    @keyword headings:  The optional headings to print out.
    @type headings:     list of str or None
    @keyword data:      The data to print out.
    @type data:         list of list of str
    @keyword sep:       The column separator which, if None, defaults to whitespace.
    @type sep:          str or None
    """

    # Nothing to write.
    if data in [None, []]:
        return

    # The table width is taken from the first row.
    num_cols = len(data[0])
    columns = range(num_cols)
    has_headings = headings is not None

    # Explicit column separator.
    if sep is not None:
        # The heading line.
        if has_headings:
            out.write('#')
            for j in columns:
                if j > 0:
                    out.write(sep)
                out.write(headings[j])
            out.write('\n')

        # The data lines.
        for row in data:
            for j in columns:
                if j > 0:
                    out.write(sep)
                out.write(row[j])
            out.write('\n')

        return

    # Whitespace formatting - start from the heading widths (the first heading is prefixed by '# ').
    if has_headings:
        widths = [len(headings[j]) + 2 if j == 0 else len(headings[j]) for j in columns]
    else:
        widths = [0] * num_cols

    # Widen each column to fit its longest data element.
    for row in data:
        for j in columns:
            widths[j] = max(widths[j], len(row[j]))

    # The left-justified format string of each column.
    formats = ["%%-%ss" % (width + 4) for width in widths]

    # The heading line.
    if has_headings:
        out.write(formats[0] % ("# " + headings[0]))
        for j in range(1, num_cols):
            out.write(formats[j] % headings[j])
        out.write('\n')

    # The data lines.
    for row in data:
        for j in columns:
            out.write(formats[j] % row[j])
        out.write('\n')
692
693
694
697 """Set up the dummy object to act as a file object.
698
699 @keyword mode: Set the read or write mode for object testing. This can be 'r' or 'w'.
700 @type mode: str
701 """
702
703
704 self.mode = mode
705
706
707 self.data = ''
708
709
710 self.closed = False
711
712
714 """A method for 'closing' this object."""
715
716
717 self.closed = True
718
719
721 """Mimic the Python 3 readable() method."""
722
723 if self.mode == 'r':
724 return True
725 else:
726 return False
727
728
730 """Mimic the file object readlines() method.
731
732 This method works even if this dummy file object is closed!
733
734
735 @return: The contents of the file object separated by newline characters.
736 @rtype: list of str
737 """
738
739
740 lines = self.data.split('\n')
741
742
743 if lines[-1] == '':
744 lines.pop()
745
746
747 for i in range(len(lines)):
748 lines[i] = lines[i] + '\n'
749
750
751 return lines
752
753
755 """Mimic the Python 3 writable() method."""
756
757 if self.mode == 'w':
758 return True
759 else:
760 return False
761
762
764 """Mimic the file object write() method so that this class can be used as a file object.
765
766 @param str: The string to be written.
767 @type str: str
768 """
769
770
771 if self.closed:
772 raise ValueError('I/O operation on closed file')
773
774
775 self.data = self.data + str
776
777
778
781 """Class for splitting an IO stream to two outputs."""
782
783
785 """Flush all streams."""
786
787
788 self.stream1.flush()
789 self.stream2.flush()
790
791
793 """Check that both streams are TTYs.
794
795 @return: True, only if both streams are TTYs.
796 @rtype: bool
797 """
798
799
800 return self.stream1.isatty() & self.stream2.isatty()
801
802
def split(self, stream1, stream2):
    """Register the two output streams of this object.

    @param stream1: The first stream.
    @type stream1:  file object
    @param stream2: The second stream.
    @type stream2:  file object
    """

    # Store both streams in one go.
    self.stream1, self.stream2 = stream1, stream2
809
810
812 """Replacement write function."""
813
814
815 self.stream1.write(text)
816
817
818 self.stream2.write(text)
819