1 from __future__ import absolute_import
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """Module containing advanced IO functions for relax.
26
27 This includes IO redirection, automatic loading and writing of compressed files (both Gzip and BZ2 compression), reading and writing of files, processing of the contents of files, etc.
28 """
29
30
31 import sys
32 try:
33 import bz2
34 except ImportError:
35 bz2 = None
36 message = sys.exc_info()[1]
37 bz2_module_message = message.args[0]
38 from os import devnull
39 from os import F_OK, X_OK, access, altsep, getenv, makedirs, pathsep, remove, sep
40 from os.path import expanduser, basename, splitext, isfile
41 from re import search, split
42 from sys import stdin, stdout, stderr
43 from warnings import warn
44
45
46 from lib.check_types import is_filetype
47 from lib.compat import bz2_open, gz_open
48 from lib.errors import RelaxError, RelaxFileError, RelaxFileOverwriteError, RelaxMissingBinaryError, RelaxNoInPathError, RelaxNonExecError
49 from lib.warnings import RelaxWarning
50
51
52
def delete(file_name, dir=None, fail=True):
    """Remove a file from disk, also trying its compressed variants.

    The plain path is tried first, then the same path with '.bz2' and finally with '.gz' appended.  The first candidate found on disk is removed.

    @param file_name:       The name of the file to delete.
    @type file_name:        str
    @keyword dir:           The directory containing the file.
    @type dir:              None or str
    @keyword fail:          A flag which if True will cause RelaxFileError to be raised.
    @type fail:             bool
    @raises RelaxFileError: If the file does not exist, and fail is set to true.
    """

    # The path of the uncompressed file.
    base_path = get_file_path(file_name, dir)

    # Walk through the candidates in order of preference, removing the first one which exists.
    for suffix in ('', '.bz2', '.gz'):
        candidate = base_path + suffix
        if access(candidate, F_OK):
            remove(candidate)
            return

    # No candidate exists, so complain only if asked to.
    if fail:
        raise RelaxFileError(base_path)
82
83
def determine_compression(file_path):
    """Determine the compression type of a file, checking that the file exists.

    If the path does not exist as given, the same path with '.bz2' and then '.gz' appended is tried.

    @param file_path:       The full file path of the file.
    @type file_path:        str
    @return:                A tuple of the compression type and full path of the file (including its extension).  A value of 0 corresponds to no compression.  Bzip2 compression corresponds to a value of 1.  Gzip compression corresponds to a value of 2.
    @rtype:                 (int, str)
    @raises RelaxFileError: If neither the path nor one of its compressed variants exists.
    """

    # The file exists as given, so the compression follows from its extension.
    # A literal suffix test is used, as the regular expression '.bz2$' would also match names such as 'data_bz2'.
    if access(file_path, F_OK):
        if file_path.endswith('.bz2'):
            return 1, file_path
        if file_path.endswith('.gz'):
            return 2, file_path
        return 0, file_path

    # Otherwise look for a compressed version of the file, Bzip2 first.
    for compress_type, ext in ((1, '.bz2'), (2, '.gz')):
        if access(file_path + ext, F_OK):
            return compress_type, file_path + ext

    # The file doesn't exist in any form.
    raise RelaxFileError(file_path)
117
118
def extract_data(file=None, dir=None, file_data=None, sep=None):
    """Return all data in the file as a list of lines where each line is a list of line elements.

    @keyword file:      The file to extract the data from.
    @type file:         str or file object
    @keyword dir:       The path where the file is located.  If None and the file argument is a string, then the current directory is assumed.
    @type dir:          str or None
    @keyword file_data: If the file data has already been extracted from the file, it can be passed into this function using this argument.  If data is supplied here, then the file and dir args are ignored.
    @type file_data:    list of str
    @keyword sep:       The character separating the columns in the file data.  If None, then whitespace is assumed.
    @type sep:          str
    @return:            The file data.  An empty list is returned if there is neither data nor a file to read from.
    @rtype:             list of lists of str
    """

    # Pre-extracted data takes precedence over the file.
    if file_data:
        lines = file_data

    # Nothing to read from.
    elif file is None:
        lines = []

    # Read the lines from the file.
    else:
        # Only a file opened here is closed here, a handle supplied by the caller remains the caller's to close.
        opened_here = False
        if isinstance(file, str):
            file = open_read_file(file_name=file, dir=dir)
            opened_here = True

        try:
            lines = file.readlines()
        finally:
            if opened_here:
                file.close()

    # Split each line by either the separator or, if not given, whitespace.
    if sep:
        return [line.split(sep) for line in lines]
    return [line.split() for line in lines]
158
159
def file_root(file_path):
    """Return the root file name, stripped of path and extension details.

    All extensions are removed, so that 'dir/data.tar.gz' gives 'data'.

    @param file_path:   The full path to the file.
    @type file_path:    str
    @return:            The file root (with all directories and the extension stripped away).
    @rtype:             str
    """

    # Peel off one extension at a time until none are left.
    ext = None
    while ext != '':
        file_path, ext = splitext(file_path)

    # Drop the directories.
    return basename(file_path)
176
177
def get_file_path(file_name=None, dir=None):
    """Generate and expand the full file path.

    @keyword file_name: The name of the file to extract the data from.
    @type file_name:    str
    @keyword dir:       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:          str
    @return:            The full file path.
    @rtype:             str
    """

    # Start with the bare file name.
    file_path = file_name

    # Prepend the directory, if one is given.
    if dir:
        file_path = dir + sep + file_path

    # Expand any '~' user directory notation (skipped for an empty or missing path).
    if file_path:
        file_path = expanduser(file_path)

    return file_path
202
203
def io_streams_restore(verbosity=1):
    """Restore all IO streams to the Python defaults.

    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    """

    # Announce the change before the streams are swapped.
    if verbosity:
        print("Restoring the sys.stdin IO stream to the Python STDIN IO stream.")
        print("Restoring the sys.stdout IO stream to the Python STDOUT IO stream.")
        print("Restoring the sys.stderr IO stream to the Python STDERR IO stream.")

    # Point the streams back at the interpreter's original objects.
    sys.stdin = sys.__stdin__
    sys.stdout = sys.__stdout__
    sys.stderr = sys.__stderr__
221
222
def io_streams_log(file_name=None, dir=None, verbosity=1):
    """Turn on logging, sending both STDOUT and STDERR streams to a file.

    STDOUT is sent to the log file only, whereas STDERR is sent to both the original STDERR stream and the log file.

    @keyword file_name: The name of the file.
    @type file_name:    str
    @keyword dir:       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:          str
    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    """

    # Open the log file, overwriting any existing file.
    log_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, verbosity=verbosity, return_path=True)

    # Announce the redirection.
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)

    # STDERR is duplicated to the stream imported from sys and to the log file.
    log_stderr = SplitIO()
    log_stderr.split(stderr, log_file)

    # Install the streams.
    sys.stdin = stdin
    sys.stdout = log_file
    sys.stderr = log_stderr
256
257
def io_streams_tee(file_name=None, dir=None, compress_type=0, verbosity=1):
    """Turn on teeing, duplicating both the STDOUT and STDERR streams into a file.

    Each of the two streams continues to go to its original destination, with a copy sent to the file.

    @keyword file_name:     The name of the file.
    @type file_name:        str
    @keyword dir:           The path where the file is located.  If None, then the current directory is assumed.
    @type dir:              str
    @keyword compress_type: The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.
    @type compress_type:    int
    @keyword verbosity:     The verbosity level.
    @type verbosity:        int
    """

    # The file receiving the copies, overwriting any existing file.
    tee_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, compress_type=compress_type, verbosity=verbosity, return_path=1)

    # Announce the redirection.
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to both the Python stdout IO stream and the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)

    # One splitter per output stream, each feeding the stream imported from sys plus the file.
    out_split = SplitIO()
    out_split.split(stdout, tee_file)
    err_split = SplitIO()
    err_split.split(stderr, tee_file)

    # Install the streams.
    sys.stdin = stdin
    sys.stdout = out_split
    sys.stderr = err_split
293
294
def mkdir_nofail(dir=None, verbosity=1):
    """Create the given directory, or exit without raising an error if the directory exists.

    No exception is raised if the directory cannot be created for any other reason either, but the message printed then reports the actual failure rather than claiming that the directory exists.

    @keyword dir:       The directory to create.
    @type dir:          str
    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    """

    # Function-scope import, as isdir is not part of the module level imports.
    from os.path import isdir

    # No directory given, so there is nothing to do.
    if dir is None:
        return

    # Expand any '~' user directory notation.
    dir = expanduser(dir)

    # Make the directory, together with any missing parents.
    try:
        makedirs(dir)
    except OSError:
        # Silent mode.
        if not verbosity:
            return

        # The directory is already there.
        if isdir(dir):
            print("Directory ." + sep + dir + " already exists.\n")

        # Some other failure (permissions, a regular file in the way, etc.).
        else:
            error = sys.exc_info()[1]
            print("The directory " + repr(dir) + " could not be created: " + str(error) + "\n")
317
318
def open_read_file(file_name=None, dir=None, verbosity=1):
    """Open the file for reading, transparently handling Bzip2 and Gzip compressed files.

    If the file name is already a file object, it is returned as is.  If the file does not exist under the given name, the names with '.bz2' and '.gz' appended are tried via determine_compression().

    @keyword file_name: The name of the file to extract the data from.
    @type file_name:    str
    @keyword dir:       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:          str
    @keyword verbosity: The verbosity level.
    @type verbosity:    int
    @return:            The open file object.
    @rtype:             file object
    @raises RelaxError: If the file name is invalid or the file cannot be opened.
    """

    # An already open file object is simply handed back.
    if is_filetype(file_name):
        return file_name

    # Invalid file name.
    if not file_name and not isinstance(file_name, str):
        raise RelaxError("The file name " + repr(file_name) + " " + repr(type(file_name)) + " is invalid and cannot be opened.")

    # The full path, and the compression type of the file found on disk.
    file_path = get_file_path(file_name, dir)
    compress_type, file_path = determine_compression(file_path)

    # Printout.
    if verbosity:
        print("Opening the file " + repr(file_path) + " for reading.")

    # Open the file using the opener matching the compression type.
    try:
        if compress_type == 0:
            file_obj = open(file_path, 'r')
        elif compress_type == 1:
            file_obj = bz2_open(file=file_path, mode='r')
        elif compress_type == 2:
            file_obj = gz_open(file=file_path, mode='r')
    except IOError:
        message = sys.exc_info()[1]

        # Not all IOErrors carry an (errno, strerror) pair, so fall back to the string form of the error.
        reason = getattr(message, 'strerror', None) or str(message)
        raise RelaxError("Cannot open the file " + repr(file_path) + ". " + reason + ".")

    return file_obj
372
373
def open_write_file(file_name=None, dir=None, force=False, compress_type=0, verbosity=1, return_path=False):
    """Function for opening a file for writing and creating directories if necessary.

    @keyword file_name:                 The name of the file to write to.  A file object, or any object with a write() method, is handed back unchanged.
    @type file_name:                    str or writable object
    @keyword dir:                       The path where the file is located.  If None, then the current directory is assumed.
    @type dir:                          str
    @keyword force:                     Boolean argument which if True causes the file to be overwritten if it already exists.
    @type force:                        bool
    @keyword compress_type:             The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.  If no compression is given but the file name ends in '.gz' or '.bz2', then the compression will be automatically set.
    @type compress_type:                int
    @keyword verbosity:                 The verbosity level.
    @type verbosity:                    int
    @keyword return_path:               If True, the function will return a tuple of the file object and the full file path.
    @type return_path:                  bool
    @return:                            The open, writable file object and, if the return_path is True, then the full file path is returned as well.  The path is None for the null device and for objects which were already open.
    @rtype:                             writable file object (if return_path, then a tuple of the writable file and the full file path)
    @raises RelaxError:                 If no file name is given or the file cannot be opened.
    @raises RelaxFileOverwriteError:    If the file exists and the force flag is not set.
    """

    # A file name is mandatory.
    if file_name is None:
        raise RelaxError("The name of the file must be supplied.")

    # Already open file objects, or anything else which can be written to, are handed back unchanged.
    # The return_path flag is honoured here so that callers unpacking a tuple do not fail.
    if is_filetype(file_name) or hasattr(file_name, 'write'):
        if return_path:
            return file_name, None
        return file_name

    # The null device.
    # NOTE(review):  this matches any name containing 'devnull', not just the name itself - confirm that this is intended.
    if search('devnull', file_name):
        if verbosity:
            print("Opening the null device file for writing.")

        file_obj = open(devnull, 'w')

        if return_path:
            return file_obj, None
        return file_obj

    # Create the directories, then build the full path.
    mkdir_nofail(dir, verbosity=0)
    file_path = get_file_path(file_name, dir)

    # Automatic compression selection from the extension.
    # A literal suffix test is used, as the regular expression '.bz2$' would also match names such as 'data_bz2'.
    if compress_type == 0:
        if file_path.endswith('.bz2'):
            compress_type = 1
        elif file_path.endswith('.gz'):
            compress_type = 2

    # Bzip2 compression requested for a name lacking the extension.
    if compress_type == 1 and not file_path.endswith('.bz2'):
        # The bz2 module is available.
        if bz2:
            file_path = file_path + '.bz2'

        # Fall back to gzip compression.
        else:
            warn(RelaxWarning("Cannot use Bzip2 compression, using gzip compression instead.  " + bz2_module_message + "."))
            compress_type = 2

    # Gzip compression requested for a name lacking the extension.
    if compress_type == 2 and not file_path.endswith('.gz'):
        file_path = file_path + '.gz'

    # Refuse to overwrite an existing file unless forced.
    if access(file_path, F_OK) and not force:
        raise RelaxFileOverwriteError(file_path, 'force flag')

    # Printout.
    if verbosity:
        print("Opening the file " + repr(file_path) + " for writing.")

    # Open the file using the opener matching the compression type.
    try:
        if compress_type == 0:
            file_obj = open(file_path, 'w')
        elif compress_type == 1:
            file_obj = bz2_open(file=file_path, mode='w')
        elif compress_type == 2:
            file_obj = gz_open(file=file_path, mode='w')
    except IOError:
        message = sys.exc_info()[1]

        # Not all IOErrors carry an (errno, strerror) pair, so fall back to the string form of the error.
        reason = getattr(message, 'strerror', None) or str(message)
        raise RelaxError("Cannot open the file " + repr(file_path) + ". " + reason + ".")

    if return_path:
        return file_obj, file_path
    return file_obj
482
483
def sort_filenames(filenames=None, rev=False):
    """Sort the given list in alphanumeric (natural) order, so that 'f2' comes before 'f10'.

    This should be comparable to the ordering of the unix 'ls -v' command.  The list is sorted in place.

    @keyword filenames: The list of file names to sort.
    @type filenames:    list of str
    @keyword rev:       Flag which if True will cause the list to be reversed.
    @type rev:          bool
    @return:            The sorted list of file names.  If rev is set, this is a new list in descending order and the input list is left in ascending order.
    @rtype:             list of str
    """

    def alphanum_key(name):
        """Split the name into alternating text and integer parts."""

        # With a capturing group, split() places the digit runs at the odd positions.  Using the position rather than
        # isdigit() guarantees that only ASCII digit runs are converted, and that text is never compared with numbers.
        parts = split('([0-9]+)', name)
        return [int(part) if index % 2 else part for index, part in enumerate(parts)]

    # Sort in place, as callers may rely on the input list being modified.
    filenames.sort(key=alphanum_key)

    # Return a real list rather than an iterator for the reversed order.
    if rev:
        return filenames[::-1]
    return filenames
509
510
def strip(data, comments=True):
    """Filter the empty lines and, optionally, the comment lines out of the file data structure.

    A comment line is one whose first element starts with the '#' character.

    @param data:        The file data to clean up.
    @type data:         list of lists of str
    @keyword comments:  A flag which if True will cause comments to be deleted.
    @type comments:     bool
    @return:            The input data with the empty and comment lines removed.
    @rtype:             list of lists of str
    """

    def wanted(row):
        """Decide if the line is to be kept."""

        # Empty lines are always dropped.
        if len(row) == 0:
            return False

        # Comment lines are dropped on request.
        if comments and search("^#", row[0]):
            return False

        return True

    # Build the new structure from the lines to keep.
    return [row for row in data if wanted(row)]
540
541
def swap_extension(file=None, ext=None):
    """Swap one file name extension for another.

    The root of the file name is obtained from file_root(), which is documented as stripping away all directories and the extension.

    @keyword file:  The name of the original file.
    @type file:     str
    @keyword ext:   The new file name extension to use.
    @type ext:      str
    @return:        The name of the file with the new file name extension.
    @rtype:         str
    """

    # The file root followed by the new extension.
    return file_root(file) + '.' + ext
562
563
def test_binary(binary):
    """Function for testing that the binary string corresponds to a valid executable file.

    If the string is the path of an existing file, only its executable permission is checked.  Otherwise each directory in the PATH environmental variable is searched for the name, with and without the '.exe' extension.

    @param binary:              The name or path of the binary executable file.
    @type binary:               str
    @raises RelaxNonExecError:  If the file exists but is not executable.
    @raises RelaxNoInPathError: If the binary cannot be found in the PATH.
    """

    # The binary is given as a path to an existing file (isfile() already guarantees that it exists).
    if isfile(binary):
        if not access(binary, X_OK):
            raise RelaxNonExecError(binary)
        return

    # Otherwise search the PATH, treating an unset variable as an empty search path.
    path = getenv('PATH')
    path_list = path.split(pathsep) if path else []

    # Only the existence of the file is checked here, not its permissions.
    for directory in path_list:
        if access(directory + sep + binary, F_OK) or access(directory + sep + binary + ".exe", F_OK):
            return

    # The binary is nowhere to be found.
    raise RelaxNoInPathError(binary)
602
603
def write_data(out=None, headings=None, data=None, sep=None):
    """Write out a table of the data to the given file handle.

    Without a separator, the columns are left justified and padded to the width of the widest element plus four spaces, with the heading line prefixed by '# '.  With a separator, the elements are joined by it and the heading line is prefixed by '#'.

    @keyword out:       The file handle to write to.
    @type out:          file handle
    @keyword headings:  The optional headings to print out.
    @type headings:     list of str or None
    @keyword data:      The data to print out.
    @type data:         list of list of str
    @keyword sep:       The column separator which, if None, defaults to whitespace.
    @type sep:          str or None
    """

    # No data to print out.
    if data in [None, []]:
        return

    # The column count is taken from the first row.
    columns = range(len(data[0]))
    has_head = headings is not None

    # Separator based formatting.
    if sep is not None:
        # The heading line.
        if has_head:
            out.write('#' + sep.join([headings[j] for j in columns]) + '\n')

        # The data lines.
        for row in data:
            out.write(sep.join([row[j] for j in columns]) + '\n')
        return

    # Fixed width formatting - start with the heading widths (the first column making room for the '# ' prefix).
    if has_head:
        widths = [len(headings[j]) + (2 if j == 0 else 0) for j in columns]
    else:
        widths = [0 for j in columns]

    # Widen the columns to fit the data.
    for row in data:
        for j in columns:
            widths[j] = max(widths[j], len(row[j]))

    # The left justified format of each column, padded by four spaces.
    formats = ["%%-%ss" % (width + 4) for width in widths]

    # The heading line.
    if has_head:
        line = formats[0] % ("# " + headings[0])
        line += ''.join([formats[j] % headings[j] for j in columns[1:]])
        out.write(line + '\n')

    # The data lines.
    for row in data:
        out.write(''.join([formats[j] % row[j] for j in columns]) + '\n')
691
692
693
class DummyFileObject:
    """In-memory stand-in for a file object, accumulating everything written to it in a string."""

    def __init__(self):
        """Set up the dummy object to act as a file object."""

        # The accumulated file contents.
        self.data = ''

        # The file starts out open.
        self.closed = False


    def close(self):
        """A method for 'closing' this object."""

        self.closed = True


    def write(self, str):
        """Mimic the file object write() method so that this class can be used as a file object.

        @param str:         The string to be written.
        @type str:          str
        @raises ValueError: If the object has been closed.
        """

        # Writing to a closed file is an error, as for real file objects.
        if self.closed:
            raise ValueError('I/O operation on closed file')

        # Append the string to the contents.
        self.data = self.data + str


    def readlines(self):
        """Mimic the file object readlines() method.

        This method works even if this dummy file object is closed!  Every returned line is terminated by a newline character, including a final line which was written without one.


        @return:    The contents of the file object separated by newline characters.
        @rtype:     list of str
        """

        # Split up the contents.
        lines = self.data.split('\n')

        # Contents ending in a newline leave a trailing empty element, which is not a line.
        if lines[-1] == '':
            lines.pop()

        # Restore the newline characters removed by the splitting.
        return [line + '\n' for line in lines]
750
751
752
class SplitIO:
    """Class for splitting an IO stream to two outputs.

    The two target streams must be set with the split() method before any of the other methods are used.
    """

    def flush(self):
        """Flush all streams."""

        self.stream1.flush()
        self.stream2.flush()


    def isatty(self):
        """Check that both streams are TTYs.

        @return:    True, only if both streams are TTYs.
        @rtype:     bool
        """

        # A logical rather than bitwise 'and', so that the second stream is only queried if the first is a TTY.
        return self.stream1.isatty() and self.stream2.isatty()


    def split(self, stream1, stream2):
        """Function for setting the streams.

        @param stream1: The first of the two streams to write to.
        @type stream1:  file object
        @param stream2: The second of the two streams to write to.
        @type stream2:  file object
        """

        self.stream1 = stream1
        self.stream2 = stream2


    def write(self, text):
        """Replacement write function, sending the text to both streams.

        @param text:    The text to write.
        @type text:     str
        """

        # The first stream, followed by the second.
        self.stream1.write(text)
        self.stream2.write(text)
793