1 from __future__ import absolute_import
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module containing advanced IO functions for relax.
25
26 This includes IO redirection, automatic loading and writing of compressed files (both Gzip and BZ2 compression), reading and writing of files, processing of the contents of files, etc.
27 """
28
29
30 import sys
31 try:
32 import bz2
33 except ImportError:
34 bz2 = None
35 message = sys.exc_info()[1]
36 bz2_module_message = message.args[0]
37 from os import devnull
38 from os import F_OK, X_OK, access, altsep, getenv, makedirs, pathsep, remove, sep
39 from os.path import expanduser, basename, splitext, isfile
40 from re import search
41 from sys import stdin, stdout, stderr
42 from warnings import warn
43
44
45 from lib.check_types import is_filetype
46 from lib.compat import bz2_open, gz_open
47 from lib.errors import RelaxError, RelaxFileError, RelaxFileOverwriteError, RelaxMissingBinaryError, RelaxNoInPathError, RelaxNonExecError
48 from lib.warnings import RelaxWarning
49
50
51
def delete(file_name, dir=None, fail=True):
    """Remove a file, falling back to its Bzip2 or Gzip compressed variant.

    The path built from file_name and dir is checked first, followed by the same path with '.bz2' appended and then with '.gz' appended.  The first of these found to exist is removed.

    @param file_name:       The name of the file to delete.
    @type file_name:        str
    @keyword dir:           The directory containing the file.
    @type dir:              None or str
    @keyword fail:          A flag which if True will cause RelaxFileError to be raised when no matching file exists.  If False, the function silently returns instead.
    @type fail:             bool
    @raises RelaxFileError: If neither the file nor a compressed variant exists, and fail is set to True.
    """

    # The full path of the uncompressed file.
    target = get_file_path(file_name, dir)

    # The plain file is missing, so look for a compressed version instead.
    if not access(target, F_OK):
        for extension in ('.bz2', '.gz'):
            if access(target + extension, F_OK):
                target = target + extension
                break

        # Nothing was found under any name.
        else:
            if fail:
                raise RelaxFileError(target)
            return

    # Remove whichever file was located.
    remove(target)
81
82
84 """Function for determining the compression type, and for also testing if the file exists.
85
86 @param file_path: The full file path of the file.
87 @type file_path: str
88 @return: A tuple of the compression type and full path of the file (including its extension). A value of 0 corresponds to no compression. Bzip2 compression corresponds to a value of 1. Gzip compression corresponds to a value of 2.
89 @rtype: (int, str)
90 """
91
92
93 if access(file_path, F_OK):
94 compress_type = 0
95 if search('.bz2$', file_path):
96 compress_type = 1
97 elif search('.gz$', file_path):
98 compress_type = 2
99
100
101 elif access(file_path + '.bz2', F_OK):
102 file_path = file_path + '.bz2'
103 compress_type = 1
104
105
106 elif access(file_path + '.gz', F_OK):
107 file_path = file_path + '.gz'
108 compress_type = 2
109
110
111 else:
112 raise RelaxFileError(file_path)
113
114
115 return compress_type, file_path
116
117
119 """Return all data in the file as a list of lines where each line is a list of line elements.
120
121 @param file: The file to extract the data from.
122 @type file: str or file object
123 @param dir: The path where the file is located. If None and the file argument is a string, then the current directory is assumed.
124 @type dir: str or None
125 @param file_data: If the file data has already been extracted from the file, it can be passed into this function using this argument. If data is supplied here, then the file_name and dir args are ignored.
126 @type file_data: list of str
127 @param sep: The character separating the columns in the file data. If None, then whitespace is assumed.
128 @type sep: str
129 @return: The file data.
130 @rtype: list of lists of str
131 """
132
133
134 if not file_data:
135
136 if isinstance(file, str):
137 file = open_read_file(file_name=file, dir=dir)
138
139
140 file_data = file.readlines()
141
142
143 data = []
144 for i in range(len(file_data)):
145 if sep:
146 row = file_data[i].split(sep)
147 else:
148 row = file_data[i].split()
149 data.append(row)
150
151
152 if not file_data:
153 file.close()
154
155
156 return data
157
158
160 """Return the root file name, striped of path and extension details.
161
162 @param file_path: The full path to the file.
163 @type file_path: str
164 @return: The file root (with all directories and the extension stripped away).
165 @rtype: str
166 """
167
168
169 ext = None
170 while ext != '':
171 file_path, ext = splitext(file_path)
172
173
174 return basename(file_path)
175
176
178 """Generate and expand the full file path.
179
180 @param file_name: The name of the file to extract the data from.
181 @type file_name: str
182 @param dir: The path where the file is located. If None, then the current directory is assumed.
183 @type dir: str
184 @return: The full file path.
185 @rtype: str
186 """
187
188
189 file_path = file_name
190
191
192 if dir:
193 file_path = dir + sep + file_path
194
195
196 if file_path:
197 file_path = expanduser(file_path)
198
199
200 return file_path
201
202
204 """Restore all IO streams to the Python defaults.
205
206 @param verbosity: The verbosity level.
207 @type verbosity: int
208 """
209
210
211 if verbosity:
212 print("Restoring the sys.stdin IO stream to the Python STDIN IO stream.")
213 print("Restoring the sys.stdout IO stream to the Python STDOUT IO stream.")
214 print("Restoring the sys.stderr IO stream to the Python STDERR IO stream.")
215
216
217 sys.stdin = sys.__stdin__
218 sys.stdout = sys.__stdout__
219 sys.stderr = sys.__stderr__
220
221
223 """Turn on logging, sending both STDOUT and STDERR streams to a file.
224
225 @param file_name: The name of the file.
226 @type file_name: str
227 @param dir: The path where the file is located. If None, then the current directory is assumed.
228 @type dir: str
229 @param verbosity: The verbosity level.
230 @type verbosity: int
231 """
232
233
234 log_file, file_path = open_write_file(file_name=file_name, dir=dir, force=True, verbosity=verbosity, return_path=True)
235
236
237 log_stdin = stdin
238 log_stdout = None
239 log_stderr = SplitIO()
240
241
242 if verbosity:
243 print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
244 print("Redirecting the sys.stdout IO stream to the log file '%s'." % file_path)
245 print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)
246
247
248 log_stdout = log_file
249 log_stderr.split(stderr, log_file)
250
251
252 sys.stdin = log_stdin
253 sys.stdout = log_stdout
254 sys.stderr = log_stderr
255
256
def io_streams_tee(file_name=None, dir=None, compress_type=0, verbosity=1):
    """Duplicate STDOUT and STDERR so that everything written to them also goes to a file.

    The file is opened via open_write_file() with the force flag set.  sys.stdout and sys.stderr are each replaced by a SplitIO object which is given the original stream and the open file.  sys.stdin is set to the stdin object imported from the sys module.

    @param file_name:       The name of the file.
    @type file_name:        str
    @param dir:             The path where the file is located.  If None, then the current directory is assumed.
    @type dir:              str
    @param compress_type:   The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.
    @type compress_type:    int
    @param verbosity:       The verbosity level.  If non-zero, the redirections are announced before they take effect.
    @type verbosity:        int
    """

    # Open the shared output file.
    opened = open_write_file(file_name=file_name, dir=dir, force=True, compress_type=compress_type, verbosity=verbosity, return_path=1)
    tee_file, file_path = opened

    # One splitter per output stream.
    out_splitter = SplitIO()
    err_splitter = SplitIO()

    # Announce the redirections (still on the current streams, as nothing has been swapped yet).
    if verbosity:
        print("Redirecting the sys.stdin IO stream to the Python stdin IO stream.")
        print("Redirecting the sys.stdout IO stream to both the Python stdout IO stream and the log file '%s'." % file_path)
        print("Redirecting the sys.stderr IO stream to both the Python stderr IO stream and the log file '%s'." % file_path)

    # Attach the original streams and the file to the splitters.
    out_splitter.split(stdout, tee_file)
    err_splitter.split(stderr, tee_file)

    # Install the new streams.
    sys.stdin = stdin
    sys.stdout = out_splitter
    sys.stderr = err_splitter
292
293
295 """Create the given directory, or exit without raising an error if the directory exists.
296
297 @param dir: The directory to create.
298 @type dir: str
299 @param verbosity: The verbosity level.
300 @type verbosity: int
301 """
302
303
304 if dir == None:
305 return
306
307
308 dir = expanduser(dir)
309
310
311 try:
312 makedirs(dir)
313 except OSError:
314 if verbosity:
315 print("Directory ." + sep + dir + " already exists.\n")
316
317
319 """Open the file 'file' and return all the data.
320
321 @param file_name: The name of the file to extract the data from.
322 @type file_name: str
323 @param dir: The path where the file is located. If None, then the current directory is assumed.
324 @type dir: str
325 @param verbosity: The verbosity level.
326 @type verbosity: int
327 @return: The open file object.
328 @rtype: file object
329 """
330
331
332 if is_filetype(file_name):
333
334 return file_name
335
336
337 if not file_name and not isinstance(file_name, str):
338 raise RelaxError("The file name " + repr(file_name) + " " + repr(type(file_name)) + " is invalid and cannot be opened.")
339
340
341 file_path = get_file_path(file_name, dir)
342
343
344 compress_type, file_path = determine_compression(file_path)
345
346
347 try:
348
349 if verbosity:
350 print("Opening the file " + repr(file_path) + " for reading.")
351
352
353 if compress_type == 0:
354 file_obj = open(file_path, 'r')
355
356
357 elif compress_type == 1:
358 file_obj = bz2_open(file=file_path, mode='r')
359
360
361 elif compress_type == 2:
362 file_obj = gz_open(file=file_path, mode='r')
363
364
365 except IOError:
366 message = sys.exc_info()[1]
367 raise RelaxError("Cannot open the file " + repr(file_path) + ". " + message.args[1] + ".")
368
369
370 return file_obj
371
372
def open_write_file(file_name=None, dir=None, force=False, compress_type=0, verbosity=1, return_path=False):
    """Open a file for writing, creating the directory if necessary.

    Objects which are already files, or which have a write() method, are passed straight through.  A file name containing the text 'devnull' opens the null device.  Otherwise the directory is created via mkdir_nofail(), the compression type is resolved, the matching extension is appended to the path if missing, and the file is opened with open(), bz2_open() or gz_open().

    @param file_name:               The name of the file to open for writing, or an already open file or file-like object.
    @type file_name:                str or file object
    @param dir:                     The path where the file is located.  If None, then the current directory is assumed.
    @type dir:                      str
    @param force:                   Boolean argument which if True causes the file to be overwritten if it already exists.
    @type force:                    bool
    @param compress_type:           The compression type.  The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.  If no compression is given but the file name ends in '.gz' or '.bz2', then the compression will be automatically set.
    @type compress_type:            int
    @param verbosity:               The verbosity level.
    @type verbosity:                int
    @param return_path:             If True, the function will return a tuple of the file object and the full file path.
    @type return_path:              bool
    @return:                        The open, writable file object and, if the return_path is True, then the full file path is returned as well.  The path is None for the null device and for file objects passed in directly.
    @rtype:                         writable file object (if return_path, then a tuple of the writable file and the full file path)
    @raises RelaxError:             If no file name is given, if the compression type is not 0, 1 or 2, or if the file cannot be opened.
    @raises RelaxFileOverwriteError:    If the file exists and force is not set.
    """

    # A file name (or file object) is mandatory.
    if file_name is None:
        raise RelaxError("The name of the file must be supplied.")

    # File objects and file-like objects are handed back untouched.  The return_path contract is
    # honoured here too (with no path available), matching the null device branch below.
    if is_filetype(file_name) or hasattr(file_name, 'write'):
        if return_path:
            return file_name, None
        return file_name

    # The null device.
    if search('devnull', file_name):
        # Printout.
        if verbosity:
            print("Opening the null device file for writing.")

        # Open the null device.
        file_obj = open(devnull, 'w')

        # Return the file.
        if return_path:
            return file_obj, None
        return file_obj

    # Reject unknown compression types up front rather than failing later on an unassigned file object.
    if compress_type not in (0, 1, 2):
        raise RelaxError("The compression type " + repr(compress_type) + " is invalid, it must be one of 0, 1 or 2.")

    # Create the directories (silently).
    mkdir_nofail(dir, verbosity=0)

    # File path.
    file_path = get_file_path(file_name, dir)

    # If no compression is requested, take it from the extension.  Literal suffix tests are used
    # because a regular expression such as '.gz$' also matches names like 'logs_gz'.
    if compress_type == 0:
        if file_path.endswith('.bz2'):
            compress_type = 1
        elif file_path.endswith('.gz'):
            compress_type = 2

    # Bzip2 compression.
    if compress_type == 1 and not file_path.endswith('.bz2'):
        # Bz2 module exists.
        if bz2:
            file_path = file_path + '.bz2'

        # Switch to gzip compression.
        else:
            warn(RelaxWarning("Cannot use Bzip2 compression, using gzip compression instead. " + bz2_module_message + "."))
            compress_type = 2

    # Gzip compression.
    if compress_type == 2 and not file_path.endswith('.gz'):
        file_path = file_path + '.gz'

    # Fail if the file already exists and the force flag is not set.
    if access(file_path, F_OK) and not force:
        raise RelaxFileOverwriteError(file_path, 'force flag')

    # Open the file for writing.
    try:
        # Printout.
        if verbosity:
            print("Opening the file " + repr(file_path) + " for writing.")

        # Uncompressed text.
        if compress_type == 0:
            file_obj = open(file_path, 'w')

        # Bzip2 compressed text.
        elif compress_type == 1:
            file_obj = bz2_open(file=file_path, mode='w')

        # Gzipped compressed text.
        else:
            file_obj = gz_open(file=file_path, mode='w')

    # Cannot open.
    except IOError:
        message = sys.exc_info()[1]

        # Not every IOError carries an (errno, strerror) pair, so fall back to the full message text.
        reason = getattr(message, 'strerror', None)
        if not reason:
            reason = str(message)
        raise RelaxError("Cannot open the file " + repr(file_path) + ". " + reason + ".")

    # Return the opened file.
    if return_path:
        return file_obj, file_path
    return file_obj
481
482
def strip(data, comments=True):
    """Filter out the empty lines, and optionally the comment lines, of the file data structure.

    A line is treated as a comment when its first element starts with the '#' character.  The input structure is not modified, a new list referencing the retained lines is built instead.

    @param data:        The file data to clean up.
    @type data:         list of lists of str
    @keyword comments:  A flag which if True will cause comments to be deleted.
    @type comments:     bool
    @return:            The input data with the empty and comment lines removed.
    @rtype:             list of lists of str
    """

    def is_wanted(line):
        """Decide if a single line of elements should be retained."""

        # Empty lines are always dropped.
        if len(line) == 0:
            return False

        # Comment lines are only dropped on request.
        if comments and search("^#", line[0]):
            return False

        return True

    # Assemble the cleaned up structure.
    return [line for line in data if is_wanted(line)]
512
513
515 """Function for testing that the binary string corresponds to a valid executable file.
516
517 @param binary: The name or path of the binary executable file.
518 @type binary: str
519 """
520
521
522 if altsep:
523 path_sep = '[' + sep + altsep + ']'
524 else:
525 path_sep = sep
526
527
528 if isfile(binary):
529
530 if not access(binary, F_OK):
531 raise RelaxMissingBinaryError(binary)
532
533
534 if not access(binary, X_OK):
535 raise RelaxNonExecError(binary)
536
537
538 else:
539
540 path = getenv('PATH')
541
542
543 path_list = path.split(pathsep)
544
545
546 for path in path_list:
547 if access(path + sep + binary, F_OK) or access(path + sep + binary +".exe", F_OK):
548 return
549
550
551 raise RelaxNoInPathError(binary)
552
553
def write_data(out=None, headings=None, data=None, sep=None):
    """Write a table of string data, with an optional heading line, to a file handle.

    Without a separator, each column is left justified and padded to the width of its longest entry plus four characters, and the heading line starts with '# '.  With a separator, the elements of each line are joined by the separator and the heading line starts with '#'.  Nothing is written if the data is None or an empty list.  The number of columns is taken from the first row of the data.

    @keyword out:       The file handle to write to.
    @type out:          file handle
    @keyword headings:  The optional headings to print out.
    @type headings:     list of str or None
    @keyword data:      The data to print out.
    @type data:         list of list of str
    @keyword sep:       The column separator which, if None, defaults to whitespace.
    @type sep:          str or None
    """

    # No data to print out.
    if data in [None, []]:
        return

    # The column indices, fixed by the first row.
    columns = range(len(data[0]))
    with_headings = headings is not None

    # Character separated output.
    if sep is not None:
        def write_delimited(cells):
            """Write one line, placing the separator between the elements."""

            for col in columns:
                if col > 0:
                    out.write(sep)
                out.write(cells[col])
            out.write('\n')

        # The headings, marked as a comment.
        if with_headings:
            out.write('#')
            write_delimited(headings)

        # The data lines.
        for row in data:
            write_delimited(row)

        return

    # Pretty whitespace formatting - start from the heading widths (the first includes the '# ' prefix).
    if with_headings:
        widths = [len(headings[col]) + 2 if col == 0 else len(headings[col]) for col in columns]
    else:
        widths = [0 for col in columns]

    # Grow each width to fit the longest data element of the column.
    for row in data:
        for col in columns:
            widths[col] = max(widths[col], len(row[col]))

    # Left justified format strings, with four characters of padding.
    formats = ["%%-%ss" % (width + 4) for width in widths]

    # The headings.
    if with_headings:
        out.write(formats[0] % ("# " + headings[0]))
        for col in columns[1:]:
            out.write(formats[col] % headings[col])
        out.write('\n')

    # The data lines.
    for row in data:
        for col in columns:
            out.write(formats[col] % row[col])
        out.write('\n')
641
642
643
646 """Set up the dummy object to act as a file object."""
647
648
649 self.data = ''
650
651
652 self.closed = False
653
654
656 """A method for 'closing' this object."""
657
658
659 self.closed = True
660
661
663 """Mimic the file object write() method so that this class can be used as a file object.
664
665 @param str: The string to be written.
666 @type str: str
667 """
668
669
670 if self.closed:
671 raise ValueError('I/O operation on closed file')
672
673
674 self.data = self.data + str
675
676
678 """Mimic the file object readlines() method.
679
680 This method works even if this dummy file object is closed!
681
682
683 @return: The contents of the file object separated by newline characters.
684 @rtype: list of str
685 """
686
687
688 lines = self.data.split('\n')
689
690
691 if lines[-1] == '':
692 lines.pop()
693
694
695 for i in range(len(lines)):
696 lines[i] = lines[i] + '\n'
697
698
699 return lines
700
701
702
705 """Class for splitting an IO stream to two outputs."""
706
707
709 """Flush all streams."""
710
711
712 self.stream1.flush()
713 self.stream2.flush()
714
715
717 """Check that both streams are TTYs.
718
719 @return: True, only if both streams are TTYs.
720 @rtype: bool
721 """
722
723
724 return self.stream1.isatty() & self.stream2.isatty()
725
726
def split(self, stream1, stream2):
    """Set the two streams of this object.

    The streams are stored as the stream1 and stream2 attributes.

    @param stream1: The first stream.
    @type stream1:  file object
    @param stream2: The second stream.
    @type stream2:  file object
    """

    # Store both streams.
    self.stream1, self.stream2 = stream1, stream2
733
734
736 """Replacement write function."""
737
738
739 self.stream1.write(text)
740
741
742 self.stream2.write(text)
743