1  """ 
  2  Classes for dealing with STAR syntax 
  3  """ 
  4  from bmrblib.pystarlib.Text import pattern_quotes_needed 
  5  from bmrblib.pystarlib.Text import quotes_add 
  6  from bmrblib.pystarlib.Text import pattern_quotes_needed_2 
  7  from bmrblib.pystarlib.Text import pattern_quoted 
  8  from bmrblib.pystarlib.Text import tag_value_quoted_parse 
  9  from bmrblib.pystarlib.Text import pattern_tags_loop 
 10  from bmrblib.pystarlib.Text import pattern_tags_loop_2 
 11  from bmrblib.pystarlib.Text import pattern_tagname_2 
 12  from bmrblib.pystarlib.Text import pattern_tagtable_stop_2 
 13  from bmrblib.pystarlib.Text import pattern_tagtable_loop_2 
 14  from bmrblib.pystarlib.Text import pattern_unquoted_find 
 15  from bmrblib.pystarlib.Text import pattern_tag_name 
 16  from bmrblib.pystarlib.Text import tag_value_parse 
 17  from bmrblib.pystarlib.Utils import Lister 
 18  from bmrblib.pystarlib.Utils import transpose 
 19   
 20  import types 
 21  import re 
 22   
 23   
 24  """ 
Looped and free tags cannot be mixed in the same object.
 26  """ 
 28      """ 
 29      In initializing the class a content has to be given!!! 
 30      If not then the class will make something up and it won't 
 31      be pretty but it will be following legal syntax. Usual 
 32      case is to call the parse method with some text and position. 
 33      """ 
 34 -    def __init__( self, 
 35                    free      = None, 
 36                    title     = '', 
 37                    tagnames  =  None, 
 38                    tagvalues =  None, 
 39                    verbosity = 2 
 40                    ): 
  41          self.free       = free 
 42          self.title      = title 
 43           
 44           
 45           
 46           
 47          self.tagnames = tagnames 
 48          if self.tagnames == None: 
 49            self.tagnames  = [ '_Dummy_tag' ] 
 50             
 51          self.tagvalues = tagvalues 
 52          if self.tagvalues == None: 
 53            self.tagvalues  = [ [None] ] 
 54             
 55          self.verbosity  = verbosity 
  56       
 57      "Returns the STAR text representation" 
 58 -    def star_text ( self, 
 59                      flavor                  = 'NMR-STAR' 
 60                     ): 
  61           
 62          if flavor == None or flavor == 'NMR-STAR': 
 63               
 64              loop_ident_size     = 3 
 65          elif flavor == 'mmCIF': 
 66              loop_ident_size     = 0 
 67          else: 
 68              print('ERROR: Unknown flavor of STAR given', flavor) 
 69              return 1 
 70                   
 71          free_ident_size         = loop_ident_size 
 72          tagnames_ident_size     = loop_ident_size + 3 
 73          show_stop_tag           = 1 
 74           
 75          str         = '' 
 76          count       = -1 
 77          count_hash  = 100000  
 78           
 79           
 80          if self.free: 
 81              i = 0 
 82              for tagname in self.tagnames: 
 83                   
 84                  if pattern_quotes_needed.search( self.tagvalues[i][0] ): 
 85                      tagvalue = quotes_add( self.tagvalues[i][0] ) 
 86                  else: 
 87                      tagvalue = self.tagvalues[i][0] 
 88                  str = str + free_ident_size * ' ' + "%s %s" % ( tagname, tagvalue ) 
 89                  if tagvalue[ -1 ] != '\n': 
 90                      str = str + '\n' 
 91                  i = i + 1 
 92              return str 
 93           
 94           
 95          str = str + loop_ident_size * ' ' + 'loop_\n' 
 96   
 97          for tagname in self.tagnames: 
 98               
 99              str = str + tagnames_ident_size * ' ' + '%s\n' % tagname 
100          str = str + '\n' 
101               
102          col_count = len( self.tagnames ) 
103          row_count = len( self.tagvalues[0] ) 
104          col_range = list(range( col_count)) 
105          row_range = list(range( row_count)) 
106   
107          str_row = [] 
108          row_id = 0 
109          tag_id = 0 
110           
111           
112           
113           
114           
115           
116          tagvalues_tr = transpose( self.tagvalues ) 
117           
118          for row_id in row_range: 
119   
120              str_tmp = ','.join(tagvalues_tr[row_id]) 
121   
122               
123              match_quotes_needed_2 = pattern_quotes_needed_2.search( str_tmp ) 
124              if match_quotes_needed_2: 
125                  str_tmp = '' 
126                  for col_id in col_range: 
127                       
128                      if pattern_quotes_needed.search( self.tagvalues[col_id][row_id] ): 
129                          str_tmp = str_tmp + '%s ' % quotes_add( self.tagvalues[col_id][row_id] ) 
130                      else: 
131                          str_tmp = str_tmp + '%s ' % self.tagvalues[col_id][row_id] 
132              else: 
133                  str_tmp = ' '.join(tagvalues_tr[row_id]) 
134   
135              str_row.append( str_tmp ) 
136   
137               
138              if self.verbosity > 1: 
139                  tag_id = tag_id + col_count 
140                  if tag_id - count > count_hash: 
141                      count = tag_id 
142                      if self.verbosity >= 9: 
143                          print('##### %s looped tag values collected ######' % count_hash) 
144                                   
145          if show_stop_tag: 
146              str_row.append( '\n' + loop_ident_size * ' ' + 'stop_\n' ) 
147   
148          str = str + '\n'.join(str_row) 
149   
150           
151          del tagvalues_tr 
152           
153          return str 
 154       
155      """ 
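    A title identifying a tagtable by its tag names:
    simply the space-separated concatenation of the tag names.
    NOTE(review): the code below joins the names with an empty
    separator; confirm which of the two is intended.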
158      """ 
160          if self.verbosity >= 9: 
161              print('Setting title of tagtable') 
162          self.title = ''.join(self.tagnames) 
 163   
164                   
165      """ 
166      Size and type checks to be extended 
167      0 Only fast checks 
168      9 Type checks of each element 
169      """ 
171   
172          names_length    = len(self.tagnames) 
173          values_length   = len(self.tagvalues) 
174   
175          if names_length != values_length: 
176              print("ERROR: names_length[%s] != values_length[%s]:" % ( 
177                  names_length, values_length )) 
178              print("ERROR: names:", self.tagnames) 
179              return 1 
180   
181          column_length_first = len( self.tagvalues[ 0 ] )             
182          for tag_id in range( values_length ): 
183              if len( self.tagvalues[ tag_id ] ) != column_length_first: 
184                  print("ERROR: length column[%s](%s) is not the same as" % ( 
185                              self.tagnames[ tag_id], 
186                              len( self.tagvalues[ tag_id ] ) )) 
187                  print("ERROR: length column[%s](%s)" % ( 
188                              self.tagnames[ 0], 
189                              column_length_first )) 
190                  return 1 
191   
192          if check_type >= 9: 
193              cols = list(range( names_length)) 
194              rows = list(range( column_length_first)) 
195              for row_id in rows: 
196                  for col_id in cols: 
197                      val_type = type(self.tagvalues[col_id][row_id]) 
198                      if val_type !=  bytes: 
199                          print("ERROR: type %s is not allowed as a value in a tagtable" % val_type) 
200                          print("ERROR: found for tagtable[%s][%s]" % ( self.tagnames[ col_id ], row_id )) 
201                          return 1 
202   
203          if self.verbosity >= 9: 
204              print('Checked integrity of TagTable (%2s names %4s values each): OK [%s]' % ( 
205                  names_length, column_length_first, self.title )) 
206          return 0 
 207           
208   
209      """ 
210      - Parses text into a tagtable. 
211      - Returns the position in the string with the first non-white space 
212      character after the tagtable or the length of the text in case all 
213      was parsed. Just to be verbose, if the tagtable is ended by a save_ 
214      then the starting position of the save_ will be returned. 
215      - Assumption here is that ;; blocks are collapsed, see Text functions 
216      - For speed purposes I scan ahead to see how far I can go before 
217      hitting a quoted tag value. I estimate in the large tables only 1 in 
218      1000 has a ;; block and only 1 in 5-10 has '' or "" block. For the part 
219      that is not quoted the parsing can be really fast. 
220      """ 
221 -    def parse(  self, 
222                  text      = '', 
223                  pos       = 0 ): 
 224           
225          if self.free: 
226              pos = self._tagtable_free_parse( text, pos ) 
227              if pos == None: 
228                  print("ERROR: tagtable_free_parse returned with ERROR") 
229                  return None 
230              else: 
231                  return pos 
232              if self.check_integrity(): 
233                  print("ERROR: integrity of parsed table is not ok") 
234                  return None 
235               
236           
237           
238          match_tags_loop = pattern_tags_loop.search(text, pos) 
239          if not match_tags_loop: 
240              print("ERROR: No tag names found for looped tagtable") 
241              return None 
242   
243           
244          match_tags_loop_2 = pattern_tags_loop_2.findall(text, 
245                                  pos, 
246                                  match_tags_loop.end() ) 
247          for m in match_tags_loop_2: 
248              self.tagnames.append( m ) 
249          pos = match_tags_loop.end() 
250   
251           
252           
253           
254           
255           
256           
257           
258       
259          text_length = len(text) 
260          if pos == text_length: 
261              print("ERROR: No tag values found for looped tagtable") 
262              return None 
263   
264   
265          pos_tagtable_loop = pattern_unquoted_find(text, pattern_tagtable_loop_2, pos) 
266          pos_tagtable_stop = pattern_unquoted_find(text, pattern_tagtable_stop_2, pos) 
267          pos_tagname       = pattern_unquoted_find(text, pattern_tagname_2, pos) 
268   
269           
270           
271          pos_end = text_length 
272          if pos_tagtable_loop != -1 and pos_tagtable_loop<pos_end: 
273              pos_end = pos_tagtable_loop + 1 
274          if pos_tagtable_stop != -1 and pos_tagtable_stop<pos_end: 
275              pos_end = pos_tagtable_stop + 1 
276          if pos_tagname != -1 and pos_tagname<pos_end: 
277              pos_end = pos_tagname + 1 
278   
279          if self.verbosity >= 9: 
280              print('pos_tagtable_loop:', pos_tagtable_loop) 
281              print('pos_tagtable_stop:', pos_tagtable_stop) 
282              print('pos_tagname      :', pos_tagname) 
283              print('Will parse tagtable text to end at position: [%s]' % pos_end) 
284               
285           
286          if not ( pos_tagtable_loop!=-1 or pos_tagtable_stop!=-1 or pos_tagname!=-1 ): 
287              if self.verbosity > 1: 
288                  pass 
289   
290   
291   
292   
293   
294   
295           
296           
297          if self._tagtable_loop_values_parse( 
298                  text, pos, pos_end):  
299              print("ERROR: not parsed table") 
300              return None 
301           
302           
303          pos = pos_end 
304           
305           
306          if pos_tagtable_stop != -1: 
307               
308               
309              match_tagtable_stop = pattern_tagtable_stop_2.search( text, pos-1 ) 
310              if not match_tagtable_stop: 
311                  print("ERROR: no stop_ on second try") 
312                  return None 
313              pos = match_tagtable_stop.end() 
314           
315          if self.check_integrity(): 
316              print("ERROR: integrity of parsed table is not ok") 
317              return None 
318          return pos 
 319   
320   
321      """ 
322      Parse names and values of free tagtable loop from pos 
323      returns new position alias status (None for failure) 
324      """ 
326           
327          text_length = len(text) 
328   
329          while pos < text_length - 1: 
330              if text[pos] != '_': 
331                  break 
332               
333              match_tag_name = pattern_tag_name.search(text, pos) 
334              if match_tag_name: 
335                  if ( match_tag_name.start() - pos ) != 0: 
336                      print("ERROR: looking for a free tag name (0)") 
337                      return None 
338              else: 
339                  print("ERROR: looking for a free tag name(1)") 
340                  return None 
341              self.tagnames.append( match_tag_name.group(1) ) 
342              pos  = match_tag_name.end() 
343               
344              value, pos = tag_value_parse(text, pos) 
345              if pos == 0: 
346                  print("ERROR: looking for a free tag name(1)") 
347                  return None 
348               
349              self.tagvalues.append( [ value ] )  
350              if self.verbosity >= 9: 
351                  print('**Parsed tag name : [%s] and value [%s]: ' % ( 
352                      match_tag_name.group(1), value)) 
353          self.set_title() 
354          return pos 
 355   
356   
357      """ 
358      Parse values of tagtable loop from pos to pos_end 
359      returns status (None for success, 1 for failure) 
360      """ 
362           
363          if self.free: 
364              print("ERROR: This is a 'free' tagtable, only looped tagtable can be parsed") 
365              return 1 
366          names_length                = len(self.tagnames) 
367           
368          self.tagvalues   = []         
369          for dummy in range( names_length ): 
370              self.tagvalues.append( [] ) 
371   
372           
373          match_white_space = re.compile('\s+').search( text, pos, pos_end ) 
374          if match_white_space: 
375              if match_white_space.start() == 0:  
376                  pos = match_white_space.end() 
377               
378          tag_id          = 0 
379          count           = 0           
380          count_hash      = 100000 
381          text_length     = len(text) 
382   
383           
384          while pos < pos_end: 
385              if self.verbosity > 2: 
386                  if pos - count > count_hash: 
387                      print('DEBUG: ##### %s chars processed ######' % count_hash) 
388                      count = pos 
389               
390              match_quoted = pattern_quoted.search( text, pos, pos_end )             
391              if match_quoted:                 
392                  if match_quoted.start() == pos:  
393                       
394                      value, pos = tag_value_quoted_parse( text, pos ) 
395                      if pos ==  None: 
396                          print('ERROR: got error in parse (1)') 
397                          return 1 
398                      if pos > pos_end: 
399                          print('ERROR: found a quoted value that was not wholly within boundaries (1)')                         
400                          return 1 
401                      self.tagvalues[ tag_id ].append( value ) 
402                      tag_id += 1 
403                      if tag_id == names_length:   
404                          tag_id = 0 
405                  else:       
406                       
407                       
408                       
409                       
410                      idxstart = match_quoted.start() 
411                      c = text[idxstart] 
412                      bc = text[idxstart-1] 
413                      if (c == "'" or c == '"') and bc != " ": 
414                           
415   
416                          tempendpos = text.find(' ', idxstart) 
417                      else:                     
418                          tempendpos = idxstart 
419   
420                       
421                       
422                       
423                      for t in text[pos:tempendpos].split(): 
424                          self.tagvalues[tag_id].append( t ) 
425                          tag_id += 1 
426                          if tag_id == names_length:  
427                              tag_id = 0                                         
428                      if tempendpos == match_quoted.start(): 
429                           
430                          pos = tempendpos 
431                          value, pos = tag_value_quoted_parse( text, pos ) 
432                          if pos ==  None: 
433                              print('ERROR: got error in parse (2)') 
434                              return 1 
435                          if pos > pos_end: 
436                              print('ERROR: found a quoted value that was not wholly within boundaries (2)') 
437                              return 1 
438                          self.tagvalues[ tag_id ].append( value ) 
439                          tag_id += 1 
440                          if tag_id == names_length:   
441                              tag_id = 0 
442                      else: 
443                          pos = tempendpos 
444              else:  
445                  for t in text[pos:pos_end].split(): 
446                      self.tagvalues[tag_id].append( t ) 
447                      tag_id += 1 
448                      if tag_id == names_length:  
449                          tag_id = 0 
450                  pos = text_length  
451               
452          col_length = len( self.tagvalues[-1] )     
453          if tag_id != 0: 
454              print("ERROR: not correct number of tag values read") 
455              print("Read [%s] tag(s) that is:" \ 
456                    % ( col_length * names_length + tag_id )) 
457              print("[%s] row(s) complete and [%s] tag value(s) in last row that is incomplete." \ 
458                    % ( col_length, tag_id )) 
459              print("Tag names of this table are:") 
460              print(self.tagnames)             
461              for xxx in range(0, len(self.tagvalues[0])):              
462                  for yyy in range(0, len(self.tagvalues)): 
463                      print(self.tagvalues[yyy][xxx])               
464                  print('-----------------------------------------------')               
465              pos = 0 
466              while pos < tag_id: 
467                   print(self.tagvalues[pos][-1]) 
468                   pos = pos + 1             
469              return 1 
470   
471          if col_length == 0: 
472              print("ERROR: no tag values parsed") 
473              return 1 
474   
475           
476          self.set_title() 
477          return None 
 478