1 """
2 Classes for dealing with STAR syntax
3 """
4 from bmrblib.pystarlib.Text import pattern_quotes_needed
5 from bmrblib.pystarlib.Text import quotes_add
6 from bmrblib.pystarlib.Text import pattern_quotes_needed_2
7 from bmrblib.pystarlib.Text import pattern_quoted
8 from bmrblib.pystarlib.Text import tag_value_quoted_parse
9 from bmrblib.pystarlib.Text import pattern_tags_loop
10 from bmrblib.pystarlib.Text import pattern_tags_loop_2
11 from bmrblib.pystarlib.Text import pattern_tagname_2
12 from bmrblib.pystarlib.Text import pattern_tagtable_stop_2
13 from bmrblib.pystarlib.Text import pattern_tagtable_loop_2
14 from bmrblib.pystarlib.Text import pattern_unquoted_find
15 from bmrblib.pystarlib.Text import pattern_tag_name
16 from bmrblib.pystarlib.Text import tag_value_parse
17 from bmrblib.pystarlib.Utils import Lister
18 from bmrblib.pystarlib.Utils import transpose
19
20 import types
21 import re
22
23
24 """
25 Looped and free tags can not be mixed in same object.
26 """
28 """
29 In initializing the class a content has to be given!!!
30 If not then the class will make something up and it won't
31 be pretty but it will be following legal syntax. Usual
32 case is to call the parse method with some text and position.
33 """
def __init__(self,
             free=None,
             title='',
             tagnames=None,
             tagvalues=None,
             verbosity=2):
    """
    Set up a tag table, filling in placeholder content where none is given.

    free       truthy for a table of free tags; the other methods treat a
               falsy value as a looped table
    title      title of the table
    tagnames   list of tag names, one per column; when None a single
               '_Dummy_tag' is used
    tagvalues  list of columns, each a list holding the values of the tag
               name at the same index; when None one column holding a
               single None is used
    verbosity  amount of diagnostic printing
    """
    self.free = free
    self.title = title

    # Identity test: only a real None selects the placeholder, whatever
    # equality the passed-in object happens to define.  The placeholder
    # lists are created here, so instances never share them.
    self.tagnames = ['_Dummy_tag'] if tagnames is None else tagnames
    self.tagvalues = [[None]] if tagvalues is None else tagvalues

    self.verbosity = verbosity
56
57 "Returns the STAR text representation"
def star_text(self,
              flavor='NMR-STAR'):
    """
    Return the STAR text representation of this tag table.

    flavor  'NMR-STAR' (or None) indents the table by 3 spaces,
            'mmCIF' does not indent it.

    Returns the text as a string.  For any other flavor an error is
    printed and 1 is returned instead.
    """
    if flavor is None or flavor == 'NMR-STAR':
        loop_ident_size = 3
    elif flavor == 'mmCIF':
        loop_ident_size = 0
    else:
        print('ERROR: Unknown flavor of STAR given', flavor)
        return 1

    free_ident_size = loop_ident_size
    tagnames_ident_size = loop_ident_size + 3

    # Free tags: one "name value" line per tag, using the first value of
    # each column.
    if self.free:
        indent = free_ident_size * ' '
        lines = []
        for i, tagname in enumerate(self.tagnames):
            tagvalue = self.tagvalues[i][0]
            if pattern_quotes_needed.search(tagvalue):
                tagvalue = quotes_add(tagvalue)
            line = indent + "%s %s" % (tagname, tagvalue)
            # endswith() rather than tagvalue[-1]: an empty value must
            # not raise an IndexError.
            if not tagvalue.endswith('\n'):
                line += '\n'
            lines.append(line)
        return ''.join(lines)

    # Looped tags: the loop_ keyword, the tag names, a blank line, one
    # line per row of values and finally the stop_ keyword.
    header = [loop_ident_size * ' ' + 'loop_\n']
    for tagname in self.tagnames:
        header.append(tagnames_ident_size * ' ' + '%s\n' % tagname)
    header.append('\n')

    col_count = len(self.tagnames)
    row_count = len(self.tagvalues[0])
    tagvalues_tr = transpose(self.tagvalues)

    str_row = []
    tag_id = 0
    count = -1
    count_hash = 100000

    for row_id in range(row_count):
        row = tagvalues_tr[row_id]
        # Test the comma joined row once; only when that matches is
        # every value of the row tested (and quoted) on its own.
        if pattern_quotes_needed_2.search(','.join(row)):
            str_tmp = ''
            for col_id in range(col_count):
                value = self.tagvalues[col_id][row_id]
                if pattern_quotes_needed.search(value):
                    value = quotes_add(value)
                str_tmp = str_tmp + '%s ' % value
        else:
            str_tmp = ' '.join(row)
        str_row.append(str_tmp)

        # Progress report for very large tables.
        if self.verbosity > 1:
            tag_id = tag_id + col_count
            if tag_id - count > count_hash:
                count = tag_id
                if self.verbosity >= 9:
                    print('##### %s looped tag values collected ######' % count_hash)

    str_row.append('\n' + loop_ident_size * ' ' + 'stop_\n')

    return ''.join(header) + '\n'.join(str_row)
154
"""
A title identifying a tagtable by its tag names:
simply the space-separated concatenation of the tag names.
"""
def set_title(self):
    """
    Set the title of the table to the space separated concatenation of
    its tag names.
    """
    # NOTE(review): the def line was not part of the reviewed text; the
    # signature is taken from the self.set_title() calls -- confirm.
    if self.verbosity >= 9:
        print('Setting title of tagtable')
    # One space between the names: joined on the empty string they would
    # run together into a single word.
    self.title = ' '.join(self.tagnames)
163
164
165 """
166 Size and type checks to be extended
167 0 Only fast checks
168 9 Type checks of each element
169 """
def check_integrity(self, check_type=0):
    """
    Check that the tag names and the columns of tag values fit together.

    check_type  0 only the fast checks (number of columns and the
                  length of each column)
                9 additionally the type of each value

    Returns 0 when the table is consistent and 1, after printing the
    problem, when it is not.
    """
    # NOTE(review): the def line was not part of the reviewed text; the
    # default of check_type is assumed to be 0 because the method is
    # called without arguments -- confirm.
    names_length = len(self.tagnames)
    values_length = len(self.tagvalues)

    if names_length != values_length:
        print("ERROR: names_length[%s] != values_length[%s]:" % (
            names_length, values_length))
        print("ERROR: names:", self.tagnames)
        return 1

    # Without this guard the look-up of the first column below raises an
    # IndexError for a table without any column.
    if values_length == 0:
        print("ERROR: tagtable has no tag names and no tag values")
        return 1

    column_length_first = len(self.tagvalues[0])
    for tag_id in range(values_length):
        if len(self.tagvalues[tag_id]) != column_length_first:
            print("ERROR: length column[%s](%s) is not the same as" % (
                self.tagnames[tag_id],
                len(self.tagvalues[tag_id])))
            print("ERROR: length column[%s](%s)" % (
                self.tagnames[0],
                column_length_first))
            return 1

    if check_type >= 9:
        for row_id in range(column_length_first):
            for col_id in range(names_length):
                value = self.tagvalues[col_id][row_id]
                # Values are text (str); the earlier comparison with
                # bytes rejected every ordinary string value.
                if not isinstance(value, str):
                    print("ERROR: type %s is not allowed as a value in a tagtable" % type(value))
                    print("ERROR: found for tagtable[%s][%s]" % (self.tagnames[col_id], row_id))
                    return 1

    if self.verbosity >= 9:
        print('Checked integrity of TagTable (%2s names %4s values each): OK [%s]' % (
            names_length, column_length_first, self.title))
    return 0
207
208
209 """
210 - Parses text into a tagtable.
211 - Returns the position in the string with the first non-white space
212 character after the tagtable or the length of the text in case all
213 was parsed. Just to be verbose, if the tagtable is ended by a save_
214 then the starting position of the save_ will be returned.
215 - Assumption here is that ;; blocks are collapsed, see Text functions
216 - For speed purposes I scan ahead to see how far I can go before
217 hitting a quoted tag value. I estimate in the large tables only 1 in
218 1000 has a ;; block and only 1 in 5-10 has '' or "" block. For the part
219 that is not quoted the parsing can be really fast.
220 """
def parse(self,
          text='',
          pos=0):
    """
    Parse text, starting at pos, into this tag table.

    A free table is handed to _tagtable_free_parse.  For a looped table
    the tag names are read first, then the end of the values is located
    (the first unquoted loop_, stop_ or tag name, or the end of the
    text) and the values up to there are parsed.

    Returns the position in text after the table, or None on failure.
    """
    if self.free:
        pos = self._tagtable_free_parse(text, pos)
        if pos is None:
            print("ERROR: tagtable_free_parse returned with ERROR")
        # NOTE(review): free tables are returned without an integrity
        # check.  The check that used to follow here could never run,
        # because both branches before it returned.
        return pos

    # Tag names of the loop
    match_tags_loop = pattern_tags_loop.search(text, pos)
    if not match_tags_loop:
        print("ERROR: No tag names found for looped tagtable")
        return None

    self.tagnames.extend(
        pattern_tags_loop_2.findall(text, pos, match_tags_loop.end()))
    pos = match_tags_loop.end()

    text_length = len(text)
    if pos == text_length:
        print("ERROR: No tag values found for looped tagtable")
        return None

    # What ends the values: the nearest of the three unquoted items
    # below, or else the end of the text.
    pos_tagtable_loop = pattern_unquoted_find(text, pattern_tagtable_loop_2, pos)
    pos_tagtable_stop = pattern_unquoted_find(text, pattern_tagtable_stop_2, pos)
    pos_tagname = pattern_unquoted_find(text, pattern_tagname_2, pos)

    pos_end = text_length
    # Same order and same comparisons as three separate tests; -1 means
    # that the item was not found.
    for pos_found in (pos_tagtable_loop, pos_tagtable_stop, pos_tagname):
        if pos_found != -1 and pos_found < pos_end:
            pos_end = pos_found + 1

    if self.verbosity >= 9:
        print('pos_tagtable_loop:', pos_tagtable_loop)
        print('pos_tagtable_stop:', pos_tagtable_stop)
        print('pos_tagname :', pos_tagname)
        print('Will parse tagtable text to end at position: [%s]' % pos_end)

    # Tag values; the helper returns a true value on failure
    if self._tagtable_loop_values_parse(text, pos, pos_end):
        print("ERROR: not parsed table")
        return None

    pos = pos_end

    # Step over the stop_ keyword when there is one
    if pos_tagtable_stop != -1:
        match_tagtable_stop = pattern_tagtable_stop_2.search(text, pos - 1)
        if not match_tagtable_stop:
            print("ERROR: no stop_ on second try")
            return None
        pos = match_tagtable_stop.end()

    if self.check_integrity():
        print("ERROR: integrity of parsed table is not ok")
        return None
    return pos
319
320
"""
Parse the names and values of a free tagtable, starting at pos.
Returns the new position, which doubles as the status (None for failure).
"""
def _tagtable_free_parse(self, text, pos):
    """
    Parse the names and values of free tags from text, starting at pos.

    Tag name / tag value pairs are read for as long as the character at
    the current position is an underscore.  Each name is appended to
    self.tagnames and each value, as a column of one, to self.tagvalues.
    Afterwards the title is set.

    Returns the new position, or None on failure.
    """
    # NOTE(review): the def line was not part of the reviewed text; the
    # signature is taken from the call self._tagtable_free_parse(text, pos)
    # -- confirm.
    text_length = len(text)

    while pos < text_length - 1 and text[pos] == '_':
        # The tag name has to start exactly at the current position
        match_tag_name = pattern_tag_name.search(text, pos)
        if not match_tag_name:
            print("ERROR: looking for a free tag name(1)")
            return None
        if match_tag_name.start() != pos:
            print("ERROR: looking for a free tag name (0)")
            return None
        self.tagnames.append(match_tag_name.group(1))
        pos = match_tag_name.end()

        value, pos = tag_value_parse(text, pos)
        # A position of 0 is what this code treats as failure of
        # tag_value_parse.
        if pos == 0:
            # It is the value, not the name, that could not be parsed
            print("ERROR: looking for a free tag value")
            return None

        self.tagvalues.append([value])
        if self.verbosity >= 9:
            print('**Parsed tag name : [%s] and value [%s]: ' % (
                match_tag_name.group(1), value))

    self.set_title()
    return pos
355
356
357 """
358 Parse values of tagtable loop from pos to pos_end
359 returns status (None for success, 1 for failure)
360 """
def _tagtable_loop_values_parse(self, text, pos, pos_end):
    """
    Parse the values of a looped tagtable from text[pos:pos_end].

    self.tagvalues is rebuilt with one empty column per tag name and the
    values are handed out to the columns in reading order.  Stretches
    without a quoted value are split on white space in one go; quoted
    values are parsed one at a time by tag_value_quoted_parse.

    Returns None for success and 1 for failure.
    """
    # NOTE(review): the def line was not part of the reviewed text; the
    # signature is taken from the call
    # self._tagtable_loop_values_parse(text, pos, pos_end) -- confirm.
    if self.free:
        print("ERROR: This is a 'free' tagtable, only looped tagtable can be parsed")
        return 1

    names_length = len(self.tagnames)
    self.tagvalues = [[] for dummy in range(names_length)]

    # Raw string: '\s' in a plain string literal is an invalid escape.
    match_white_space = re.compile(r'\s+').search(text, pos, pos_end)
    # NOTE(review): start() is an index into the whole text, so leading
    # white space is only skipped here when pos is 0.  Possibly
    # 'start() == pos' was meant; left as it was because the split()
    # calls below drop leading white space anyway -- confirm.
    if match_white_space and match_white_space.start() == 0:
        pos = match_white_space.end()

    tag_id = 0          # column that receives the next value
    count = 0
    count_hash = 100000
    text_length = len(text)

    while pos < pos_end:
        if self.verbosity > 2 and pos - count > count_hash:
            print('DEBUG: ##### %s chars processed ######' % count_hash)
            count = pos

        match_quoted = pattern_quoted.search(text, pos, pos_end)

        if not match_quoted:
            # Nothing quoted any more: the rest is plain values
            for t in text[pos:pos_end].split():
                self.tagvalues[tag_id].append(t)
                tag_id = (tag_id + 1) % names_length
            pos = text_length
            continue

        idxstart = match_quoted.start()

        if idxstart == pos:
            # A quoted value right at the current position
            value, pos = tag_value_quoted_parse(text, pos)
            if pos is None:
                print('ERROR: got error in parse (1)')
                return 1
            if pos > pos_end:
                print('ERROR: found a quoted value that was not wholly within boundaries (1)')
                return 1
            self.tagvalues[tag_id].append(value)
            tag_id = (tag_id + 1) % names_length
            continue

        # Plain values first, up to tempendpos
        c = text[idxstart]
        bc = text[idxstart - 1]
        if (c == "'" or c == '"') and bc != " ":
            # Quote character not preceded by a space: taken as part of
            # a plain value that runs up to the next space.
            tempendpos = text.find(' ', idxstart)
            # find() gives -1 when no space follows; used as position
            # that made the loop start over from the beginning of the
            # text.  Stay within the table in that case, and also when
            # the next space lies beyond it.
            if tempendpos == -1 or tempendpos > pos_end:
                tempendpos = pos_end
        else:
            tempendpos = idxstart

        for t in text[pos:tempendpos].split():
            self.tagvalues[tag_id].append(t)
            tag_id = (tag_id + 1) % names_length

        pos = tempendpos
        if tempendpos == idxstart:
            # Now the quoted value itself
            value, pos = tag_value_quoted_parse(text, pos)
            if pos is None:
                print('ERROR: got error in parse (2)')
                return 1
            if pos > pos_end:
                print('ERROR: found a quoted value that was not wholly within boundaries (2)')
                return 1
            self.tagvalues[tag_id].append(value)
            tag_id = (tag_id + 1) % names_length

    # The last column only ever holds values of complete rows
    col_length = len(self.tagvalues[-1])
    if tag_id != 0:
        print("ERROR: not correct number of tag values read")
        print("Read [%s] tag(s) that is:"
              % (col_length * names_length + tag_id))
        print("[%s] row(s) complete and [%s] tag value(s) in last row that is incomplete."
              % (col_length, tag_id))
        print("Tag names of this table are:")
        print(self.tagnames)
        # Complete rows only: the first column is one value longer than
        # the columns the incomplete row did not reach, so looping over
        # its full length raised an IndexError here.
        for xxx in range(col_length):
            for yyy in range(names_length):
                print(self.tagvalues[yyy][xxx])
        print('-----------------------------------------------')
        # The values of the incomplete last row
        for col_id in range(tag_id):
            print(self.tagvalues[col_id][-1])
        return 1

    if col_length == 0:
        print("ERROR: no tag values parsed")
        return 1

    self.set_title()
    return None
478