Package lib :: Module selection
[hide private]
[frames | no frames]

Source Code for Module lib.selection

  1  from __future__ import absolute_import 
  2  ############################################################################### 
  3  #                                                                             # 
  4  # Copyright (C) 2003-2013 Edward d'Auvergne                                   # 
  5  #                                                                             # 
  6  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  7  #                                                                             # 
  8  # This program is free software: you can redistribute it and/or modify        # 
  9  # it under the terms of the GNU General Public License as published by        # 
 10  # the Free Software Foundation, either version 3 of the License, or           # 
 11  # (at your option) any later version.                                         # 
 12  #                                                                             # 
 13  # This program is distributed in the hope that it will be useful,             # 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 16  # GNU General Public License for more details.                                # 
 17  #                                                                             # 
 18  # You should have received a copy of the GNU General Public License           # 
 19  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 20  #                                                                             # 
 21  ############################################################################### 
 22   
 23  # Module docstring. 
 24  """Module for the molecule, residue and atom selections.""" 
 25   
 26  # Python module imports. 
 27  from warnings import warn 
 28   
 29  # relax module imports. 
 30  from lib import regex 
 31  from lib.check_types import is_unicode 
 32  from lib.errors import RelaxError 
 33  from lib.warnings import RelaxWarning 
 34   
 35   
def parse_token(token, verbosity=False):
    """Parse the token string and return a list of identifying numbers and names.

    Firstly the token is split by the ',' character into its individual elements and all whitespace stripped from the elements.  Numbers are converted to integers, names are left as strings, and ranges are converted into the full list of integers.  An element containing a '-' character which cannot be interpreted as a valid range is kept unchanged as a string.


    @param token:       The identification string, the elements of which are separated by commas.  Each element can be either a single number, a range of numbers (two numbers separated by '-'), or a name.  A list of such strings is also accepted, in which case the results for all strings are concatenated.
    @type token:        str, list of str, or None
    @keyword verbosity: A flag which if True will cause a number of printouts to be activated.
    @type verbosity:    bool
    @return:            A list of identifying numbers and names.  The list is empty if the token is None.
    @rtype:             list of int and str
    """

    # No token.
    if token is None:
        return []

    # Convert to a list.
    if isinstance(token, list):
        tokens = token
    else:
        tokens = [token]

    # Loop over the tokens.
    id_list = []
    for token_string in tokens:
        # Split by the ',' character and loop over the elements.
        for element in token_string.split(','):
            # Strip all leading and trailing whitespace.
            element = element.strip()

            # Find all '-' characters (ignoring the first character, i.e. a negative number).
            indices = [i for i in range(1, len(element)) if element[i] == '-']

            # Number or name.
            if not indices:
                # Try converting the element into an integer.
                try:
                    element = int(element)
                except ValueError:
                    pass

                # Append the element.
                id_list.append(element)
                continue

            # Range.
            valid_range = True

            # Invalid range element, only one range char '-' and one negative sign is allowed.
            if len(indices) > 2:
                if verbosity:
                    print("The range element " + repr(element) + " is invalid. Assuming the '-' character does not specify a range.")
                valid_range = False

            # Convert the two numbers to integers, splitting at the first '-' found after the first character.
            try:
                start = int(element[:indices[0]])
                end = int(element[indices[0]+1:])
            except ValueError:
                if verbosity:
                    print("The range element " + repr(element) + " is invalid as either the start or end of the range are not integers. Assuming the '-' character does not specify a range.")
                valid_range = False

            # Test that the starting number is less than the end (start and end are only defined if the range is still valid).
            if valid_range and start >= end:
                if verbosity:
                    print("The starting number of the range element " + repr(element) + " needs to be less than the end number. Assuming the '-' character does not specify a range.")
                valid_range = False

            # Create the inclusive range and append it to the list.
            if valid_range:
                id_list.extend(range(start, end+1))

            # Just append the string (even though it might be junk).
            else:
                id_list.append(element)

    # Return the identifying list.
    return id_list
123 124
def _tokenise_component(info, symbol, forbidden, name):
    """Validate and split the residue or spin component of a selection string.

    @param info:        The raw component text including its leading identification character, or an empty string if the component is absent.
    @type info:         str
    @param symbol:      The identification character each '&' separated part must start with and contain exactly once.
    @type symbol:       str
    @param forbidden:   The identification characters which may not appear in any part.
    @type forbidden:    str
    @param name:        The component name used in the error messages ('residue' or 'spin').
    @type name:         str
    @return:            None for an empty component, the stripped string if there is a single part, otherwise the list of stripped parts.
    @rtype:             None, str, or list of str
    @raises RelaxError: If more than one '&' is present or if any part is malformed.
    """

    # No identifier.
    if not info:
        return None

    # Only max 1 '&' allowed.
    if info.count('&') > 1:
        raise RelaxError("Only one '&' boolean operator is supported for the %s component of individual spin IDs." % name)

    # Split by '&'.
    tokens = info.split('&')

    # Check and remove the identification character.
    for i, text in enumerate(tokens):
        # Checks:
        #   No identification characters of the other components are allowed.
        #   First character must be the identification character (startswith() also rejects empty parts such as the one after a trailing '&').
        #   Only 1 identification character allowed.
        if any(char in text for char in forbidden) or not text.startswith(symbol) or text.count(symbol) != 1:
            raise RelaxError("Invalid %s selection '%s'." % (name, info))

        # Strip.
        tokens[i] = text[1:]

    # Convert to a string if only a single item.
    if len(tokens) == 1:
        return tokens[0]
    return tokens


def tokenise(selection):
    """Split the input selection string returning the mol_token, res_token, and spin_token strings.

    The mol_token is identified as the text from the '#' to either the ':' or '@' characters or the end of the string.

    The res_token is identified as the text from the ':' to either the '@' character or the end of the string.

    The spin_token is identified as the text from the '@' to the end of the string.

    The leading '#', ':' and '@' characters are removed from the returned tokens.  If the residue or spin component contains a single '&' boolean operator, the corresponding token is returned as a list of the two parts.


    @param selection:   The selection identifier.
    @type selection:    str or None
    @return:            The mol_token, res_token, and spin_token.  All three are None if the selection is None.
    @rtype:             3-tuple of str, list of str, or None
    @raises RelaxError: If the '|' operator is present, if '&' is used in the molecule component or more than once in the residue or spin components, if a ':' follows the '@' character, if any component is malformed, or if no component is found at all.
    """

    # No selection.
    if selection is None:
        return None, None, None

    # Walk along the ID string, separating the molecule, residue, and spin data.
    parts = {'mol': [], 'res': [], 'spin': []}
    pos = 'mol'
    for char in selection:
        # Find forbidden boolean operators.
        if char == '|':
            raise RelaxError("The boolean operator '|' is not supported for individual spin selections.")

        # Hit the residue position.
        if char == ':':
            if pos == 'spin':
                raise RelaxError("Invalid selection string '%s'." % selection)
            pos = 'res'

        # Hit the spin position.
        elif char == '@':
            pos = 'spin'

        # Append the data (the identification character itself is stored with its component).
        parts[pos].append(char)

    mol_info = ''.join(parts['mol'])
    res_info = ''.join(parts['res'])
    spin_info = ''.join(parts['spin'])


    # Molecules.
    ############

    # Molecule identifier.
    if mol_info:
        # Find boolean operators.
        if '&' in mol_info:
            raise RelaxError("The boolean operator '&' is not supported for the molecule component of individual spin IDs.")

        # Checks:
        #   No residue identification characters are allowed.
        #   No spin identification characters are allowed.
        #   First character must be '#'.
        #   Only 1 '#' allowed.
        if ':' in mol_info or '@' in mol_info or not mol_info.startswith('#') or mol_info.count('#') != 1:
            raise RelaxError("Invalid molecule selection '%s'." % mol_info)

        # ID.
        mol_token = mol_info[1:]

    # No molecule identifier.
    else:
        mol_token = None


    # Residues.
    ###########

    res_token = _tokenise_component(res_info, ':', '#@', 'residue')


    # Spins.
    ########

    spin_token = _tokenise_component(spin_info, '@', '#:', 'spin')


    # End.
    ######

    # Improper selection string.
    if mol_token is None and res_token is None and spin_token is None:
        raise RelaxError("The selection string '%s' is invalid." % selection)

    # Return the three tokens.
    return mol_token, res_token, spin_token
277 278
def _split_numbers_and_names(info):
    """Separate a list of parsed token elements into numbers and names.

    @param info:    The elements to sort, each of which is either an integer or a string.
    @type info:     list of int and str
    @return:        The list of elements convertible to integers, and the list of all remaining elements.
    @rtype:         list of int, list of str
    """

    numbers = []
    names = []
    for element in info:
        # Anything which cannot be converted to an integer is treated as a name.
        try:
            numbers.append(int(element))
        except ValueError:
            names.append(element)

    return numbers, names


def spin_id_to_data_list(id):
    """Convert the single spin ID string into a list of the mol, res, and spin names and numbers.

    The ID is split up by tokenise() and each of the three tokens is then converted by parse_token().  Each value is returned as None if the corresponding information is absent from the ID.


    @param id:          The spin ID string.
    @type id:           str
    @return:            The molecule name, the residue number and name, and the spin number and name.
    @rtype:             str, int, str, int, str
    @raises RelaxError: If the ID matches more than one molecule, residue number, residue name, spin number, or spin name.
    """

    # Split up the spin ID.
    mol_token, res_token, spin_token = tokenise(id)
    mol_info = parse_token(mol_token)
    res_info = parse_token(res_token)
    spin_info = parse_token(spin_token)

    # Molecule name.
    mol_name = None
    if len(mol_info) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one molecule, not %s." % (id, mol_info))
    if len(mol_info) == 1:
        mol_name = mol_info[0]

    # Residue info.
    res_nums, res_names = _split_numbers_and_names(res_info)

    # Residue number.
    res_num = None
    if len(res_nums) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one residue number, not %s." % (id, res_info))
    elif len(res_nums) == 1:
        res_num = res_nums[0]

    # Residue name.
    res_name = None
    if len(res_names) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one residue name, not %s." % (id, res_info))
    elif len(res_names) == 1:
        res_name = res_names[0]

    # Spin info.
    spin_nums, spin_names = _split_numbers_and_names(spin_info)

    # Spin number.
    spin_num = None
    if len(spin_nums) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one spin number, not %s." % (id, spin_info))
    elif len(spin_nums) == 1:
        spin_num = spin_nums[0]

    # Spin name.
    spin_name = None
    if len(spin_names) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one spin name, not %s." % (id, spin_info))
    elif len(spin_names) == 1:
        spin_name = spin_names[0]

    # Return the data.
    return mol_name, res_num, res_name, spin_num, spin_name
349 350 351
class Selection(object):
    """An object containing mol-res-spin selections.

    A Selection object represents either a set of selected molecules, residues and spins, or the union or intersection of two other Selection objects.
    """

    def __init__(self, select_string):
        """Initialise a Selection object.

        If the selection string contains the '&' or '|' boolean operators, it is split at the right-most operator and this object becomes the intersection or union respectively of the Selection objects built from the two halves.  Otherwise the string is parsed as a simple selection via tokenise() and parse_token().  An empty selection string leaves the object with no molecules, residues or spins.

        @param select_string:   A mol-res-spin selection string.
        @type select_string:    string
        """

        # Handle Unicode.
        if is_unicode(select_string):
            select_string = str(select_string)

        # The pair of Selection objects if this object is a union or intersection, otherwise None.
        self._union = None
        self._intersect = None

        # The parsed molecule, residue and spin identifiers of a simple selection.
        self.molecules = []
        self.residues = []
        self.spins = []

        if not select_string:
            return

        # Read boolean symbols from right to left (rfind() returns -1 if the symbol is absent).
        and_index = select_string.rfind('&')
        or_index = select_string.rfind('|')

        if and_index > or_index:
            sel0 = Selection(select_string[:and_index].strip())
            sel1 = Selection(select_string[and_index+1:].strip())
            self.intersection(sel0, sel1)

        elif or_index > and_index:
            sel0 = Selection(select_string[:or_index].strip())
            sel1 = Selection(select_string[or_index+1:].strip())
            self.union(sel0, sel1)

        # No booleans, so parse as simple selection:
        else:
            mol_token, res_token, spin_token = tokenise(select_string)
            self.molecules = parse_token(mol_token)
            self.residues = parse_token(res_token)
            self.spins = parse_token(spin_token)


    def contains_mol(self, mol=None):
        """Determine if the molecule name, in string form, is contained in this selection object.

        A selection without any molecule identifiers contains all molecules.

        @keyword mol:   The name of the molecule.
        @type mol:      str or None
        @return:        The answer of whether the molecule is contained within the selection object.
        @rtype:         bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].contains_mol(mol) or self._union[1].contains_mol(mol)

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].contains_mol(mol) and self._intersect[1].contains_mol(mol)

        # The check (the name matching is delegated to regex.search()).
        if regex.search(self.molecules, mol):
            return True

        # Nothingness.
        if not self.molecules:
            return True

        # No match.
        return False


    def contains_res(self, res_num=None, res_name=None, mol=None):
        """Determine if the residue, identified by number and/or name, is contained in this selection object.

        The residue is only contained if its molecule is also contained.  A selection without any residue identifiers contains all residues.

        @keyword res_num:   The residue number.
        @type res_num:      int or None
        @keyword res_name:  The residue name.
        @type res_name:     str or None
        @keyword mol:       The molecule name.
        @type mol:          str or None
        @return:            The answer of whether the residue is contained within the selection object.
        @rtype:             bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].contains_res(res_num, res_name, mol) or self._union[1].contains_res(res_num, res_name, mol)

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].contains_res(res_num, res_name, mol) and self._intersect[1].contains_res(res_num, res_name, mol)

        # Does it contain the molecule.
        select_mol = self.contains_mol(mol)

        # Residue selection flag.
        select_res = False

        # The residue checks (numbers by list membership, names via regex.search()).
        if res_num in self.residues or regex.search(self.residues, res_name):
            select_res = True

        # Nothingness.
        if not self.residues:
            select_res = True

        # Return the result.
        return select_res and select_mol


    def contains_spin(self, spin_num=None, spin_name=None, res_num=None, res_name=None, mol=None):
        """Determine if the spin is contained in this selection object.

        The spin is only contained if its residue and molecule are also contained.  A selection without any spin identifiers contains all spins.

        @keyword spin_num:  The spin number.
        @type spin_num:     int or None
        @keyword spin_name: The spin name.
        @type spin_name:    str or None
        @keyword res_num:   The residue number.
        @type res_num:      int or None
        @keyword res_name:  The residue name.
        @type res_name:     str or None
        @keyword mol:       The molecule name.
        @type mol:          str or None
        @return:            The answer of whether the spin is contained within the selection object.
        @rtype:             bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].contains_spin(spin_num, spin_name, res_num, res_name, mol) or self._union[1].contains_spin(spin_num, spin_name, res_num, res_name, mol)

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].contains_spin(spin_num, spin_name, res_num, res_name, mol) and self._intersect[1].contains_spin(spin_num, spin_name, res_num, res_name, mol)

        # Does it contain the molecule.
        select_mol = self.contains_mol(mol)

        # Does it contain the residue.
        select_res = self.contains_res(res_num, res_name, mol)

        # Spin selection flag.
        select_spin = False

        # The spin checks (numbers by list membership, names via regex.search()).
        if spin_num in self.spins or regex.search(self.spins, spin_name):
            select_spin = True

        # Nothingness.
        if not self.spins:
            select_spin = True

        # Return the result.
        return select_spin and select_res and select_mol


    def contains_spin_id(self, spin_id):
        """Is the molecule, residue, and/or spin of the spin_id string located in the selection.

        Only the simple selections allowed by the tokenise function are currently supported.


        @param spin_id: The spin identification string.
        @type spin_id:  str
        @return:        The answer of whether the molecule, residue, and/or spin corresponding to the spin_id string is found within the selection object.  An empty ID string triggers a RelaxWarning and is never contained.
        @rtype:         bool
        """

        # No ID string.
        if spin_id == '':
            warn(RelaxWarning("The spin ID string '' is empty."))
            return False

        # Parse the spin_id string.
        mol_name, res_num, res_name, spin_num, spin_name = spin_id_to_data_list(spin_id)

        # Check if the spin is in the selection object.
        return self.contains_spin(spin_num=spin_num, spin_name=spin_name, res_num=res_num, res_name=res_name, mol=mol_name)


    def has_molecules(self):
        """Determine if the selection object contains molecules.

        @return:    The answer of whether the selection contains molecules.
        @rtype:     bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].has_molecules() or self._union[1].has_molecules()

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].has_molecules() and self._intersect[1].has_molecules()

        # Molecules are present (always an explicit boolean, never None).
        return bool(self.molecules)


    def has_residues(self):
        """Determine if the selection object contains residues.

        @return:    The answer of whether the selection contains residues.
        @rtype:     bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].has_residues() or self._union[1].has_residues()

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].has_residues() and self._intersect[1].has_residues()

        # Residues are present (always an explicit boolean, never None).
        return bool(self.residues)


    def has_spins(self):
        """Determine if the selection object contains spins.

        @return:    The answer of whether the selection contains spins.
        @rtype:     bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].has_spins() or self._union[1].has_spins()

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].has_spins() and self._intersect[1].has_spins()

        # Spins are present (always an explicit boolean, never None).
        return bool(self.spins)


    def intersection(self, select_obj0, select_obj1):
        """Make this Selection object the intersection of two other Selection objects.

        @param select_obj0: First Selection object in intersection.
        @type select_obj0:  Selection instance.
        @param select_obj1: Second Selection object in intersection.
        @type select_obj1:  Selection instance.
        @raises RelaxError: If this object already holds a union, an intersection, or any molecule, residue or spin identifiers.
        """

        # Check that nothing is set.
        if self._union or self._intersect or self.molecules or self.residues or self.spins:
            raise RelaxError("Cannot define multiple Boolean relationships between Selection objects")

        # Create the intersection.
        self._intersect = (select_obj0, select_obj1)


    def union(self, select_obj0, select_obj1):
        """Make this Selection object the union of two other Selection objects.

        @param select_obj0: First Selection object in union.
        @type select_obj0:  Selection instance.
        @param select_obj1: Second Selection object in union.
        @type select_obj1:  Selection instance.
        @raises RelaxError: If this object already holds a union, an intersection, or any molecule, residue or spin identifiers.
        """

        # Check that nothing is set.
        if self._union or self._intersect or self.molecules or self.residues or self.spins:
            raise RelaxError("Cannot define multiple Boolean relationships between Selection objects")

        # Create the union.
        self._union = (select_obj0, select_obj1)
631