Package lib :: Module selection
[hide private]
[frames | no frames]

Source Code for Module lib.selection

  1  from __future__ import absolute_import 
  2  ############################################################################### 
  3  #                                                                             # 
  4  # Copyright (C) 2003-2004,2006-2013,2015 Edward d'Auvergne                    # 
  5  # Copyright (C) 2006-2007 Chris MacRaild                                      # 
  6  #                                                                             # 
  7  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  8  #                                                                             # 
  9  # This program is free software: you can redistribute it and/or modify        # 
 10  # it under the terms of the GNU General Public License as published by        # 
 11  # the Free Software Foundation, either version 3 of the License, or           # 
 12  # (at your option) any later version.                                         # 
 13  #                                                                             # 
 14  # This program is distributed in the hope that it will be useful,             # 
 15  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 16  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 17  # GNU General Public License for more details.                                # 
 18  #                                                                             # 
 19  # You should have received a copy of the GNU General Public License           # 
 20  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 21  #                                                                             # 
 22  ############################################################################### 
 23   
 24  # Module docstring. 
 25  """Module for the molecule, residue and atom selections.""" 
 26   
 27  # Python module imports. 
 28  from warnings import warn 
 29   
 30  # relax module imports. 
 31  from lib import regex 
 32  from lib.check_types import is_unicode 
 33  from lib.errors import RelaxError 
 34  from lib.warnings import RelaxWarning 
 35   
 36   
def parse_token(token, verbosity=False):
    """Parse the token string and return a list of identifying numbers and names.

    Firstly the token is split by the ',' character into its individual elements and all whitespace stripped from the elements.  Numbers are converted to integers, names are left as strings, and ranges are converted into the full list of integers.  An element containing a '-' character which cannot be interpreted as a valid range (more than two '-' characters after the first character, non-integer limits, or a start which is not less than the end) is appended unmodified as a string.


    @param token:       The identification string, the elements of which are separated by commas.  Each element can be either a single number, a range of numbers (two numbers separated by '-'), or a name.  A list of such strings is also accepted, in which case the results for each string are concatenated.
    @type token:        str or list of str or None
    @keyword verbosity: A flag which if True will cause a number of printouts to be activated.
    @type verbosity:    bool
    @return:            A list of identifying numbers and names.  This is the empty list if the token is None.
    @rtype:             list of int and str
    """

    # No token.
    if token is None:
        return []

    # Convert to a list.
    tokens = token if isinstance(token, list) else [token]

    # Loop over the tokens.
    id_list = []
    for single_token in tokens:
        # Split by the ',' character.
        for element in single_token.split(','):
            # Strip all leading and trailing whitespace.
            element = element.strip()

            # Find all '-' characters (ignoring the first character, i.e. a negative number).
            indices = [i for i in range(1, len(element)) if element[i] == '-']

            # Number or name.
            if not indices:
                # Try converting the element into an integer, otherwise leave it as a name.
                try:
                    element = int(element)
                except ValueError:
                    pass

                # Append the element.
                id_list.append(element)
                continue

            # Range.
            valid_range = True

            # Invalid range element, only one range char '-' and one negative sign is allowed.
            if len(indices) > 2:
                if verbosity:
                    print("The range element " + repr(element) + " is invalid. Assuming the '-' character does not specify a range.")
                valid_range = False

            # Convert the two numbers to integers (the first '-' after position zero is taken as the range separator).
            try:
                start = int(element[:indices[0]])
                end = int(element[indices[0]+1:])
            except ValueError:
                if verbosity:
                    print("The range element " + repr(element) + " is invalid as either the start or end of the range are not integers. Assuming the '-' character does not specify a range.")
                valid_range = False

            # Test that the starting number is less than the end (start and end are only read when the conversion succeeded).
            if valid_range and start >= end:
                if verbosity:
                    print("The starting number of the range element " + repr(element) + " needs to be less than the end number. Assuming the '-' character does not specify a range.")
                valid_range = False

            # Create the range and append it to the list.
            if valid_range:
                id_list.extend(range(start, end+1))

            # Just append the string (even though it might be junk).
            else:
                id_list.append(element)

    # Return the identifying list.
    return id_list
124 125
def _split_boolean_component(info, id_char, forbidden_chars, label):
    """Validate a residue or spin component and strip its leading identification character.

    @param info:            The raw component text, including the identification characters and any '&' operator.
    @type info:             str
    @param id_char:         The identification character which must start each '&' separated part (':' or '@').
    @type id_char:          str
    @param forbidden_chars: The identification characters of the other components, none of which may appear.
    @type forbidden_chars:  str
    @param label:           The component name used in the error messages ('residue' or 'spin').
    @type label:            str
    @return:                The stripped token, or a list of the two stripped tokens if a '&' operator is present.
    @rtype:                 str or list of str
    @raise RelaxError:      If more than one '&' is present or if any part is malformed, including an empty part.
    """

    # Only max 1 '&' allowed.
    if info.count('&') > 1:
        raise RelaxError("Only one '&' boolean operator is supported for the %s component of individual spin IDs." % label)

    # Split by '&', then check and remove the identification character.
    tokens = []
    for part in info.split('&'):
        # Checks:
        #   First character must be the identification character (this also rejects an empty part, e.g. from a trailing '&').
        #   Only 1 identification character allowed.
        #   No identification characters of the other components are allowed.
        if not part.startswith(id_char) or part.count(id_char) != 1 or any(char in part for char in forbidden_chars):
            raise RelaxError("Invalid %s selection '%s'." % (label, info))

        # Strip.
        tokens.append(part[1:])

    # Convert to a string if only a single item.
    if len(tokens) == 1:
        return tokens[0]
    return tokens


def tokenise(selection):
    """Split the input selection string returning the mol_token, res_token, and spin_token strings.

    The mol_token is identified as the text from the '#' to either the ':' or '@' characters or the end of the string.

    The res_token is identified as the text from the ':' to either the '@' character or the end of the string.

    The spin_token is identified as the text from the '@' to the end of the string.

    The residue and spin components may each contain a single '&' operator, in which case the corresponding token is returned as a list of two strings.


    @param selection:   The selection identifier.
    @type selection:    str or None
    @return:            The mol_token, res_token, and spin_token.
    @rtype:             3-tuple of str, list of str, or None
    @raise RelaxError:  If the '|' operator is present, if a ':' follows a '@', if any component is malformed, or if no component could be identified.
    """

    # No selection.
    if selection is None:
        return None, None, None


    # Walk along the ID string, separating the molecule, residue, and spin data.
    parts = {'mol': [], 'res': [], 'spin': []}
    pos = 'mol'
    for char in selection:
        # Find forbidden boolean operators.
        if char == '|':
            raise RelaxError("The boolean operator '|' is not supported for individual spin selections.")

        # Hit the residue position.
        if char == ':':
            if pos == 'spin':
                raise RelaxError("Invalid selection string '%s'." % selection)
            pos = 'res'

        # Hit the spin position.
        if char == '@':
            pos = 'spin'

        # Append the data.
        parts[pos].append(char)

    mol_info = ''.join(parts['mol'])
    res_info = ''.join(parts['res'])
    spin_info = ''.join(parts['spin'])


    # Molecules.
    ############

    # No molecule identifier.
    mol_token = None

    # Molecule identifier.
    if mol_info:
        # Find boolean operators.
        if '&' in mol_info:
            raise RelaxError("The boolean operator '&' is not supported for the molecule component of individual spin IDs.")

        # Checks:
        #   No residue identification characters are allowed.
        #   No spin identification characters are allowed.
        #   First character must be '#'.
        #   Only 1 '#' allowed.
        if ':' in mol_info or '@' in mol_info or mol_info[0] != '#' or mol_info.count('#') != 1:
            raise RelaxError("Invalid molecule selection '%s'." % mol_info)

        # ID.
        mol_token = mol_info[1:]


    # Residues.
    ###########

    # Residue identifier (None if absent).
    res_token = None
    if res_info:
        res_token = _split_boolean_component(res_info, ':', '#@', 'residue')


    # Spins.
    ########

    # Spin identifier (None if absent).
    spin_token = None
    if spin_info:
        spin_token = _split_boolean_component(spin_info, '@', '#:', 'spin')


    # End.
    ######

    # Improper selection string.
    if mol_token is None and res_token is None and spin_token is None:
        raise RelaxError("The selection string '%s' is invalid." % selection)

    # Return the three tokens.
    return mol_token, res_token, spin_token
278 279
def spin_id_to_data_list(id):
    """Break a single spin ID string up into its molecule, residue and spin names and numbers.

    The ID is split with tokenise() and each component parsed with parse_token().  Within the residue and spin components, every element convertible to an integer is treated as a number and everything else as a name.

    @param id:          The spin ID string.
    @type id:           str
    @return:            The molecule name, the residue number and name, and the spin number and name.  Any element not present in the ID is returned as None.
    @rtype:             str, int, str, int, str
    @raise RelaxError:  If the ID contains more than one molecule, residue number, residue name, spin number or spin name.
    """

    # Split up the spin ID.
    mol_token, res_token, spin_token = tokenise(id)
    molecules = parse_token(mol_token)
    residues = parse_token(res_token)
    spins = parse_token(spin_token)

    # Molecule name.
    if len(molecules) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one molecule, not %s." % (id, molecules))
    mol_name = molecules[0] if len(molecules) == 1 else None

    # Separate the residue numbers from the residue names.
    res_nums = []
    res_names = []
    for item in residues:
        try:
            number = int(item)
        except ValueError:
            res_names.append(item)
        else:
            res_nums.append(number)

    # Residue number.
    if len(res_nums) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one residue number, not %s." % (id, residues))
    res_num = res_nums[0] if res_nums else None

    # Residue name.
    if len(res_names) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one residue name, not %s." % (id, residues))
    res_name = res_names[0] if res_names else None

    # Separate the spin numbers from the spin names.
    spin_nums = []
    spin_names = []
    for item in spins:
        try:
            number = int(item)
        except ValueError:
            spin_names.append(item)
        else:
            spin_nums.append(number)

    # Spin number.
    if len(spin_nums) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one spin number, not %s." % (id, spins))
    spin_num = spin_nums[0] if spin_nums else None

    # Spin name.
    if len(spin_names) > 1:
        raise RelaxError("The single spin ID '%s' should only belong to one spin name, not %s." % (id, spins))
    spin_name = spin_names[0] if spin_names else None

    # Return the data.
    return mol_name, res_num, res_name, spin_num, spin_name
350 351 352
class Selection(object):
    """An object containing mol-res-spin selections.

    A Selection object represents either a set of selected molecules, residues and spins, or the union or intersection of two other Selection objects.
    """

    def __init__(self, select_string):
        """Initialise a Selection object.

        The string is split at the right-most '&' or '|' character, whichever occurs last, and the two halves are recursively converted into Selection objects which are combined as an intersection or union respectively.  A string without boolean operators is parsed with tokenise() and parse_token().  An empty or None string results in an empty selection.

        @param select_string:   A mol-res-spin selection string.
        @type select_string:    string
        """

        # Handle Unicode.
        if is_unicode(select_string):
            select_string = str(select_string)

        # The boolean relationships, each either None or a 2-tuple of Selection objects.
        self._union = None
        self._intersect = None

        # The parsed lists of molecule, residue and spin identifiers (names and numbers).
        self.molecules = []
        self.residues = []
        self.spins = []

        # Nothing to parse.
        if not select_string:
            return

        # Read boolean symbols from right to left:
        and_index = select_string.rfind('&')
        or_index = select_string.rfind('|')

        # The right-most operator is '&'.
        if and_index > or_index:
            sel0 = Selection(select_string[:and_index].strip())
            sel1 = Selection(select_string[and_index+1:].strip())
            self.intersection(sel0, sel1)

        # The right-most operator is '|'.
        elif or_index > and_index:
            sel0 = Selection(select_string[:or_index].strip())
            sel1 = Selection(select_string[or_index+1:].strip())
            self.union(sel0, sel1)

        # No booleans (both indices are -1), so parse as simple selection:
        else:
            mol_token, res_token, spin_token = tokenise(select_string)
            self.molecules = parse_token(mol_token)
            self.residues = parse_token(res_token)
            self.spins = parse_token(spin_token)


    def contains_mol(self, mol=None):
        """Determine if the molecule name, in string form, is contained in this selection object.

        A selection without any molecule identifiers is treated as containing every molecule.

        @keyword mol:   The name of the molecule.
        @type mol:      str or None
        @return:        The answer of whether the molecule is contained within the selection object.
        @rtype:         bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].contains_mol(mol) or self._union[1].contains_mol(mol)

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].contains_mol(mol) and self._intersect[1].contains_mol(mol)

        # Nothingness.
        if not self.molecules:
            return True

        # The check (regex.search is presumed to match the name against the list of patterns - see lib.regex).
        if regex.search(self.molecules, mol):
            return True

        # No match.
        return False


    def contains_res(self, res_num=None, res_name=None, mol=None):
        """Determine if the residue, identified by its number and name, is contained in this selection object.

        The molecule must first be contained in the selection.  A selection without any residue identifiers is then treated as containing every residue.

        @keyword res_num:   The residue number.
        @type res_num:      int or None
        @keyword res_name:  The residue name.
        @type res_name:     str or None
        @keyword mol:       The molecule name.
        @type mol:          str or None
        @return:            The answer of whether the residue is contained within the selection object.
        @rtype:             bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].contains_res(res_num, res_name, mol) or self._union[1].contains_res(res_num, res_name, mol)

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].contains_res(res_num, res_name, mol) and self._intersect[1].contains_res(res_num, res_name, mol)

        # Does it contain the molecule.
        if not self.contains_mol(mol):
            return False

        # Nothingness.
        if not self.residues:
            return True

        # The residue checks (the number is matched exactly, the name via regex.search).
        if res_num in self.residues or regex.search(self.residues, res_name):
            return True

        # No match.
        return False


    def contains_spin(self, spin_num=None, spin_name=None, res_num=None, res_name=None, mol=None):
        """Determine if the spin is contained in this selection object.

        The molecule and residue must first be contained in the selection.  A selection without any spin identifiers is then treated as containing every spin.

        @keyword spin_num:  The spin number.
        @type spin_num:     int or None
        @keyword spin_name: The spin name.
        @type spin_name:    str or None
        @keyword res_num:   The residue number.
        @type res_num:      int or None
        @keyword res_name:  The residue name.
        @type res_name:     str or None
        @keyword mol:       The molecule name.
        @type mol:          str or None
        @return:            The answer of whether the spin is contained within the selection object.
        @rtype:             bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].contains_spin(spin_num, spin_name, res_num, res_name, mol) or self._union[1].contains_spin(spin_num, spin_name, res_num, res_name, mol)

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].contains_spin(spin_num, spin_name, res_num, res_name, mol) and self._intersect[1].contains_spin(spin_num, spin_name, res_num, res_name, mol)

        # Does it contain the molecule.
        if not self.contains_mol(mol):
            return False

        # Does it contain the residue.
        if not self.contains_res(res_num, res_name, mol):
            return False

        # Nothingness.
        if not self.spins:
            return True

        # The spin checks (the number is matched exactly, the name via regex.search).
        if spin_num in self.spins or regex.search(self.spins, spin_name):
            return True

        # No match.
        return False


    def contains_spin_id(self, spin_id):
        """Is the molecule, residue, and/or spin of the spin_id string located in the selection.

        Only the simple selections allowed by the tokenise function are currently supported.


        @param spin_id: The spin identification string.
        @type spin_id:  str
        @return:        The answer of whether the molecule, residue, and/or spin corresponding to the spin_id string is found within the selection object.  An empty string results in a RelaxWarning and False.
        @rtype:         bool
        """

        # No ID string.
        if spin_id == '':
            warn(RelaxWarning("The spin ID string '' is empty."))
            return False

        # Parse the spin_id string.
        mol_name, res_num, res_name, spin_num, spin_name = spin_id_to_data_list(spin_id)

        # Check if the spin is in the selection object.
        return self.contains_spin(spin_num=spin_num, spin_name=spin_name, res_num=res_num, res_name=res_name, mol=mol_name)


    def has_molecules(self):
        """Determine if the selection object contains molecules.

        @return:    The answer of whether the selection contains molecules.
        @rtype:     bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].has_molecules() or self._union[1].has_molecules()

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].has_molecules() and self._intersect[1].has_molecules()

        # Molecules are present (an explicit bool so that False rather than None is returned when absent).
        return bool(self.molecules)


    def has_residues(self):
        """Determine if the selection object contains residues.

        @return:    The answer of whether the selection contains residues.
        @rtype:     bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].has_residues() or self._union[1].has_residues()

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].has_residues() and self._intersect[1].has_residues()

        # Residues are present (an explicit bool so that False rather than None is returned when absent).
        return bool(self.residues)


    def has_spins(self):
        """Determine if the selection object contains spins.

        @return:    The answer of whether the selection contains spins.
        @rtype:     bool
        """

        # The selection object is a union.
        if self._union:
            return self._union[0].has_spins() or self._union[1].has_spins()

        # The selection object is an intersection.
        elif self._intersect:
            return self._intersect[0].has_spins() and self._intersect[1].has_spins()

        # Spins are present (an explicit bool so that False rather than None is returned when absent).
        return bool(self.spins)


    def intersection(self, select_obj0, select_obj1):
        """Make this Selection object the intersection of two other Selection objects.

        @param select_obj0: First Selection object in intersection.
        @type select_obj0:  Selection instance.
        @param select_obj1: Second Selection object in intersection.
        @type select_obj1:  Selection instance.
        @raise RelaxError:  If this object already holds a boolean relationship or any molecule, residue or spin identifiers.
        """

        # Check that nothing is set.
        if self._union or self._intersect or self.molecules or self.residues or self.spins:
            raise RelaxError("Cannot define multiple Boolean relationships between Selection objects")

        # Create the intersection.
        self._intersect = (select_obj0, select_obj1)


    def union(self, select_obj0, select_obj1):
        """Make this Selection object the union of two other Selection objects.

        @param select_obj0: First Selection object in union.
        @type select_obj0:  Selection instance.
        @param select_obj1: Second Selection object in union.
        @type select_obj1:  Selection instance.
        @raise RelaxError:  If this object already holds a boolean relationship or any molecule, residue or spin identifiers.
        """

        # Check that nothing is set.
        if self._union or self._intersect or self.molecules or self.residues or self.spins:
            raise RelaxError("Cannot define multiple Boolean relationships between Selection objects")

        # Create the union.
        self._union = (select_obj0, select_obj1)
632