Author: bugman Date: Tue Jan 27 10:56:01 2015 New Revision: 27324 URL: http://svn.gna.org/viewcvs/relax?rev=27324&view=rev Log: Merged revisions 27321-27323 via svnmerge from svn+ssh://bugman@xxxxxxxxxxx/svn/relax/trunk ........ r27321 | bugman | 2015-01-27 09:48:41 +0100 (Tue, 27 Jan 2015) | 7 lines Created the Structure.test_pdb_combined_secondary_structure system test. This is used to demonstrate a problem in the handling of secondary structure metadata when combining multiple PDB structures. It appears as if the chain ID is preserved as the original ID and is not updated to match the new IDs in the output PDB. ........ r27322 | bugman | 2015-01-27 10:50:56 +0100 (Tue, 27 Jan 2015) | 9 lines Bug fix for the PDB secondary structure handling when combining multiple PDB structures. The helix and sheet metadata now converts the original chain IDs into molecule indices, shifted to new values based on the currently loaded data, when the structure.read_pdb user function is executed. When the structure.write_pdb user function is executed, the molecule indices are converted into new chain IDs. This allows the chain IDs in the HELIX and SHEET records to match those of the ATOM and HETATM records. ........ r27323 | bugman | 2015-01-27 10:51:34 +0100 (Tue, 27 Jan 2015) | 5 lines Updated the Structure.test_metadata_xml system test for the changed PDB metadata handling. The helix and sheet IDs are now molecule indices. ........ 
Modified: branches/frame_order_cleanup/ (props changed) branches/frame_order_cleanup/lib/structure/internal/object.py branches/frame_order_cleanup/test_suite/system_tests/structure.py Propchange: branches/frame_order_cleanup/ ------------------------------------------------------------------------------ --- svnmerge-integrated (original) +++ svnmerge-integrated Tue Jan 27 10:56:01 2015 @@ -1 +1 @@ -/trunk:1-27319 +/trunk:1-27323 Modified: branches/frame_order_cleanup/lib/structure/internal/object.py URL: http://svn.gna.org/viewcvs/relax/branches/frame_order_cleanup/lib/structure/internal/object.py?rev=27324&r1=27323&r2=27324&view=diff ============================================================================== --- branches/frame_order_cleanup/lib/structure/internal/object.py (original) +++ branches/frame_order_cleanup/lib/structure/internal/object.py Tue Jan 27 10:56:01 2015 @@ -490,6 +490,12 @@ 'TURN ' ] + # The number of pre-existing molecules. + if not len(self.structural_data): + mol_num = 0 + else: + mol_num = len(self.structural_data[0].mol) + # Loop over the lines. for i in range(len(lines)): # No match, therefore assume to be out of the secondary structure section. @@ -501,34 +507,56 @@ # Parse the record. record_type, ser_num, helix_id, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, helix_class, comment, length = pdb_read.helix(lines[i]) + # The molecule indices. + mol_init_index = self._pdb_chain_id_to_mol_index(init_chain_id) + mol_end_index = self._pdb_chain_id_to_mol_index(end_chain_id) + # Only load the desired molecule. if read_mol != None: - if self._pdb_chain_id_to_mol_index(init_chain_id)+1 not in read_mol: + if mol_init_index + 1 not in read_mol: continue - if self._pdb_chain_id_to_mol_index(end_chain_id)+1 not in read_mol: + if mol_end_index + 1 not in read_mol: continue + + # New molecule indices based on currently loaded data. 
+ mol_init_index += mol_num + mol_end_index += mol_num # Store the data. if not hasattr(self, 'helices'): self.helices = [] - self.helices.append([helix_id, init_chain_id, init_res_name, init_seq_num, end_chain_id, end_res_name, end_seq_num, helix_class, length]) + self.helices.append([helix_id, mol_init_index, init_res_name, init_seq_num, mol_end_index, end_res_name, end_seq_num, helix_class, length]) # A sheet. if lines[i][:5] == 'SHEET': # Parse the record. record_type, strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode = pdb_read.sheet(lines[i]) + # The molecule indices. + mol_init_index = self._pdb_chain_id_to_mol_index(init_chain_id) + mol_end_index = self._pdb_chain_id_to_mol_index(end_chain_id) + # Only load the desired molecule. if read_mol != None: - if self._pdb_chain_id_to_mol_index(init_chain_id)+1 not in read_mol: + if mol_init_index + 1 not in read_mol: continue - if self._pdb_chain_id_to_mol_index(end_chain_id)+1 not in read_mol: + if mol_end_index + 1 not in read_mol: continue + + # New molecule indices based on currently loaded data. + mol_init_index += mol_num + mol_end_index += mol_num + mol_cur_index = None + if cur_chain_id: + mol_cur_index = self._pdb_chain_id_to_mol_index(cur_chain_id) + mol_num + mol_prev_index = None + if prev_chain_id: + mol_prev_index = self._pdb_chain_id_to_mol_index(prev_chain_id) + mol_num # Store the data. 
if not hasattr(self, 'sheets'): self.sheets = [] - self.sheets.append([strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode]) + self.sheets.append([strand, sheet_id, num_strands, init_res_name, mol_init_index, init_seq_num, init_icode, end_res_name, mol_end_index, end_seq_num, end_icode, sense, cur_atom, cur_res_name, mol_cur_index, cur_res_seq, cur_icode, prev_atom, prev_res_name, mol_prev_index, prev_res_seq, prev_icode]) # Return the remaining lines. return lines[i:] @@ -2868,8 +2896,8 @@ # Loop over and unpack the helix data. index = 1 - for helix_id, init_chain_id, init_res_name, init_seq_num, end_chain_id, end_res_name, end_seq_num, helix_class, length in self.helices: - pdb_write.helix(file, ser_num=index, helix_id=helix_id, init_chain_id=init_chain_id, init_res_name=init_res_name, init_seq_num=init_seq_num, end_chain_id=end_chain_id, end_res_name=end_res_name, end_seq_num=end_seq_num, helix_class=helix_class, length=length) + for helix_id, mol_init_index, init_res_name, init_seq_num, mol_end_index, end_res_name, end_seq_num, helix_class, length in self.helices: + pdb_write.helix(file, ser_num=index, helix_id=helix_id, init_chain_id=CHAIN_ID_LIST[mol_init_index], init_res_name=init_res_name, init_seq_num=init_seq_num, end_chain_id=CHAIN_ID_LIST[mol_end_index], end_res_name=end_res_name, end_seq_num=end_seq_num, helix_class=helix_class, length=length) index += 1 # The SHEET records. @@ -2881,7 +2909,18 @@ # Loop over and unpack the helix data. 
index = 1 - for strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode in self.sheets: + for strand, sheet_id, num_strands, init_res_name, mol_init_index, init_seq_num, init_icode, end_res_name, mol_end_index, end_seq_num, end_icode, sense, cur_atom, cur_res_name, mol_cur_index, cur_res_seq, cur_icode, prev_atom, prev_res_name, mol_prev_index, prev_res_seq, prev_icode in self.sheets: + # Translate molecule indices to chain IDs. + init_chain_id = CHAIN_ID_LIST[mol_init_index] + end_chain_id = CHAIN_ID_LIST[mol_end_index] + cur_chain_id = None + if mol_cur_index != None: + cur_chain_id = CHAIN_ID_LIST[mol_cur_index] + prev_chain_id = None + if mol_prev_index != None: + prev_chain_id = CHAIN_ID_LIST[mol_prev_index] + + # Write out. pdb_write.sheet(file, strand=strand, sheet_id=sheet_id, num_strands=num_strands, init_res_name=init_res_name, init_chain_id=init_chain_id, init_seq_num=init_seq_num, init_icode=init_icode, end_res_name=end_res_name, end_chain_id=end_chain_id, end_seq_num=end_seq_num, end_icode=end_icode, sense=sense, cur_atom=cur_atom, cur_res_name=cur_res_name, cur_chain_id=cur_chain_id, cur_res_seq=cur_res_seq, cur_icode=cur_icode, prev_atom=prev_atom, prev_res_name=prev_res_name, prev_chain_id=prev_chain_id, prev_res_seq=prev_res_seq, prev_icode=prev_icode) index += 1 Modified: branches/frame_order_cleanup/test_suite/system_tests/structure.py URL: http://svn.gna.org/viewcvs/relax/branches/frame_order_cleanup/test_suite/system_tests/structure.py?rev=27324&r1=27323&r2=27324&view=diff ============================================================================== --- branches/frame_order_cleanup/test_suite/system_tests/structure.py (original) +++ branches/frame_order_cleanup/test_suite/system_tests/structure.py Tue Jan 27 10:56:01 2015 @@ -1,6 
+1,6 @@ ############################################################################### # # -# Copyright (C) 2008-2014 Edward d'Auvergne # +# Copyright (C) 2008-2015 Edward d'Auvergne # # # # This file is part of the program relax (http://www.nmr-relax.com). # # # @@ -3657,11 +3657,11 @@ # What the data should look like. helices = [ - ['H1', 'A', 'ILE', 23, 'A', 'GLU', 34, 1, 12] + ['H1', 0, 'ILE', 23, 0, 'GLU', 34, 1, 12] ] sheets = [ - [1, 'BET', 5, 'GLY', 'A', 10, None, 'VAL', 'A', 17, None, 0, None, None, None, None, None, None, None, None, None, None], - [2, 'BET', 5, 'MET', 'A', 1, None, 'THR', 'A', 7, None, -1, None, None, None, None, None, None, None, None, None, None] + [1, 'BET', 5, 'GLY', 0, 10, None, 'VAL', 0, 17, None, 0, None, None, None, None, None, None, None, None, None, None], + [2, 'BET', 5, 'MET', 0, 1, None, 'THR', 0, 7, None, -1, None, None, None, None, None, None, None, None, None, None] ] # Check the helix data. @@ -3674,6 +3674,49 @@ self.assertEqual(len(cdp.structure.sheets), 2) self.assertEqual(cdp.structure.sheets[0], sheets[0]) self.assertEqual(cdp.structure.sheets[1], sheets[1]) + + + def test_pdb_combined_secondary_structure(self): + """Test the handling of secondary structure metadata when combining multiple PDB structures.""" + + # Path of the structure file. + path = status.install_path + sep+'test_suite'+sep+'shared_data'+sep+'structures' + + # Read a PDB file twice as two different molecules. + self.interpreter.structure.read_pdb('1J7O.pdb', dir=path, set_mol_name='N-dom', read_model=1, set_model_num=1) + self.interpreter.structure.read_pdb('1J7P.pdb', dir=path, set_mol_name='C-dom', read_model=1, set_model_num=1) + + # Create a PDB file. + file = DummyFileObject() + self.interpreter.structure.write_pdb(file=file, force=True) + + # The file secondary structure contents, as they should be. 
+ contents = [ + "HELIX 1 1 THR A 5 ASP A 20 1 16 \n", + "HELIX 2 2 THR A 28 LEU A 39 1 12 \n", + "HELIX 3 3 THR A 44 GLU A 54 1 11 \n", + "HELIX 4 4 ASP A 64 MET A 76 1 13 \n", + "HELIX 5 1 GLU B 82 ASP B 93 1 12 \n", + "HELIX 6 2 SER B 101 LEU B 112 1 12 \n", + "HELIX 7 3 THR B 117 ASP B 129 1 13 \n", + "HELIX 8 4 TYR B 138 THR B 146 1 9 \n", + "SHEET 1 A 2 TYR B 99 ILE B 100 0 \n", + "SHEET 2 A 2 VAL B 136 ASN B 137 -1 OVAL B 136 NILE B 100 \n" + ] + + # Check secondary structure contents of the created PDB file. + lines = file.readlines() + index = 0 + print("\n\nChecking the records:\n") + for i in range(len(lines)): + # Only secondary structure records. + if lines[i][:5] not in ['HELIX', 'SHEET']: + continue + + # Check and increment the index. + print(lines[i][:-1]) + self.assertEqual(contents[index], lines[i]) + index += 1 def test_read_gaussian_strychnine(self):