lib.software.bruker_dc

1 ############################################################################### 2 # # 3 # Copyright (C) 2011-2013,2019 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for the reading of Bruker Dynamics Centre (DC) files.""" 24 25 # Python module imports. 26 from re import search, split 27 from warnings import warn 28 29 # relax module imports. 30 from lib.errors import RelaxError 31 from lib.io import open_read_file 32 from lib.physical_constants import element_from_isotope 33 from lib.warnings import RelaxWarning 34 35

def convert_relax_data(data):
    """Convert a relaxation time and its scaled error into a relaxation rate and rate error.

    The rate is the reciprocal of the time, Rx = 1/Tx.  The Tx error is first divided by its scaling factor and then propagated to the rate as Rx_err = Rx**2 * Tx_err.

    @param data:    The Tx value, Tx error, and error scaling factor for a given residue from the DC file, in that order.
    @type data:     list of str
    @return:        The Rx value and the Rx error.
    @rtype:         tuple of float
    """

    # Parse the relaxation time and invert it to obtain the rate.
    tx = float(data[0])
    rx = 1.0 / tx

    # The Tx error with the scaling removed.
    tx_err = float(data[1]) / float(data[2])

    # Propagate the error through the inversion and return both values.
    return rx, rx**2 * tx_err

54 55

def create_object(file=None, dir=None):
    """Parse the DC data file and create and return an object representation of the data.

    The file is read in full, the handle is closed, and the lines are then handed to a new DCObject for population and processing.

    @keyword file:  The name of the file to open.
    @type file:     str
    @keyword dir:   The directory containing the file (defaults to the current directory if None).
    @type dir:      str or None
    @return:        The object representation of the Bruker DC file.
    @rtype:         DCObject instance
    """

    # Extract the data from the file, closing the handle even if the read fails.
    file_handle = open_read_file(file, dir)
    try:
        lines = file_handle.readlines()
    finally:
        file_handle.close()

    # Create the object, first storing the raw rows and then interpreting them.
    obj = DCObject()
    obj.populate(lines)
    obj.process()

    # Return the object.
    return obj

79 80

def get_res_num(data):
    """Extract the residue number from Bruker DC residue text and format it as a spin ID.

    The text is broken up around each run of the digits 0-9 and the last piece which converts to an integer is used as the residue number.  If no piece converts, the number is None and the string ":None" is returned.

    @param data:    The residue text from the DC file.
    @type data:     str
    @return:        The residue number prefixed by a colon.
    @rtype:         str
    """

    # Default when no number is present.
    res_num = None

    # Walk the pieces backwards so that the first successful conversion is the last number in the text.
    for token in reversed(split('([0-9]+)', data)):
        try:
            res_num = int(token)
        except ValueError:
            continue
        break

    # Format the ID.
    return ":%s" % res_num

103 104 105

class DCObject:
    """An object representation of the Bruker DC file data.

    The file header is stored as a dictionary and each recognised section of the file is stored both in an ordered list and as an attribute of this object.  The section attributes (sample_information, parameters, intensities, intensity_errors, intensities_bc, details, and results) only exist if the corresponding section is present in the file.
    """

    def __init__(self):
        """Initialise the object."""

        # The dictionary of header information.
        self._header = {}

        # The sections of the file, in the order encountered.
        self._sections = []

        # The relaxation data type and the DC version, both set by process().
        self.ri_type = None
        self.version = None


    def _create_section(self, name):
        """Create, store, and return the section object matching the given section name.

        @param name:    The section name following the "SECTION:" tag.
        @type name:     str
        @return:        The new section object, or None if the section name is unknown.
        @rtype:         DCSection instance or None
        """

        if name == "sample information":
            section = DCSampleInfo()
            self.sample_information = section
        elif name == "relevant parameters":
            section = DCParams()
            self.parameters = section
        elif name == "integrals":
            section = DCIntegrals(err=False, bc=False)
            self.intensities = section
        elif name == "integral errors":
            section = DCIntegrals(err=True, bc=False)
            self.intensity_errors = section
        elif name == "integrals back calculated from fit":
            # NOTE(review):  The err flag is set for the back-calculated integrals exactly as before - confirm that this is intended.
            section = DCIntegrals(err=True, bc=True)
            self.intensities_bc = section
        elif name == "details":
            section = DCDetails()
            self.details = section
        elif name == "results":
            section = DCResults()
            self.results = section

        # Unknown section.
        else:
            warn(RelaxWarning("The Bruker DC file section \"%s\" is unknown." % name))
            return None

        # Keep the sections in file order for process().
        self._sections.append(section)
        return section


    def populate(self, lines):
        """Populate the object with the file data.

        Everything before the first "SECTION:" line is stored as header information, with the first tab-separated element as the key and the second as the value (an empty string if the line has no second element).  All later lines are added to the section they belong to.  The lines of an unknown section are skipped rather than being added to the preceding section.

        @param lines:       The Bruker DC file data with each list element being a line of the data file.
        @type lines:        list of str
        @raise RelaxError:  If the first line of the file is not "$##1.0".
        """

        # The section currently being filled (None in the header and inside unknown sections).
        section = None
        in_sections = False

        # Loop over the data.
        for i, line in enumerate(lines):
            # Check the file.
            if i == 0:
                if line.strip() != "$##1.0":
                    raise RelaxError("Unknown file format, Bruker DC files must start with $##1.0 on the first line.")
                continue

            # Split the line on tabs and strip the rubbish.
            row = [element.strip() for element in line.split("\t")]

            # Skip empty lines.
            if row == ['']:
                continue

            # The second element, if present.
            value = row[1] if len(row) > 1 else ""

            # Inside a new section (the title line itself is never stored).
            if row[0] == "SECTION:":
                in_sections = True
                section = self._create_section(value)
                continue

            # Store the header info.
            if not in_sections:
                self._header[row[0]] = value

            # Or store the section info, dropping the contents of unknown sections.
            elif section is not None:
                section.add(row)


    def process(self):
        """Process the Bruker DC data already present in the object.

        The relaxation data type is determined from the 'Project:' header entry, the DC version from the 'generated by:' header entry if present, and then each stored section is processed in turn.

        @raise KeyError:    If the file header contains no 'Project:' entry.
        """

        # Experiment type, with T1 taking precedence over T2, and T2 over NOE.
        project = self._header['Project:']
        if 'T1' in project:
            self.ri_type = 'R1'
        elif 'T2' in project:
            self.ri_type = 'R2'
        elif 'NOE' in project:
            self.ri_type = 'NOE'

        # The DC version.
        if 'generated by:' in self._header:
            self.version = self._header['generated by:']

        # Loop over the sections.
        for section in self._sections:
            section.process()

210 211 212

class DCSection(object):
    """Common base for the Bruker DC file sections, holding the raw rows of a section."""

    def __init__(self):
        """Set up the empty row storage."""

        # The rows of the section, in the order they were added.
        self._data = []


    def add(self, elements):
        """Append one row of the file to the section.

        The "SECTION:" title row is ignored, all other rows are stored unchanged.

        @param elements:    The Bruker DC file line split by tabs, with whitespace removed.
        @type elements:     list of str
        """

        # Everything except the section title line is data.
        if elements[0] != "SECTION:":
            self._data.append(elements)

236 237 238

class DCDetails(DCSection):
    """Section object for the Bruker DC analysis details."""

    def __init__(self):
        """Set up the section with no integration method yet known."""

        # The base class holds the raw rows.
        super(DCDetails, self).__init__()

        # The peak integration method, either 'height' or 'volume' once process() has found it.
        self.int_type = None


    def process(self):
        """Interpret the stored rows, validating the error estimation method and finding the integration method.

        @raise RelaxError:  If the systematic error estimation is the "worst case per peak scenario".
        """

        # Translation of the DC integral names into the integration method.
        int_types = {
            'peak intensities': 'height',
            'area integral': 'volume'
        }

        for row in self._data:
            tag = row[0]

            # Only one error estimation method is acceptable.
            if tag == 'Systematic error estimation of data:':
                if row[1] == 'worst case per peak scenario':
                    raise RelaxError("The errors estimation method \"worst case per peak scenario\" is not suitable for model-free analysis. Please go back to the DC and switch to \"average variance calculation\".")

            # Peak heights or peak volumes (any other value leaves the method untouched).
            elif tag == 'Used integrals:':
                if row[1] in int_types:
                    self.int_type = int_types[row[1]]

272 273 274

class DCIntegrals(DCSection):
    """Section object for the Bruker DC peak intensity tables."""

    def __init__(self, err=False, bc=False):
        """Set up the section, recording which kind of peak intensity table it represents.

        @keyword err:   A flag which if True means that the data is for the peak intensity errors.
        @type err:      bool
        @keyword bc:    A flag which if True means that this is the back-calculated peak intensity data.
        @type bc:       bool
        """

        # The base class holds the raw rows.
        super(DCIntegrals, self).__init__()

        # The kind of table.
        self.err = err
        self.bc = bc

        # The column names, the mixing times, and the per-residue intensities filled in by process().
        self.ids = []
        self.relaxation_time = []
        self.peak_intensity = {}


    def process(self):
        """Interpret the stored rows as mixing times, column names, and per-residue peak intensities.

        The 'Mixing time [s]:' row supplies the relaxation times as floats, the 'Peak name' row supplies the IDs, and every other row is treated as the intensities of one residue, keyed by the residue ID derived from the first element of the row.
        """

        for row in self._data:
            # Separate the row label from its values.
            label, values = row[0], row[1:]

            # The mixing times.
            if label == 'Mixing time [s]:':
                self.relaxation_time.extend(float(value) for value in values)

            # The spectra names.
            elif label == 'Peak name':
                self.ids.extend(values)

            # The peak intensities, accumulated per residue.
            else:
                intensities = self.peak_intensity.setdefault(get_res_num(label), [])
                intensities.extend(float(value) for value in values)

325 326 327

class DCParams(DCSection):
    """Section object for the Bruker DC parameter information."""

    def process(self):
        """Interpret the stored rows, extracting the proton frequency.

        The frq attribute is only created if a 'Proton frequency[MHz]:' row is present.
        """

        for row in self._data:
            # Only the proton frequency is of interest.
            if row[0] != 'Proton frequency[MHz]:':
                continue

            # Convert from MHz to Hz.
            self.frq = float(row[1]) * 1e6

339 340 341

class DCResults(DCSection):
    """Class for the Bruker DC results.

    After processing, the sequence attribute lists the residue IDs in file order and each of the f1, f2, I0, I0_err, Tx, Tx_err, Tx_err_scale, Rx, Rx_err, and fit_info dictionaries maps a residue ID to the value of the corresponding results column, for those columns present in the file.
    """

    # The columns stored as floats, in the order in which they are converted.  The fit_info column is kept as text.
    _FLOAT_COLUMNS = ('f1', 'f2', 'I0', 'I0_err', 'Tx', 'Tx_err', 'Tx_err_scale', 'Rx', 'Rx_err')

    # The column titles recognised in the 'Peak name' row.
    _TX_TITLES = ('T1 [s]', 'T2 [s]')
    _RX_TITLES = ('R1 [rad/s]', 'R2 [rad/s]', 'NOE', 'NOE [ ]', 'NOE [none]')
    _RX_SD_TITLES = ('R1 sd [rad/s]', 'R2 sd [rad/s]')
    _NOE_TITLES = ('NOE', 'NOE [ ]', 'NOE [none]')

    def __init__(self):
        """Initialise the Bruker DC section object."""

        # Initialise the base class.
        super(DCResults, self).__init__()

        # Initialise some structures.
        self.sequence = []
        self.f1 = {}
        self.f2 = {}
        self.I0 = {}
        self.I0_err = {}
        self.Tx = {}
        self.Tx_err = {}
        self.Tx_err_scale = {}
        self.Rx = {}
        self.Rx_err = {}
        self.fit_info = {}

        # Data indices (the column position of each quantity, or None if the column is absent).
        self.indices = {
            'f1': None,
            'f2': None,
            'I0': None,
            'I0_err': None,
            'Tx': None,
            'Tx_err': None,
            'Tx_err_scale': None,
            'Rx': None,
            'Rx_err': None,
            'fit_info': None
        }


    def _process_titles(self, row):
        """Determine the column position of each quantity from the 'Peak name' title row.

        The 'error' and 'errorScale' titles are ambiguous, so the preceding titles are used to work out which quantity they belong to.

        @param row:         The title row of the results table.
        @type row:          list of str
        @raise RelaxError:  If no relaxation rate or NOE column is present.
        """

        for j in range(1, len(row)):
            title = row[j]
            previous = row[j-1]

            if title == 'F1 [ppm]':
                self.indices['f1'] = j
            elif title == 'F2 [ppm]':
                self.indices['f2'] = j
            elif title == 'Io':
                self.indices['I0'] = j
            elif title == 'error' and previous == 'Io':
                self.indices['I0_err'] = j
            elif title in self._TX_TITLES:
                self.indices['Tx'] = j
            elif title == 'error' and previous in self._TX_TITLES:
                self.indices['Tx_err'] = j

            # The scale sits two columns after the Tx value (j >= 2 prevents the look-back wrapping around to the end of the row).
            elif title == 'errorScale' and j >= 2 and row[j-2] in self._TX_TITLES:
                self.indices['Tx_err_scale'] = j
            elif title in self._RX_TITLES:
                self.indices['Rx'] = j
            elif title in self._RX_SD_TITLES:
                self.indices['Rx_err'] = j
            elif title == 'error' and previous in self._NOE_TITLES:
                self.indices['Rx_err'] = j
            elif title == 'fitInfo':
                self.indices['fit_info'] = j

        # Catch old PDC files (to fix https://web.archive.org/web/https://gna.org/bugs/?20152).
        if self.indices['Rx'] is None:
            raise RelaxError("The old Protein Dynamics Center (PDC) files with relaxation times but no relaxation rates are not supported.")


    def _process_residue(self, row):
        """Store the values of a single residue row using the known column positions.

        @param row: The row of the results table for one residue.
        @type row:  list of str
        """

        # The residue info.
        res_id = get_res_num(row[0])
        self.sequence.append(res_id)

        # Store the numeric data of all columns found in the title row.
        for name in self._FLOAT_COLUMNS:
            index = self.indices[name]
            if index is not None:
                getattr(self, name)[res_id] = float(row[index])

        # The fit information is stored as text.
        index = self.indices['fit_info']
        if index is not None:
            self.fit_info[res_id] = row[index]


    def process(self):
        """Process the Bruker DC data already present in the section object.

        The 'Peak name' row defines the columns of the table and every other row is the data of one residue.

        @raise RelaxError:  If the table has no relaxation rate or NOE column, as in the old Protein Dynamics Center files.
        """

        # Loop over the data.
        for row in self._data:
            # The metadata.
            if row[0] == 'Peak name':
                self._process_titles(row)

            # The relaxation data.
            else:
                self._process_residue(row)

441 442 443

class DCSampleInfo(DCSection):
    """Section object for the Bruker DC sample information."""

    def process(self):
        """Interpret the stored rows, extracting the isotope labelling information.

        For a 'Labelling:' row, the isotope attribute is the second element of the row, spin_name is the first run of upper case letters within it, and atom_name is the result of element_from_isotope() for the isotope.  The attributes are only created if such a row is present.
        """

        for row in self._data:
            # Only the labelling is of interest.
            if row[0] != 'Labelling:':
                continue

            # The spin isotope.
            label = row[1]
            self.isotope = label

            # The name of the spins, i.e. the first captured group of upper case letters.
            pieces = split('([A-Z]+)', label)
            self.spin_name = pieces[1]

            # The atom name.
            self.atom_name = element_from_isotope(self.isotope)

462

Source Code for Module lib.software.bruker_dc