1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """The spectrum user function definitions."""
24
25
26 from os import sep
27 import dep_check
28 if dep_check.wx_module:
29 from wx import FD_OPEN
30 else:
31 FD_OPEN = -1
32
33
34 from graphics import WIZARD_IMAGE_PATH
35 from pipe_control import spectrum
36 from user_functions.data import Uf_info; uf_info = Uf_info()
37 from user_functions.data import Uf_tables; uf_tables = Uf_tables()
38 from user_functions.objects import Desc_container
39
40
41
# Register the 'spectrum' user function class and set the attributes used to
# present it (icon, menu text and title).
uf_class = uf_info.add_class('spectrum')
uf_class.gui_icon = "relax.fid"
uf_class.menu_text = "&spectrum"
uf_class.title = "Class for supporting the input of spectral data."
46
47
48
# The spectrum.baseplane_rmsd user function.
uf = uf_info.add_uf('spectrum.baseplane_rmsd')
uf.title = "Set the baseplane RMSD of a given spin in a spectrum for error analysis."
uf.title_short = "Baseplane RMSD setting."

# The keyword arguments, registered in the order error, spectrum_id, spin_id.
uf.add_keyarg(
    name="error",
    default=0.0,
    py_type="num",
    desc_short="error",
    desc="The baseplane RMSD error value.",
)
# The spectrum ID uses a read-only combo element fed by spectrum.get_ids.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str",
    desc_short="spectrum ID string",
    desc="The spectrum ID string.",
    wiz_element_type='combo',
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True,
)
# The spin ID may be None.
uf.add_keyarg(
    name="spin_id",
    py_type="str",
    desc_short="spin ID string",
    desc="The spin ID string.",
    can_be_none=True,
)

# The main description section.
section = Desc_container()
uf.desc.append(section)
section.add_paragraph("The spectrum ID identifies the spectrum associated with the error and must correspond to a previously loaded set of intensities. If the spin ID is unset, then the error value for all spins will be set to the supplied value.")

# The backend function and the GUI menu/wizard settings.
uf.backend = spectrum.baseplane_rmsd
uf.menu_text = "&baseplane_rmsd"
uf.gui_icon = "oxygen.actions.edit-rename"
uf.wizard_size = (800, 500)
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
83
84
85
# The spectrum.delete user function.
uf = uf_info.add_uf('spectrum.delete')
uf.title = "Delete the spectral data corresponding to the spectrum ID string."
uf.title_short = "Spectral data deletion."

# The single keyword argument: a read-only combo element fed by
# spectrum.get_ids.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str",
    desc_short="spectrum ID string",
    desc="The unique spectrum ID string.",
    wiz_element_type='combo',
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True,
)

# The main description section.
section = Desc_container()
uf.desc.append(section)
section.add_paragraph("The spectral data corresponding to the given spectrum ID string will be removed from the current data pipe.")

# The prompt examples section.
section = Desc_container("Prompt examples")
uf.desc.append(section)
section.add_paragraph("To delete the peak height data corresponding to the ID 'R1 ncyc5', type:")
section.add_prompt("relax> spectrum.delete('R1 ncyc5')")

# The backend function and the GUI menu/wizard settings.
uf.backend = spectrum.delete
uf.menu_text = "&delete"
uf.gui_icon = "oxygen.actions.list-remove"
uf.wizard_size = (700, 400)
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
110
111
112
# The spectrum.error_analysis user function.
uf = uf_info.add_uf('spectrum.error_analysis')
uf.title = "Perform an error analysis for peak intensities."
uf.title_short = "Peak intensity error analysis."

# The single keyword argument: an optional (can be None) list of spectrum IDs,
# with the GUI choices fed by spectrum.get_ids.
uf.add_keyarg(
    name="subset",
    py_type="str_list",
    desc_short="subset spectrum IDs",
    desc="The list of spectrum ID strings to restrict the error analysis to.",
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True,
    can_be_none=True,
)

# The main description section, ending with the table of analysis types.
section = Desc_container()
uf.desc.append(section)
section.add_paragraph("This user function must only be called after all peak intensities have been loaded and all other necessary spectral information set. This includes the baseplane RMSD and the number of points used in volume integration, both of which are only used if spectra have not been replicated.")
section.add_paragraph("The error analysis can be restricted to a subset of the loaded spectral data. This is useful, for example, if half the spectra have been collected on one spectrometer and the other half on a different spectrometer.")
section.add_paragraph("Six different types of error analysis are supported depending on whether peak heights or volumes are supplied, whether noise is determined from replicated spectra or the RMSD of the baseplane noise, and whether all spectra or only a subset have been duplicated. These are:")
table = uf_tables.add_table(label="table: peak intensity error analysis", caption="The six peak intensity error analysis types.")
table.add_headings(["Int type", "Noise source", "Error scope"])
# The same three noise source rows apply to both intensity types, so they are
# generated per type (heights first, then volumes) to keep the rows in sync.
for int_type in ("Heights", "Volumes"):
    table.add_row([int_type, "RMSD baseplane", "One sigma per peak per spectrum"])
    table.add_row([int_type, "Partial duplicate + variance averaging", "One sigma for all peaks, all spectra"])
    table.add_row([int_type, "All replicated + variance averaging", "One sigma per replicated spectra set"])
section.add_table(table.label)

# Peak heights, no replicated spectra.
section = Desc_container("Peak heights with baseplane noise RMSD")
uf.desc.append(section)
section.add_paragraph("When none of the spectra have been replicated, then the peak height errors are calculated using the RMSD of the baseplane noise, the value of which is set by the spectrum.baseplane_rmsd user function. This results in a different error per peak per spectrum. The standard deviation error measure for the peak height, sigma_I, is set to the RMSD value.")

# Peak heights, some spectra replicated.
section = Desc_container("Peak heights with partially replicated spectra")
uf.desc.append(section)
section.add_paragraph("When spectra are replicated, the variance for a single spin at a single replicated spectra set is calculated by the formula")
section.add_item_list_element(None, "sigma^2 = sum({Ii - Iav}^2) / (n - 1),")
section.add_paragraph("where sigma^2 is the variance, sigma is the standard deviation, n is the size of the replicated spectra set with i being the corresponding index, Ii is the peak intensity for spectrum i, and Iav is the mean over all spectra i.e. the sum of all peak intensities divided by n.")
section.add_paragraph("As the value of n in the above equation is always very low since normally only a couple of spectra are collected per replicated spectra set, the variance of all spins is averaged for a single replicated spectra set. Although this results in all spins having the same error, the accuracy of the error estimate is significantly improved.")
# Text fix:  removed the doubled word "replicated replicated".
section.add_paragraph("If there are in addition to the replicated spectra loaded peak intensities which only consist of a single spectrum, i.e. not all spectra are replicated, then the variances of replicated spectra sets will be averaged. This will be used for the entire experiment so that there will be only a single error value for all spins and for all spectra.")

# Peak heights, all spectra replicated.
section = Desc_container("Peak heights with all spectra replicated")
uf.desc.append(section)
# Text fix:  "the each" -> "then each".
section.add_paragraph("If all spectra are collected in duplicate (triplicate or higher number of spectra are supported), then each replicated spectra set will have its own error estimate. The error for a single peak is calculated as when partially replicated spectra are collected, and these are again averaged to give a single error per replicated spectra set. However as all replicated spectra sets will have their own error estimate, variance averaging across all spectra sets will not be performed.")

# Peak volumes, no replicated spectra.
section = Desc_container("Peak volumes with baseplane noise RMSD")
uf.desc.append(section)
section.add_paragraph("The method of error analysis when no spectra have been replicated and peak volumes are used is highly dependent on the integration method. Many methods simply sum the number of points within a fixed region, either a box or oval object. The number of points used, N, must be specified by another user function in this class. Then the error is simply given by the sum of variances:")
section.add_item_list_element(None, "sigma_vol^2 = sigma_i^2 * N,")
section.add_paragraph("where sigma_vol is the standard deviation of the volume, sigma_i is the standard deviation of a single point assumed to be equal to the RMSD of the baseplane noise, and N is the total number of points used in the summation integration method. For a box integration method, this converts to the Nicholson, Kay, Baldisseri, Arango, Young, Bax, and Torchia (1992) Biochemistry, 31: 5253-5263 equation:")
section.add_item_list_element(None, "sigma_vol = sigma_i * sqrt(n*m),")
# Text fix:  subject-verb agreement ("a number of programs ... do not use").
section.add_paragraph("where n and m are the dimensions of the box. Note that a number of programs, for example peakint (http://hugin.ethz.ch/wuthrich/software/xeasy/xeasy_m15.html) do not use all points within the box. And if the number N can not be determined, this category of error analysis is not possible.")
# Text fix:  added the missing "for" ("Also note that for non-point ...").
section.add_paragraph("Also note that for non-point summation methods, for example when line shape fitting is used to determine peak volumes, the equations above cannot be used. Hence again this category of error analysis cannot be used. This is the case for one of the three integration methods used by Sparky (http://www.cgl.ucsf.edu/home/sparky/manual/peaks.html#Integration). And if fancy techniques are used, for example as Cara does to deconvolute overlapping peaks (http://www.cara.ethz.ch/Wiki/Integration), this again makes this error analysis impossible.")

# Peak volumes, some spectra replicated.
section = Desc_container("Peak volumes with partially replicated spectra")
uf.desc.append(section)
section.add_paragraph("When peak volumes are measured by any integration method and a few of the spectra are replicated, then the intensity errors are calculated identically as described in the 'Peak heights with partially replicated spectra' section above.")

# Peak volumes, all spectra replicated.
section = Desc_container("Peak volumes with all spectra replicated")
uf.desc.append(section)
section.add_paragraph("With all spectra replicated and again using any integration methodology, the intensity errors can be calculated as described in the 'Peak heights with all spectra replicated' section above.")

# The backend function and the GUI menu/wizard settings.
uf.backend = spectrum.error_analysis
uf.menu_text = "&error_analysis"
uf.gui_icon = "oxygen.categories.applications-education"
uf.wizard_height_desc = 530
uf.wizard_size = (1000, 700)
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
# The wizard apply button is switched off for this user function.
uf.wizard_apply_button = False
173
174
175
# The spectrum.integration_points user function.
uf = uf_info.add_uf('spectrum.integration_points')
uf.title = "Set the number of summed points used in volume integration of a given spin in a spectrum."
uf.title_short = "Number of integration points."

# The keyword arguments, registered in the order N, spectrum_id, spin_id.
# N is an integer bounded to the range 1 to 10000000.
uf.add_keyarg(
    name="N",
    py_type="int",
    min=1,
    max=10000000,
    desc_short="number of summed points",
    desc="The number of points used by the summation volume integration method.",
)
# The spectrum ID uses a read-only combo element fed by spectrum.get_ids.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str",
    desc_short="spectrum ID string",
    desc="The spectrum ID string.",
    wiz_element_type='combo',
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True,
)
# The spin ID may be None.
uf.add_keyarg(
    name="spin_id",
    py_type="str",
    desc_short="spin ID string",
    desc="Restrict setting the number to certain spins.",
    can_be_none=True,
)

# The main description section.
section = Desc_container()
uf.desc.append(section)
section.add_paragraph("For a complete description of which integration methods and how many points N are used for different integration techniques, please see the spectrum.error_analysis user function documentation.")
section.add_paragraph("The spectrum ID identifies the spectrum associated with the value of N and must correspond to a previously loaded set of intensities. If the spin ID is unset, then the number of summed points for all spins will be set to the supplied value.")

# The backend function and the GUI menu/wizard settings.
uf.backend = spectrum.integration_points
uf.menu_text = "&integration_points"
uf.gui_icon = "oxygen.actions.edit-rename"
uf.wizard_size = (900, 600)
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
212
213
214
# The spectrum.read_intensities user function.
uf = uf_info.add_uf('spectrum.read_intensities')
uf.title = "Read peak intensities from a file."
uf.title_short = "Peak intensity reading."

# The file and directory arguments (the file selector uses the FD_OPEN style).
uf.add_keyarg(
    name="file",
    py_type="str",
    arg_type="file sel",
    desc_short="file name",
    desc="The name of the file containing the intensity data.",
    wiz_filesel_style=FD_OPEN,
)
uf.add_keyarg(
    name="dir",
    py_type="str",
    arg_type="dir",
    desc_short="directory name",
    desc="The directory where the file is located.",
    can_be_none=True,
)

# The spectrum ID(s), spectral dimension and integration method.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str_or_str_list",
    desc_short="spectrum ID string",
    desc="The unique spectrum ID string or list of strings to associate with the peak intensity values. If 'auto' is provided for a NMRPipe seriesTab formatted file, the IDs are auto generated in form of Z_A{i}.",
)
uf.add_keyarg(
    name="dim",
    default=1,
    py_type="int",
    min=1,
    desc_short="spectral dimension to read",
    desc="Associate the data with the spins of any dimension in the peak list. This defaults to w1, the heteronucleus in HSQC type experiments.",
)
uf.add_keyarg(
    name="int_method",
    default="height",
    py_type="str",
    desc_short="peak integration method",
    desc="The method by which peaks were integrated.",
    wiz_element_type="combo",
    wiz_combo_choices=["height", "point sum", "other"],
    wiz_read_only=True,
)

# The optional column and separator arguments; their descriptions tie them to
# the generic intensity file format.
uf.add_keyarg(
    name="int_col",
    py_type="int_or_int_list",
    desc_short="intensity column",
    desc="The optional column containing the peak intensity data (used by the generic intensity file format, or if the intensities are in a non-standard column).",
    can_be_none=True,
)
uf.add_keyarg(
    name="spin_id_col",
    py_type="int",
    arg_type="free format",
    desc_short="spin ID string column",
    desc="The spin ID string column used by the generic intensity file format (an alternative to the mol, res, and spin name and number columns).",
    can_be_none=True,
)
uf.add_keyarg(
    name="mol_name_col",
    py_type="int",
    arg_type="free format",
    desc_short="molecule name column",
    desc="The molecule name column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True,
)
uf.add_keyarg(
    name="res_num_col",
    py_type="int",
    arg_type="free format",
    desc_short="residue number column",
    desc="The residue number column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True,
)
uf.add_keyarg(
    name="res_name_col",
    py_type="int",
    arg_type="free format",
    desc_short="residue name column",
    desc="The residue name column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True,
)
uf.add_keyarg(
    name="spin_num_col",
    py_type="int",
    arg_type="free format",
    desc_short="spin number column",
    desc="The spin number column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True,
)
uf.add_keyarg(
    name="spin_name_col",
    py_type="int",
    arg_type="free format",
    desc_short="spin name column",
    desc="The spin name column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True,
)
uf.add_keyarg(
    name="sep",
    py_type="str",
    arg_type="free format",
    desc_short="column separator",
    desc="The column separator used by the generic intensity format (the default is white space).",
    can_be_none=True,
)

# The optional spin restriction and Bruker scaling arguments.
uf.add_keyarg(
    name="spin_id",
    py_type="str",
    desc_short="spin ID string",
    desc="The spin ID string used to restrict the loading of data to certain spin subsets.",
    can_be_none=True,
)
uf.add_keyarg(
    name="ncproc",
    py_type="int",
    desc_short="Bruker ncproc parameter",
    desc="The Bruker specific FID intensity scaling factor.",
    can_be_none=True,
)

# The main description section.
section = Desc_container()
uf.desc.append(section)
section.add_paragraph("The peak intensity can either be from peak heights or peak volumes.")
section.add_paragraph("The spectrum ID is a label which is subsequently utilised by other user functions. If this identifier matches that of a previously loaded set of intensities, then this indicates a replicated spectrum.")
section.add_paragraph("The spectral dimension is used to specify if the intensity data should be loaded into the spins identified by the first dimension w1, second dimension w2, etc.")
# Text fix:  "All other ... methods, ..., the value" -> "For all other ...".
section.add_paragraph("The integration method is required for the subsequent error analysis. When peak heights are measured, this should be set to 'height'. Volume integration methods are a bit varied and hence two values are accepted. If the volume integration involves pure point summation, with no deconvolution algorithms or other methods affecting peak heights, then the value should be set to 'point sum'. For all other volume integration methods, e.g. line shape fitting, the value should be set to 'other'.")
# Text fix:  removed the doubled word "and and".
section.add_paragraph("If a series of intensities extracted from Bruker FID files processed in Topspin or XWinNMR are to be compared, the ncproc parameter may need to be supplied. This is because this FID is stored using integer representation and is scaled using ncproc to avoid numerical truncation artifacts. If two spectra have significantly different maximal intensities, then ncproc will be different for both. The intensity scaling is binary, i.e. 2**ncproc. Therefore if spectrum A has an ncproc of 6 and spectrum B a value of 7, then a reference intensity in B will be double that of A. Internally, relax stores the intensities scaled by 2**ncproc.")

# The supported file formats section.
section = Desc_container("File formats")
uf.desc.append(section)
section.add_paragraph("The peak list or intensity file will be automatically determined.")
section.add_paragraph("Sparky peak list: The file should be a Sparky peak list saved after typing the command 'lt'. The default is to assume that columns 0, 1, 2, and 3 (1st, 2nd, 3rd, and 4th) contain the Sparky assignment, w1, w2, and peak intensity data respectively. The frequency data w1 and w2 are ignored while the peak intensity data can either be the peak height or volume displayed by changing the window options. If the peak intensity data is not within column 3, set the integration column to the appropriate number (column numbering starts from 0 rather than 1).")
section.add_paragraph("XEasy peak list: The file should be the saved XEasy text window output of the list peak entries command, 'tw' followed by 'le'. As the columns are fixed, the peak intensity column is hardwired to number 10 (the 11th column) which contains either the peak height or peak volume data. Because the columns are fixed, the integration column number will be ignored.")
# Text fix:  removed the doubled word "use use".
section.add_paragraph("NMRView: The file should be a NMRView peak list. The default is to use column 16 (which contains peak heights) for peak intensities. To use peak volumes (or evolumes), int_col must be set to 15.")
section.add_paragraph("NMRPipe seriesTab: The file should be a NMRPipe-format Spectral Series list. If the spectrum_id='auto', the IDs are auto generated in form of Z_A{i}.")
section.add_paragraph("Generic intensity file: This is a generic format which can be created by scripting to support non-supported peak lists. It should contain in the first few columns enough information to identify the spin. This can include columns for the molecule name, residue number, residue name, spin number, and spin name. Alternatively a spin ID string column can be used. The peak intensities can be placed in another column specified by the integration column number. Intensities from multiple spectra can be placed into different columns, and these can then be specified simultaneously by setting the integration column value to a list of columns. This list must be matched by setting the spectrum ID to a list of the same length. If columns are delimited by a character other than whitespace, this can be specified with the column separator. The spin ID can be used to restrict the loading to specific spin subsets.")

# The prompt examples section.
section = Desc_container("Prompt examples")
uf.desc.append(section)
section.add_paragraph("To read the reference and saturated spectra peak heights from the Sparky formatted files 'ref.list' and 'sat.list', type:")
section.add_prompt("relax> spectrum.read_intensities(file='ref.list', spectrum_id='ref')")
section.add_prompt("relax> spectrum.read_intensities(file='sat.list', spectrum_id='sat')")
section.add_paragraph("To read the reference and saturated spectra peak heights from the XEasy formatted files 'ref.text' and 'sat.text', type:")
section.add_prompt("relax> spectrum.read_intensities(file='ref.text', spectrum_id='ref')")
section.add_prompt("relax> spectrum.read_intensities(file='sat.text', spectrum_id='sat')")

# The backend function (spectrum.read) and the GUI menu/wizard settings.
uf.backend = spectrum.read
uf.menu_text = "&read_intensities"
uf.gui_icon = "oxygen.actions.document-open"
uf.wizard_height_desc = 300
uf.wizard_size = (1000, 750)
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
364
365
366
# The spectrum.replicated user function.
uf = uf_info.add_uf('spectrum.replicated')
uf.title = "Specify which spectra are replicates of each other."
uf.title_short = "Replicate spectra."

# The single keyword argument: a read-only combo list element fed by
# spectrum.get_ids, with wiz_combo_list_min set to 2.
uf.add_keyarg(
    name="spectrum_ids",
    py_type="str_or_str_list",
    desc_short="spectrum ID strings",
    desc="The list of replicated spectra ID strings.",
    wiz_element_type='combo_list',
    wiz_combo_iter=spectrum.get_ids,
    wiz_combo_list_min=2,
    wiz_read_only=True,
)

# The main description section.
section = Desc_container()
uf.desc.append(section)
section.add_paragraph("This is used to identify which of the loaded spectra are replicates of each other. Specifying the replicates is essential for error analysis if the baseplane RMSD has not been supplied.")

# The prompt examples section.
section = Desc_container("Prompt examples")
uf.desc.append(section)
section.add_paragraph("To specify that the NOE spectra labelled 'ref1', 'ref2', and 'ref3' are the same spectrum replicated, type one of:")
section.add_prompt("relax> spectrum.replicated(['ref1', 'ref2', 'ref3'])")
section.add_prompt("relax> spectrum.replicated(spectrum_ids=['ref1', 'ref2', 'ref3'])")
section.add_paragraph("To specify that the two R2 spectra 'ncyc2' and 'ncyc2b' are the same time point, type:")
section.add_prompt("relax> spectrum.replicated(['ncyc2', 'ncyc2b'])")

# The backend function and the GUI menu/wizard settings.
uf.backend = spectrum.replicated
uf.menu_text = "re&plicated"
uf.gui_icon = "oxygen.actions.edit-rename"
uf.wizard_size = (700, 500)
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
395