1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """The spectrum user function definitions."""
24
25
26 from os import sep
27 import dep_check
28 if dep_check.wx_module:
29 from wx import FD_OPEN
30 else:
31 FD_OPEN = -1
32
33
34 from generic_fns import spectrum
35 from graphics import WIZARD_IMAGE_PATH
36 from user_functions.data import Uf_info; uf_info = Uf_info()
37 from user_functions.data import Uf_tables; uf_tables = Uf_tables()
38 from user_functions.objects import Desc_container
39
40
41
# Register the 'spectrum' user function class and set its GUI presentation attributes.
uf_class = uf_info.add_class("spectrum")
uf_class.menu_text = "&spectrum"
uf_class.gui_icon = "relax.fid"
uf_class.title = "Class for supporting the input of spectral data."
46
47
48
# The spectrum.baseplane_rmsd user function.
uf = uf_info.add_uf("spectrum.baseplane_rmsd")
uf.title = "Set the baseplane RMSD of a given spin in a spectrum for error analysis."
uf.title_short = "Baseplane RMSD setting."

# Argument 1: the numeric error value (defaults to 0.0).
uf.add_keyarg(
    name="error",
    default=0.0,
    py_type="num",
    desc_short="error",
    desc="The baseplane RMSD error value."
)

# Argument 2: the spectrum ID, a read-only combo element with spectrum.get_ids as its iterator.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str",
    desc_short="spectrum ID string",
    desc="The spectrum ID string.",
    wiz_element_type="combo",
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True
)

# Argument 3: the optional spin ID (None is permitted).
uf.add_keyarg(
    name="spin_id",
    py_type="str",
    desc_short="spin ID string",
    desc="The spin ID string.",
    can_be_none=True
)

# The user function description.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph(
    "The spectrum ID identifies the spectrum associated with the error and must correspond to a previously loaded set of intensities. "
    "If the spin ID is unset, then the error value for all spins will be set to the supplied value."
)

# The backend function and the GUI settings.
uf.backend = spectrum.baseplane_rmsd
uf.menu_text = "&baseplane_rmsd"
uf.gui_icon = "oxygen.actions.edit-rename"
uf.wizard_size = (800, 500)
uf.wizard_image = sep.join([WIZARD_IMAGE_PATH + "spectrum", "spectrum_200.png"])
83
84
85
# The spectrum.delete user function.
uf = uf_info.add_uf("spectrum.delete")
uf.title = "Delete the spectral data corresponding to the spectrum ID string."
uf.title_short = "Spectral data deletion."

# The only argument: the spectrum ID, a read-only combo element with spectrum.get_ids as its iterator.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str",
    desc_short="spectrum ID string",
    desc="The unique spectrum ID string.",
    wiz_element_type="combo",
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True
)

# The user function description.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph(
    "The spectral data corresponding to the given spectrum ID string "
    "will be removed from the current data pipe."
)

# The prompt examples section.
uf.desc.append(Desc_container("Prompt examples"))
uf.desc[-1].add_paragraph(
    "To delete the peak height data corresponding to the ID 'R1 ncyc5', type:"
)
uf.desc[-1].add_prompt("relax> spectrum.delete('R1 ncyc5')")

# The backend function and the GUI settings.
uf.backend = spectrum.delete
uf.menu_text = "&delete"
uf.gui_icon = "oxygen.actions.list-remove"
uf.wizard_size = (700, 400)
uf.wizard_image = sep.join([WIZARD_IMAGE_PATH + "spectrum", "spectrum_200.png"])
110
111
112
# The spectrum.error_analysis user function (no arguments are registered for it).
uf = uf_info.add_uf("spectrum.error_analysis")
uf.title = "Perform an error analysis for peak intensities."
uf.title_short = "Peak intensity error analysis."

# The overview section, ending with the table of the six error analysis types.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph("This user function must only be called after all peak intensities have been loaded and all other necessary spectral information set. This includes the baseplane RMSD and the number of points used in volume integration, both of which are only used if spectra have not been replicated.")
uf.desc[-1].add_paragraph("Six different types of error analysis are supported depending on whether peak heights or volumes are supplied, whether noise is determined from replicated spectra or the RMSD of the baseplane noise, and whether all spectra or only a subset have been duplicated. These are:")
table = uf_tables.add_table(label="table: peak intensity error analysis", caption="The six peak intensity error analysis types.")
table.add_headings(["Int type", "Noise source", "Error scope"])
table.add_row(["Heights", "RMSD baseplane", "One sigma per peak per spectrum"])
table.add_row(["Heights", "Partial duplicate + variance averaging", "One sigma for all peaks, all spectra"])
table.add_row(["Heights", "All replicated + variance averaging", "One sigma per replicated spectra set"])
table.add_row(["Volumes", "RMSD baseplane", "One sigma per peak per spectrum"])
table.add_row(["Volumes", "Partial duplicate + variance averaging", "One sigma for all peaks, all spectra"])
table.add_row(["Volumes", "All replicated + variance averaging", "One sigma per replicated spectra set"])
# The table is attached to the description via its label.
uf.desc[-1].add_table(table.label)

# Peak heights, no replication.
uf.desc.append(Desc_container("Peak heights with baseplane noise RMSD"))
uf.desc[-1].add_paragraph("When none of the spectra have been replicated, then the peak height errors are calculated using the RMSD of the baseplane noise, the value of which is set by the spectrum.baseplane_rmsd user function. This results in a different error per peak per spectrum. The standard deviation error measure for the peak height, sigma_I, is set to the RMSD value.")

# Peak heights, partial replication.
uf.desc.append(Desc_container("Peak heights with partially replicated spectra"))
uf.desc[-1].add_paragraph("When spectra are replicated, the variance for a single spin at a single replicated spectra set is calculated by the formula")
uf.desc[-1].add_item_list_element(None, "sigma^2 = sum({Ii - Iav}^2) / (n - 1),")
uf.desc[-1].add_paragraph("where sigma^2 is the variance, sigma is the standard deviation, n is the size of the replicated spectra set with i being the corresponding index, Ii is the peak intensity for spectrum i, and Iav is the mean over all spectra i.e. the sum of all peak intensities divided by n.")
uf.desc[-1].add_paragraph("As the value of n in the above equation is always very low since normally only a couple of spectra are collected per replicated spectra set, the variance of all spins is averaged for a single replicated spectra set. Although this results in all spins having the same error, the accuracy of the error estimate is significantly improved.")
# Fixed: the doubled word "replicated replicated" in the help text.
uf.desc[-1].add_paragraph("If there are in addition to the replicated spectra loaded peak intensities which only consist of a single spectrum, i.e. not all spectra are replicated, then the variances of replicated spectra sets will be averaged. This will be used for the entire experiment so that there will be only a single error value for all spins and for all spectra.")

# Peak heights, full replication.
uf.desc.append(Desc_container("Peak heights with all spectra replicated"))
# Fixed: "the each" -> "then each".
uf.desc[-1].add_paragraph("If all spectra are collected in duplicate (triplicate or higher number of spectra are supported), then each replicated spectra set will have its own error estimate. The error for a single peak is calculated as when partially replicated spectra are collected, and these are again averaged to give a single error per replicated spectra set. However as all replicated spectra sets will have their own error estimate, variance averaging across all spectra sets will not be performed.")

# Peak volumes, no replication.
uf.desc.append(Desc_container("Peak volumes with baseplane noise RMSD"))
uf.desc[-1].add_paragraph("The method of error analysis when no spectra have been replicated and peak volumes are used is highly dependent on the integration method. Many methods simply sum the number of points within a fixed region, either a box or oval object. The number of points used, N, must be specified by another user function in this class. Then the error is simply given by the sum of variances:")
uf.desc[-1].add_item_list_element(None, "sigma_vol^2 = sigma_i^2 * N,")
uf.desc[-1].add_paragraph("where sigma_vol is the standard deviation of the volume, sigma_i is the standard deviation of a single point assumed to be equal to the RMSD of the baseplane noise, and N is the total number of points used in the summation integration method. For a box integration method, this converts to the Nicholson, Kay, Baldisseri, Arango, Young, Bax, and Torchia (1992) Biochemistry, 31: 5253-5263 equation:")
uf.desc[-1].add_item_list_element(None, "sigma_vol = sigma_i * sqrt(n*m),")
# Fixed: subject/verb agreement ("a number of programs ... do not use").
uf.desc[-1].add_paragraph("where n and m are the dimensions of the box. Note that a number of programs, for example peakint (http://hugin.ethz.ch/wuthrich/software/xeasy/xeasy_m15.html) do not use all points within the box. And if the number N can not be determined, this category of error analysis is not possible.")
uf.desc[-1].add_paragraph("Also note that non-point summation methods, for example when line shape fitting is used to determine peak volumes, the equations above cannot be used. Hence again this category of error analysis cannot be used. This is the case for one of the three integration methods used by Sparky (http://www.cgl.ucsf.edu/home/sparky/manual/peaks.html#Integration). And if fancy techniques are used, for example as Cara does to deconvolute overlapping peaks (http://www.cara.ethz.ch/Wiki/Integration), this again makes this error analysis impossible.")

# Peak volumes, partial replication.
uf.desc.append(Desc_container("Peak volumes with partially replicated spectra"))
uf.desc[-1].add_paragraph("When peak volumes are measured by any integration method and a few of the spectra are replicated, then the intensity errors are calculated identically as described in the 'Peak heights with partially replicated spectra' section above.")

# Peak volumes, full replication.
uf.desc.append(Desc_container("Peak volumes with all spectra replicated"))
uf.desc[-1].add_paragraph("With all spectra replicated and again using any integration methodology, the intensity errors can be calculated as described in the 'Peak heights with all spectra replicated' section above.")

# The backend function and the GUI settings.
uf.backend = spectrum.error_analysis
uf.menu_text = "&error_analysis"
uf.gui_icon = "oxygen.categories.applications-education"
uf.wizard_height_desc = 550
uf.wizard_size = (1000, 700)
uf.wizard_image = WIZARD_IMAGE_PATH + 'spectrum' + sep + 'spectrum_200.png'
uf.wizard_apply_button = False
163
164
165
# The spectrum.integration_points user function.
uf = uf_info.add_uf("spectrum.integration_points")
uf.title = "Set the number of summed points used in volume integration of a given spin in a spectrum."
uf.title_short = "Number of integration points."

# Argument 1: the integer number of points N, bounded by the min and max values.
uf.add_keyarg(
    name="N",
    py_type="int",
    min=1,
    max=10000000,
    desc_short="number of summed points",
    desc="The number of points used by the summation volume integration method."
)

# Argument 2: the spectrum ID, a read-only combo element with spectrum.get_ids as its iterator.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str",
    desc_short="spectrum ID string",
    desc="The spectrum ID string.",
    wiz_element_type="combo",
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True
)

# Argument 3: the optional spin ID (None is permitted).
uf.add_keyarg(
    name="spin_id",
    py_type="str",
    desc_short="spin ID string",
    desc="Restrict setting the number to certain spins.",
    can_be_none=True
)

# The user function description.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph(
    "For a complete description of which integration methods and how many points N are used for different integration techniques, "
    "please see the spectrum.error_analysis user function documentation."
)
uf.desc[-1].add_paragraph(
    "The spectrum ID identifies the spectrum associated with the value of N and must correspond to a previously loaded set of intensities. "
    "If the spin ID is unset, then the number of summed points for all spins will be set to the supplied value."
)

# The backend function and the GUI settings.
uf.backend = spectrum.integration_points
uf.menu_text = "&integration_points"
uf.gui_icon = "oxygen.actions.edit-rename"
uf.wizard_size = (900, 600)
uf.wizard_image = sep.join([WIZARD_IMAGE_PATH + "spectrum", "spectrum_200.png"])
202
203
204
# The spectrum.read_intensities user function.
uf = uf_info.add_uf("spectrum.read_intensities")
uf.title = "Read peak intensities from a file."
uf.title_short = "Peak intensity reading."

# The file and directory arguments.
uf.add_keyarg(
    name="file",
    py_type="str",
    arg_type="file sel",
    desc_short="file name",
    desc="The name of the file containing the intensity data.",
    wiz_filesel_style=FD_OPEN
)
uf.add_keyarg(
    name="dir",
    py_type="str",
    arg_type="dir",
    desc_short="directory name",
    desc="The directory where the file is located.",
    can_be_none=True
)

# The spectrum ID.  This must accept a list as well as a single string:  the
# 'Generic intensity file' description below states that when the intensity
# column is a list of columns (int_col is 'int_or_int_list'), the spectrum ID
# has to be a list of the same length.  A plain 'str' type contradicts that.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str_or_str_list",
    desc_short="spectrum ID string",
    desc="The unique spectrum ID string or list of strings to associate with the peak intensity values."
)

# The nuclei names used for matching lines in the peak list.
uf.add_keyarg(
    name="heteronuc",
    default="N",
    py_type="str",
    desc_short="heteronucleus name",
    desc="The name of the heteronucleus as specified in the peak intensity file."
)
uf.add_keyarg(
    name="proton",
    default="HN",
    py_type="str",
    desc_short="proton name",
    desc="The name of the proton as specified in the peak intensity file."
)

# The integration method, restricted in the wizard to the three listed choices.
uf.add_keyarg(
    name="int_method",
    default="height",
    py_type="str",
    desc_short="peak integration method",
    desc="The method by which peaks were integrated.",
    wiz_element_type="combo",
    wiz_combo_choices=["height", "point sum", "other"],
    wiz_read_only=True
)

# The intensity column(s).
uf.add_keyarg(
    name="int_col",
    py_type="int_or_int_list",
    desc_short="intensity column",
    desc="The optional column containing the peak intensity data (used by the generic intensity file format, or if the intensities are in a non-standard column).",
    can_be_none=True
)

# The free format column arguments of the generic intensity file format (all optional).
uf.add_keyarg(
    name="spin_id_col",
    py_type="int",
    arg_type="free format",
    desc_short="spin ID string column",
    desc="The spin ID string column used by the generic intensity file format (an alternative to the mol, res, and spin name and number columns).",
    can_be_none=True
)
uf.add_keyarg(
    name="mol_name_col",
    py_type="int",
    arg_type="free format",
    desc_short="molecule name column",
    desc="The molecule name column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="res_num_col",
    py_type="int",
    arg_type="free format",
    desc_short="residue number column",
    desc="The residue number column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="res_name_col",
    py_type="int",
    arg_type="free format",
    desc_short="residue name column",
    desc="The residue name column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="spin_num_col",
    py_type="int",
    arg_type="free format",
    desc_short="spin number column",
    desc="The spin number column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="spin_name_col",
    py_type="int",
    arg_type="free format",
    desc_short="spin name column",
    desc="The spin name column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="sep",
    py_type="str",
    arg_type="free format",
    desc_short="column separator",
    desc="The column separator used by the generic intensity format (the default is white space).",
    can_be_none=True
)

# The optional spin ID restriction.
uf.add_keyarg(
    name="spin_id",
    py_type="str",
    desc_short="spin ID string",
    desc="The spin ID string used by the generic intensity file format to restrict the loading of data to certain spin subsets.",
    can_be_none=True
)

# The optional Bruker intensity scaling parameter.
uf.add_keyarg(
    name="ncproc",
    py_type="int",
    desc_short="Bruker ncproc parameter",
    desc="The Bruker specific FID intensity scaling factor.",
    can_be_none=True
)

# The user function description.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph("The peak intensity can either be from peak heights or peak volumes.")
uf.desc[-1].add_paragraph("The spectrum ID is a label which is subsequently utilised by other user functions. If this identifier matches that of a previously loaded set of intensities, then this indicates a replicated spectrum.")
uf.desc[-1].add_paragraph("The heteronucleus and proton should be set respectively to the name of the heteronucleus and proton in the file. Only those lines which match these labels will be used.")
uf.desc[-1].add_paragraph("The integration method is required for the subsequent error analysis. When peak heights are measured, this should be set to 'height'. Volume integration methods are a bit varied and hence two values are accepted. If the volume integration involves pure point summation, with no deconvolution algorithms or other methods affecting peak heights, then the value should be set to 'point sum'. All other volume integration methods, e.g. line shape fitting, the value should be set to 'other'.")
# Fixed: the doubled word "and and" in the help text.
uf.desc[-1].add_paragraph("If a series of intensities extracted from Bruker FID files processed in Topspin or XWinNMR are to be compared, the ncproc parameter may need to be supplied. This is because this FID is stored using integer representation and is scaled using ncproc to avoid numerical truncation artifacts. If two spectra have significantly different maximal intensities, then ncproc will be different for both. The intensity scaling is binary, i.e. 2**ncproc. Therefore if spectrum A has an ncproc of 6 and spectrum B a value of 7, then a reference intensity in B will be double that of A. Internally, relax stores the intensities scaled by 2**ncproc.")

# The supported file formats section.
uf.desc.append(Desc_container("File formats"))
uf.desc[-1].add_paragraph("The peak list or intensity file will be automatically determined.")
uf.desc[-1].add_paragraph("Sparky peak list: The file should be a Sparky peak list saved after typing the command 'lt'. The default is to assume that columns 0, 1, 2, and 3 (1st, 2nd, 3rd, and 4th) contain the Sparky assignment, w1, w2, and peak intensity data respectively. The frequency data w1 and w2 are ignored while the peak intensity data can either be the peak height or volume displayed by changing the window options. If the peak intensity data is not within column 3, set the integration column to the appropriate number (column numbering starts from 0 rather than 1).")
uf.desc[-1].add_paragraph("XEasy peak list: The file should be the saved XEasy text window output of the list peak entries command, 'tw' followed by 'le'. As the columns are fixed, the peak intensity column is hardwired to number 10 (the 11th column) which contains either the peak height or peak volume data. Because the columns are fixed, the integration column number will be ignored.")
# Fixed: the doubled word "use use" in the help text.
uf.desc[-1].add_paragraph("NMRView: The file should be a NMRView peak list. The default is to use column 16 (which contains peak heights) for peak intensities. To use peak volumes (or evolumes), int_col must be set to 15.")
uf.desc[-1].add_paragraph("Generic intensity file: This is a generic format which can be created by scripting to support non-supported peak lists. It should contain in the first few columns enough information to identify the spin. This can include columns for the molecule name, residue number, residue name, spin number, and spin name. Alternatively a spin ID string column can be used. The peak intensities can be placed in another column specified by the integration column number. Intensities from multiple spectra can be placed into different columns, and these can then be specified simultaneously by setting the integration column value to a list of columns. This list must be matched by setting the spectrum ID to a list of the same length. If columns are delimited by a character other than whitespace, this can be specified with the column separator. The spin ID can be used to restrict the loading to specific spin subsets.")

# The prompt examples section.
uf.desc.append(Desc_container("Prompt examples"))
uf.desc[-1].add_paragraph("To read the reference and saturated spectra peak heights from the Sparky formatted files 'ref.list' and 'sat.list', type:")
uf.desc[-1].add_prompt("relax> spectrum.read_intensities(file='ref.list', spectrum_id='ref')")
uf.desc[-1].add_prompt("relax> spectrum.read_intensities(file='sat.list', spectrum_id='sat')")
uf.desc[-1].add_paragraph("To read the reference and saturated spectra peak heights from the XEasy formatted files 'ref.text' and 'sat.text', type:")
uf.desc[-1].add_prompt("relax> spectrum.read_intensities(file='ref.text', spectrum_id='ref')")
uf.desc[-1].add_prompt("relax> spectrum.read_intensities(file='sat.text', spectrum_id='sat')")

# The backend function and the GUI settings.
uf.backend = spectrum.read
uf.menu_text = "&read"
uf.gui_icon = "oxygen.actions.document-open"
uf.wizard_height_desc = 140
uf.wizard_size = (1000, 750)
uf.wizard_image = WIZARD_IMAGE_PATH + 'spectrum' + sep + 'spectrum_200.png'
359
360
361
# The spectrum.replicated user function.
uf = uf_info.add_uf("spectrum.replicated")
uf.title = "Specify which spectra are replicates of each other."
uf.title_short = "Replicate spectra."

# The only argument: the spectrum IDs, a read-only combo list element with
# spectrum.get_ids as its iterator and a combo list minimum of 2.
uf.add_keyarg(
    name="spectrum_ids",
    py_type="str_or_str_list",
    desc_short="spectrum ID strings",
    desc="The list of replicated spectra ID strings.",
    wiz_element_type="combo_list",
    wiz_combo_iter=spectrum.get_ids,
    wiz_combo_list_min=2,
    wiz_read_only=True
)

# The user function description.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph(
    "This is used to identify which of the loaded spectra are replicates of each other. "
    "Specifying the replicates is essential for error analysis if the baseplane RMSD has not been supplied."
)

# The prompt examples section.
uf.desc.append(Desc_container("Prompt examples"))
uf.desc[-1].add_paragraph(
    "To specify that the NOE spectra labelled 'ref1', 'ref2', and 'ref3' "
    "are the same spectrum replicated, type one of:"
)
uf.desc[-1].add_prompt("relax> spectrum.replicated(['ref1', 'ref2', 'ref3'])")
uf.desc[-1].add_prompt("relax> spectrum.replicated(spectrum_ids=['ref1', 'ref2', 'ref3'])")
uf.desc[-1].add_paragraph(
    "To specify that the two R2 spectra 'ncyc2' and 'ncyc2b' "
    "are the same time point, type:"
)
uf.desc[-1].add_prompt("relax> spectrum.replicated(['ncyc2', 'ncyc2b'])")

# The backend function and the GUI settings.
uf.backend = spectrum.replicated
uf.menu_text = "re&plicated"
uf.gui_icon = "oxygen.actions.edit-rename"
uf.wizard_size = (700, 500)
uf.wizard_image = sep.join([WIZARD_IMAGE_PATH + "spectrum", "spectrum_200.png"])
390