1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """The spectrum user function definitions."""
25
26
27 from os import sep
28 import wx
29
30
31 from generic_fns import spectrum
32 from graphics import WIZARD_IMAGE_PATH
33 from user_functions.data import Uf_info; uf_info = Uf_info()
34 from user_functions.data import Uf_tables; uf_tables = Uf_tables()
35 from user_functions.objects import Desc_container
36
37
38
# Register the 'spectrum' user function class and set its title, menu text
# and GUI icon attributes.
uf_class = uf_info.add_class('spectrum')
uf_class.gui_icon = "relax.fid"
uf_class.menu_text = "&spectrum"
uf_class.title = "Class for supporting the input of spectral data."
43
44
45
# The spectrum.baseplane_rmsd user function.
uf = uf_info.add_uf('spectrum.baseplane_rmsd')
uf.title_short = "Baseplane RMSD setting."
uf.title = "Set the baseplane RMSD of a given spin in a spectrum for error analysis."

# Keyword arguments, registered in the order:  error, spectrum_id, spin_id.
uf.add_keyarg(
    name="error",
    py_type="num",
    default=0.0,
    desc_short="error",
    desc="The baseplane RMSD error value.",
)
uf.add_keyarg(
    name="spectrum_id",
    py_type="str",
    desc_short="spectrum ID string",
    desc="The spectrum ID string.",
    wiz_element_type='combo',
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True,
)
uf.add_keyarg(
    name="spin_id",
    py_type="str",
    desc_short="spin ID string",
    desc="The spin ID string.",
    can_be_none=True,
)

# Description (a single untitled section).
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph(
    "The spectrum ID identifies the spectrum associated with the error and "
    "must correspond to a previously loaded set of intensities. If the spin "
    "ID is unset, then the error value for all spins will be set to the "
    "supplied value."
)

# Backend function, menu entry and wizard settings.
uf.backend = spectrum.baseplane_rmsd
uf.menu_text = "&baseplane_rmsd"
uf.gui_icon = "oxygen.actions.edit-rename"
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
uf.wizard_size = (800, 500)
80
81
82
# The spectrum.delete user function.
uf = uf_info.add_uf('spectrum.delete')
uf.title_short = "Spectral data deletion."
uf.title = "Delete the spectral data corresponding to the spectrum ID string."

# The only keyword argument:  the ID of the spectrum to remove.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str",
    desc_short="spectrum ID string",
    desc="The unique spectrum ID string.",
    wiz_element_type='combo',
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True,
)

# Description.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph(
    "The spectral data corresponding to the given spectrum ID string will be "
    "removed from the current data pipe."
)

# Prompt examples.
uf.desc.append(Desc_container("Prompt examples"))
uf.desc[-1].add_paragraph(
    "To delete the peak height data corresponding to the ID 'R1 ncyc5', "
    "type:"
)
uf.desc[-1].add_prompt("relax> spectrum.delete('R1 ncyc5')")

# Backend function, menu entry and wizard settings.
uf.backend = spectrum.delete
uf.menu_text = "&delete"
uf.gui_icon = "oxygen.actions.list-remove"
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
uf.wizard_size = (700, 400)
107
108
109
# The spectrum.error_analysis user function.  It takes no keyword arguments;
# everything below is the user-facing documentation plus the GUI settings.
uf = uf_info.add_uf('spectrum.error_analysis')
uf.title = "Perform an error analysis for peak intensities."
uf.title_short = "Peak intensity error analysis."

# Description:  overview and the summary table of the six analysis types.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph("This user function must only be called after all peak intensities have been loaded and all other necessary spectral information set. This includes the baseplane RMSD and the number of points used in volume integration, both of which are only used if spectra have not been replicated.")
uf.desc[-1].add_paragraph("Six different types of error analysis are supported depending on whether peak heights or volumes are supplied, whether noise is determined from replicated spectra or the RMSD of the baseplane noise, and whether all spectra or only a subset have been duplicated. These are:")
table = uf_tables.add_table(label="table: peak intensity error analysis", caption="The six peak intensity error analysis types.")
table.add_headings(["Int type", "Noise source", "Error scope"])
table.add_row(["Heights", "RMSD baseplane", "One sigma per peak per spectrum"])
table.add_row(["Heights", "Partial duplicate + variance averaging", "One sigma for all peaks, all spectra"])
table.add_row(["Heights", "All replicated + variance averaging", "One sigma per replicated spectra set"])
table.add_row(["Volumes", "RMSD baseplane", "One sigma per peak per spectrum"])
table.add_row(["Volumes", "Partial duplicate + variance averaging", "One sigma for all peaks, all spectra"])
table.add_row(["Volumes", "All replicated + variance averaging", "One sigma per replicated spectra set"])
# The table is attached to the description section via its label.
uf.desc[-1].add_table(table.label)

# Peak heights with baseplane noise RMSD.
uf.desc.append(Desc_container("Peak heights with baseplane noise RMSD"))
uf.desc[-1].add_paragraph("When none of the spectra have been replicated, then the peak height errors are calculated using the RMSD of the baseplane noise, the value of which is set by the spectrum.baseplane_rmsd() user function. This results in a different error per peak per spectrum. The standard deviation error measure for the peak height, sigma_I, is set to the RMSD value.")

# Peak heights with partially replicated spectra.
uf.desc.append(Desc_container("Peak heights with partially replicated spectra"))
uf.desc[-1].add_paragraph("When spectra are replicated, the variance for a single spin at a single replicated spectra set is calculated by the formula")
uf.desc[-1].add_item_list_element(None, "sigma^2 = sum({Ii - Iav}^2) / (n - 1),")
uf.desc[-1].add_paragraph("where sigma^2 is the variance, sigma is the standard deviation, n is the size of the replicated spectra set with i being the corresponding index, Ii is the peak intensity for spectrum i, and Iav is the mean over all spectra i.e. the sum of all peak intensities divided by n.")
uf.desc[-1].add_paragraph("As the value of n in the above equation is always very low since normally only a couple of spectra are collected per replicated spectra set, the variance of all spins is averaged for a single replicated spectra set. Although this results in all spins having the same error, the accuracy of the error estimate is significantly improved.")
# Text fix:  removed the doubled word "replicated replicated".
uf.desc[-1].add_paragraph("If there are in addition to the replicated spectra loaded peak intensities which only consist of a single spectrum, i.e. not all spectra are replicated, then the variances of replicated spectra sets will be averaged. This will be used for the entire experiment so that there will be only a single error value for all spins and for all spectra.")

# Peak heights with all spectra replicated.
uf.desc.append(Desc_container("Peak heights with all spectra replicated"))
# Text fix:  "the each" -> "then each".
uf.desc[-1].add_paragraph("If all spectra are collected in duplicate (triplicate or higher number of spectra are supported), then each replicated spectra set will have its own error estimate. The error for a single peak is calculated as when partially replicated spectra are collected, and these are again averaged to give a single error per replicated spectra set. However as all replicated spectra sets will have their own error estimate, variance averaging across all spectra sets will not be performed.")

# Peak volumes with baseplane noise RMSD.
uf.desc.append(Desc_container("Peak volumes with baseplane noise RMSD"))
uf.desc[-1].add_paragraph("The method of error analysis when no spectra have been replicated and peak volumes are used is highly dependent on the integration method. Many methods simply sum the number of points within a fixed region, either a box or oval object. The number of points used, N, must be specified by another user function in this class. Then the error is simply given by the sum of variances:")
uf.desc[-1].add_item_list_element(None, "sigma_vol^2 = sigma_i^2 * N,")
uf.desc[-1].add_paragraph("where sigma_vol is the standard deviation of the volume, sigma_i is the standard deviation of a single point assumed to be equal to the RMSD of the baseplane noise, and N is the total number of points used in the summation integration method. For a box integration method, this converts to the Nicholson, Kay, Baldisseri, Arango, Young, Bax, and Torchia (1992) Biochemistry, 31: 5253-5263 equation:")
uf.desc[-1].add_item_list_element(None, "sigma_vol = sigma_i * sqrt(n*m),")
# Text fix:  "a number of programs ... does not use" -> "do not use".
uf.desc[-1].add_paragraph("where n and m are the dimensions of the box. Note that a number of programs, for example peakint (http://hugin.ethz.ch/wuthrich/software/xeasy/xeasy_m15.html) do not use all points within the box. And if the number N can not be determined, this category of error analysis is not possible.")
uf.desc[-1].add_paragraph("Also note that non-point summation methods, for example when line shape fitting is used to determine peak volumes, the equations above cannot be used. Hence again this category of error analysis cannot be used. This is the case for one of the three integration methods used by Sparky (http://www.cgl.ucsf.edu/home/sparky/manual/peaks.html#Integration). And if fancy techniques are used, for example as Cara does to deconvolute overlapping peaks (http://www.cara.ethz.ch/Wiki/Integration), this again makes this error analysis impossible.")

# Peak volumes with partially replicated spectra.
uf.desc.append(Desc_container("Peak volumes with partially replicated spectra"))
uf.desc[-1].add_paragraph("When peak volumes are measured by any integration method and a few of the spectra are replicated, then the intensity errors are calculated identically as described in the 'Peak heights with partially replicated spectra' section above.")

# Peak volumes with all spectra replicated.
uf.desc.append(Desc_container("Peak volumes with all spectra replicated"))
uf.desc[-1].add_paragraph("With all spectra replicated and again using any integration methodology, the intensity errors can be calculated as described in the 'Peak heights with all spectra replicated' section above.")

# Backend function, menu entry and wizard settings.
uf.backend = spectrum.error_analysis
uf.menu_text = "&error_analysis"
uf.gui_icon = "oxygen.categories.applications-education"
uf.wizard_height_desc = 550
uf.wizard_size = (1000, 700)
uf.wizard_image = WIZARD_IMAGE_PATH + 'spectrum' + sep + 'spectrum_200.png'
uf.wizard_apply_button = False
160
161
162
# The spectrum.integration_points user function.
uf = uf_info.add_uf('spectrum.integration_points')
uf.title_short = "Number of integration points."
uf.title = "Set the number of summed points used in volume integration of a given spin in a spectrum."

# Keyword arguments, registered in the order:  N, spectrum_id, spin_id.
uf.add_keyarg(
    name="N",
    py_type="int",
    min=1,
    max=10000000,
    desc_short="number of summed points",
    desc="The number of points used by the summation volume integration method.",
)
uf.add_keyarg(
    name="spectrum_id",
    py_type="str",
    desc_short="spectrum ID string",
    desc="The spectrum ID string.",
    wiz_element_type='combo',
    wiz_combo_iter=spectrum.get_ids,
    wiz_read_only=True,
)
uf.add_keyarg(
    name="spin_id",
    py_type="str",
    desc_short="spin ID string",
    desc="Restrict setting the number to certain spins.",
    can_be_none=True,
)

# Description (a single untitled section with two paragraphs).
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph(
    "For a complete description of which integration methods and how many "
    "points N are used for different integration techniques, please see the "
    "spectrum.error_analysis user function documentation."
)
uf.desc[-1].add_paragraph(
    "The spectrum ID identifies the spectrum associated with the value of N "
    "and must correspond to a previously loaded set of intensities. If the "
    "spin ID is unset, then the number of summed points for all spins will "
    "be set to the supplied value."
)

# Backend function, menu entry and wizard settings.
uf.backend = spectrum.integration_points
uf.menu_text = "&integration_points"
uf.gui_icon = "oxygen.actions.edit-rename"
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
uf.wizard_size = (900, 600)
199
200
201
# The spectrum.read_intensities user function.
uf = uf_info.add_uf('spectrum.read_intensities')
uf.title = "Read peak intensities from a file."
uf.title_short = "Peak intensity reading."

# Keyword arguments.  The registration order is significant and is unchanged.
uf.add_keyarg(
    name="file",
    py_type="str",
    arg_type="file sel",
    desc_short="file name",
    desc="The name of the file containing the intensity data.",
    wiz_filesel_style=wx.FD_OPEN
)
uf.add_keyarg(
    name="dir",
    py_type="str",
    arg_type="dir",
    desc_short="directory name",
    desc="The directory where the file is located.",
    can_be_none=True
)
# Fix:  the 'File formats' description below instructs the user to set the
# spectrum ID to a list matching a list of intensity columns, and int_col is
# already declared as "int_or_int_list".  Declaring this argument as a plain
# "str" contradicted that, so the type is widened to "str_or_str_list" (single
# strings remain valid).
# NOTE(review): assumes the spectrum.read backend handles a list of IDs as the
# documentation in this block states - confirm against generic_fns.spectrum.
uf.add_keyarg(
    name="spectrum_id",
    py_type="str_or_str_list",
    desc_short="spectrum ID string",
    desc="The unique spectrum ID string or list of strings to associate with the peak intensity values. If a list is given, the intensity column must be a list of the same length."
)
uf.add_keyarg(
    name="heteronuc",
    default="N",
    py_type="str",
    desc_short="heteronucleus name",
    desc="The name of the heteronucleus as specified in the peak intensity file."
)
uf.add_keyarg(
    name="proton",
    default="HN",
    py_type="str",
    desc_short="proton name",
    desc="The name of the proton as specified in the peak intensity file."
)
uf.add_keyarg(
    name="int_method",
    default="height",
    py_type="str",
    desc_short="peak integration method",
    desc="The method by which peaks were integrated.",
    wiz_element_type="combo",
    wiz_combo_choices=["height", "point sum", "other"],
    wiz_read_only=True
)
uf.add_keyarg(
    name="int_col",
    py_type="int_or_int_list",
    desc_short="intensity column",
    desc="The optional column containing the peak intensity data (used by the generic intensity file format, or if the intensities are in a non-standard column).",
    can_be_none=True
)
uf.add_keyarg(
    name="spin_id_col",
    py_type="int",
    arg_type="free format",
    desc_short="spin ID string column",
    desc="The spin ID string column used by the generic intensity file format (an alternative to the mol, res, and spin name and number columns).",
    can_be_none=True
)
uf.add_keyarg(
    name="mol_name_col",
    py_type="int",
    arg_type="free format",
    desc_short="molecule name column",
    desc="The molecule name column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="res_num_col",
    py_type="int",
    arg_type="free format",
    desc_short="residue number column",
    desc="The residue number column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="res_name_col",
    py_type="int",
    arg_type="free format",
    desc_short="residue name column",
    desc="The residue name column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="spin_num_col",
    py_type="int",
    arg_type="free format",
    desc_short="spin number column",
    desc="The spin number column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="spin_name_col",
    py_type="int",
    arg_type="free format",
    desc_short="spin name column",
    desc="The spin name column used by the generic intensity file format (alternative to the spin ID column).",
    can_be_none=True
)
uf.add_keyarg(
    name="sep",
    py_type="str",
    arg_type="free format",
    desc_short="column separator",
    desc="The column separator used by the generic intensity format (the default is white space).",
    can_be_none=True
)
uf.add_keyarg(
    name="spin_id",
    py_type="str",
    desc_short="spin ID string",
    desc="The spin ID string used by the generic intensity file format to restrict the loading of data to certain spin subsets.",
    can_be_none=True
)
uf.add_keyarg(
    name="ncproc",
    py_type="int",
    desc_short="Bruker ncproc parameter",
    desc="The Bruker specific FID intensity scaling factor.",
    can_be_none=True
)

# Description.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph("The peak intensity can either be from peak heights or peak volumes.")
uf.desc[-1].add_paragraph("The spectrum ID is a label which is subsequently utilised by other user functions. If this identifier matches that of a previously loaded set of intensities, then this indicates a replicated spectrum.")
uf.desc[-1].add_paragraph("The heteronucleus and proton should be set respectively to the name of the heteronucleus and proton in the file. Only those lines which match these labels will be used.")
# Text fix:  the last sentence lacked its leading preposition ("For all other ...").
uf.desc[-1].add_paragraph("The integration method is required for the subsequent error analysis. When peak heights are measured, this should be set to 'height'. Volume integration methods are a bit varied and hence two values are accepted. If the volume integration involves pure point summation, with no deconvolution algorithms or other methods affecting peak heights, then the value should be set to 'point sum'. For all other volume integration methods, e.g. line shape fitting, the value should be set to 'other'.")
# Text fix:  removed the doubled word "and and".
uf.desc[-1].add_paragraph("If a series of intensities extracted from Bruker FID files processed in Topspin or XWinNMR are to be compared, the ncproc parameter may need to be supplied. This is because this FID is stored using integer representation and is scaled using ncproc to avoid numerical truncation artifacts. If two spectra have significantly different maximal intensities, then ncproc will be different for both. The intensity scaling is binary, i.e. 2**ncproc. Therefore if spectrum A has an ncproc of 6 and spectrum B a value of 7, then a reference intensity in B will be double that of A. Internally, relax stores the intensities scaled by 2**ncproc.")

# File formats.
uf.desc.append(Desc_container("File formats"))
uf.desc[-1].add_paragraph("The peak list or intensity file will be automatically determined.")
uf.desc[-1].add_paragraph("Sparky peak list: The file should be a Sparky peak list saved after typing the command 'lt'. The default is to assume that columns 0, 1, 2, and 3 (1st, 2nd, 3rd, and 4th) contain the Sparky assignment, w1, w2, and peak intensity data respectively. The frequency data w1 and w2 are ignored while the peak intensity data can either be the peak height or volume displayed by changing the window options. If the peak intensity data is not within column 3, set the integration column to the appropriate number (column numbering starts from 0 rather than 1).")
uf.desc[-1].add_paragraph("XEasy peak list: The file should be the saved XEasy text window output of the list peak entries command, 'tw' followed by 'le'. As the columns are fixed, the peak intensity column is hardwired to number 10 (the 11th column) which contains either the peak height or peak volume data. Because the columns are fixed, the integration column number will be ignored.")
# Text fix:  removed the doubled word "use use".
uf.desc[-1].add_paragraph("NMRView: The file should be a NMRView peak list. The default is to use column 16 (which contains peak heights) for peak intensities. To use peak volumes (or evolumes), int_col must be set to 15.")
uf.desc[-1].add_paragraph("Generic intensity file: This is a generic format which can be created by scripting to support non-supported peak lists. It should contain in the first few columns enough information to identify the spin. This can include columns for the molecule name, residue number, residue name, spin number, and spin name. Alternatively a spin ID string column can be used. The peak intensities can be placed in another column specified by the integration column number. Intensities from multiple spectra can be placed into different columns, and these can then be specified simultaneously by setting the integration column value to a list of columns. This list must be matched by setting the spectrum ID to a list of the same length. If columns are delimited by a character other than whitespace, this can be specified with the column separator. The spin ID can be used to restrict the loading to specific spin subsets.")

# Prompt examples.
uf.desc.append(Desc_container("Prompt examples"))
uf.desc[-1].add_paragraph("To read the reference and saturated spectra peak heights from the Sparky formatted files 'ref.list' and 'sat.list', type:")
uf.desc[-1].add_prompt("relax> spectrum.read_intensities(file='ref.list', spectrum_id='ref')")
uf.desc[-1].add_prompt("relax> spectrum.read_intensities(file='sat.list', spectrum_id='sat')")
uf.desc[-1].add_paragraph("To read the reference and saturated spectra peak heights from the XEasy formatted files 'ref.text' and 'sat.text', type:")
uf.desc[-1].add_prompt("relax> spectrum.read_intensities(file='ref.text', spectrum_id='ref')")
uf.desc[-1].add_prompt("relax> spectrum.read_intensities(file='sat.text', spectrum_id='sat')")

# Backend function, menu entry and wizard settings.
uf.backend = spectrum.read
# Consistency fix:  every other user function in this class uses its full name
# as the menu text, so "&read" becomes "&read_intensities".
uf.menu_text = "&read_intensities"
uf.gui_icon = "oxygen.actions.document-open"
uf.wizard_height_desc = 140
uf.wizard_size = (1000, 750)
uf.wizard_image = WIZARD_IMAGE_PATH + 'spectrum' + sep + 'spectrum_200.png'
356
357
358
# The spectrum.replicated user function.
uf = uf_info.add_uf('spectrum.replicated')
uf.title_short = "Replicate spectra."
uf.title = "Specify which spectra are replicates of each other."

# The only keyword argument:  the IDs of the spectra which are replicates.
uf.add_keyarg(
    name="spectrum_ids",
    py_type="str_or_str_list",
    desc_short="spectrum ID strings",
    desc="The list of replicated spectra ID strings.",
    wiz_element_type='combo_list',
    wiz_combo_iter=spectrum.get_ids,
    wiz_combo_list_min=2,
    wiz_read_only=True,
)

# Description.
uf.desc.append(Desc_container())
uf.desc[-1].add_paragraph(
    "This is used to identify which of the loaded spectra are replicates of "
    "each other. Specifying the replicates is essential for error analysis "
    "if the baseplane RMSD has not been supplied."
)

# Prompt examples.
uf.desc.append(Desc_container("Prompt examples"))
uf.desc[-1].add_paragraph(
    "To specify that the NOE spectra labelled 'ref1', 'ref2', and 'ref3' are "
    "the same spectrum replicated, type one of:"
)
uf.desc[-1].add_prompt("relax> spectrum.replicated(['ref1', 'ref2', 'ref3'])")
uf.desc[-1].add_prompt("relax> spectrum.replicated(spectrum_ids=['ref1', 'ref2', 'ref3'])")
uf.desc[-1].add_paragraph(
    "To specify that the two R2 spectra 'ncyc2' and 'ncyc2b' are the same "
    "time point, type:"
)
uf.desc[-1].add_prompt("relax> spectrum.replicated(['ncyc2', 'ncyc2b'])")

# Backend function, menu entry and wizard settings.
uf.backend = spectrum.replicated
uf.menu_text = "re&plicated"
uf.gui_icon = "oxygen.actions.edit-rename"
uf.wizard_image = WIZARD_IMAGE_PATH + sep.join(['spectrum', 'spectrum_200.png'])
uf.wizard_size = (700, 500)
387