import logging
logger = logging.getLogger('Collections')

import sets, copy

import scipy
import scipy.integrate

import SloppyCell
import SloppyCell.Utility as Utility
import SloppyCell.KeyedList_mod as KeyedList_mod
KeyedList = KeyedList_mod.KeyedList

if SloppyCell.HAVE_PYPAR:
    import pypar

17 """
18 ExperimentCollection(experiment name list)
19
20 An ExperimentCollection unites a collection of experiments. For now, it's
21 most important function is to collect group the independent variables by
22 Calculation to avoid wasting computer effort redoing calculations.
23
24 Individual experiments can be accessed via dictionary-type indexing.
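
    For example (a sketch; the data layout is described under GetData()
    below, and the names are hypothetical):
        expt = Experiment('expt1', data={'net1': {'A': {1.0: (0.8, 0.1)}}})
        expts = ExperimentCollection([expt])
        expts['expt1'] is expt   # dictionary-style access by name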
    """
    def __init__(self, exptList = []):
        for expt in exptList:
            self.AddExperiment(expt)

    def AddExperiment(self, expt):
        """
        AddExperiment(expt)

        Adds the experiment to the collection, keyed by its name.
        """
        if expt.GetName() in self:
            raise ValueError("Experiment already has name %s"
                             % str(expt.GetName()))

        self[expt.GetName()] = expt

    def GetVarsByCalc(self):
        """
        GetVarsByCalc() -> dictionary

        Returns a dictionary of all the dependent and independent variables
        for all the calculations required to compare with the data in all the
        experiments. The dictionary is of the form:
            dictionary[calculation name][dependent variable]
                -> sorted list of unique independent variables
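
        For example (hypothetical names), if the experiments measure 'A' at
        times 0 and 5 and 'B' at time 10, all in calculation 'net1':
            {'net1': {'A': [0, 5], 'B': [10]}}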
51 """
        varsByCalc = {}
        for expt in self.values():
            data = expt.GetData()

            for calc in data:
                varsByCalc.setdefault(calc, {})
                for depVar in data[calc]:
                    # Using a set is a convenient way to make sure
                    # independent variables aren't repeated.
                    varsByCalc[calc].setdefault(depVar, sets.Set())
                    varsByCalc[calc][depVar].union_update(
                        sets.Set(data[calc][depVar].keys()))

            for period in expt.GetPeriodChecks():
                calc, depVar = period['calcKey'], period['depVarKey']
                start, period = period['startTime'], period['period']
                varsByCalc.setdefault(calc, {})
                varsByCalc[calc].setdefault(depVar, sets.Set())
                varsByCalc[calc][depVar].union_update([start,
                                                       start + 2.0*period])

            for amplitude in expt.GetAmplitudeChecks():
                calc, depVar = amplitude['calcKey'], amplitude['depVarKey']
                start, test, period = (amplitude['startTime'],
                                       amplitude['testTime'],
                                       amplitude['period'])
                varsByCalc.setdefault(calc, {})
                varsByCalc[calc].setdefault(depVar, sets.Set())
                varsByCalc[calc][depVar].union_update([start, start + period,
                                                       test, test + period])

            for data_set in expt.GetIntegralDataSets():
                calc = data_set['calcKey']
                varsByCalc.setdefault(calc, {})
                varsByCalc[calc]['full trajectory'] = data_set['interval']

            for ds in expt.scaled_extrema_data:
                calc = ds['calcKey']
                varsByCalc.setdefault(calc, {})
                if ds['type'] == 'max':
                    called = ds['var'] + '_maximum'
                elif ds['type'] == 'min':
                    called = ds['var'] + '_minimum'
                varsByCalc[calc][called] = (ds['minTime'], ds['maxTime'])

        # Convert the sets to sorted lists before returning.
        for calc in varsByCalc:
            for depVar in varsByCalc[calc]:
                varsByCalc[calc][depVar] = list(varsByCalc[calc][depVar])
                varsByCalc[calc][depVar].sort()

        return varsByCalc

    def GetData(self):
        """
        GetData() -> dictionary

        Returns a dictionary containing all the data for the experiments. The
        dictionary is of the form:
            dictionary[expt name][calc name][dependent var][independent var]
                = value

        Note that value may be an arbitrary object.
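
        For example (hypothetical names; values in this codebase are
        typically (value, uncertainty) tuples):
            data['expt1']['net1']['A'][1.0] = (0.8, 0.1)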
118 """
119
120 data = {}
121 for exptName, expt in self.items():
122 data[exptName] = expt.GetData()
123
124 return data
125
class Experiment:
    def __init__(self, name = '', data = {}, fixedScaleFactors = {},
                 longName = '', shared_sf = []):
        self.SetName(name)
        self.SetData(data)
        self.SetFixedScaleFactors(fixedScaleFactors)
        self.set_shared_sf(shared_sf)
        self.periodChecks = []
        self.amplitudeChecks = []
        self.integral_data = []
        self.sf_priors = {}
        self.scaled_extrema_data = []

    def _hashable_group(self, group):
        """
        Return a sorted tuple of the elements of group.
        """
        if isinstance(group, str):
            group = [group]
        hash_group = list(sets.Set(group))
        hash_group.sort()
        return tuple(hash_group)

    def get_sf_groups(self):
        """
        Return tuples representing all the scale factors in this experiment.

        A tuple will contain multiple entries if several variables share a
        scale factor.
        """
        # Collect every variable measured in this experiment, from the
        # time-series data, the integral data, and the scaled extrema data.
        measuredVars = sets.Set()
        for calcId in self.data:
            measuredVars.union_update(sets.Set(self.data[calcId].keys()))
        for dataset in self.integral_data:
            measuredVars.union_update(sets.Set(dataset['vars']))
        for dataset in self.scaled_extrema_data:
            measuredVars.add(dataset['var'])

        sf_groups = [self._hashable_group(group) for group
                     in self.get_shared_sf()]
        # Flatten out the list of shared groups.
        flattened = []
        for g in sf_groups:
            flattened.extend(g)
        # Variables that don't share a scale factor each get their own group.
        unshared = [self._hashable_group(var) for var in measuredVars
                    if var not in flattened]
        sf_groups.extend(unshared)
        return sf_groups
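    # For instance (hypothetical names): with shared_sf = [('A', 'B')] and
    # measured variables 'A', 'B', and 'C', get_sf_groups() returns
    # [('A', 'B'), ('C',)].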

    def set_sf_prior(self, group, prior_type, prior_params=None):
        """
        Set the type of prior to place on a given group of scale factors.

        The group contains a collection of variables that share a given
        scale factor; it may be just one variable. You can see what the
        current groups are with expt.get_sf_groups().

        Currently implemented prior types are:
        'uniform in sf': This is a uniform prior over scale factors. It is
            the simplest and fastest to compute, but it tends to weight
            parameter sets that yield large scale factors heavily. It takes
            no parameters.

        'gaussian in log sf': This is a Gaussian prior over the logarithm
            of the scale factor. This should avoid the problem of weighting
            large factors heavily. It takes two parameters: the mean of the
            normal distribution and its standard deviation. For example,
            parameters (log(3.0), log(10)) will place a prior that holds 95%
            of the probability between 3 / 10**2 and 3 * 10**2.
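
        For example (a sketch with a hypothetical shared group):
            expt.set_sf_prior(('A', 'B'), 'gaussian in log sf',
                              (scipy.log(3.0), scipy.log(10)))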
200 """
201 hash_group = self._hashable_group(group)
202
203 if hash_group not in self.get_sf_groups():
204 raise ValueError('Unrecognized group to set scale factor prior on. '
205 'If it is a shared scale factor, you need to '
206 'specify every member of the sharing group.')
207
208 available_types = ['uniform in sf', 'gaussian in log sf']
209 if prior_type not in available_types:
210 raise ValueError('Unknown prior type %s. Available types are %s.'
211 % (prior_type, available_types))
212 self.sf_priors[hash_group] = (prior_type, prior_params)

    def compute_sf_entropy(self, sf_group, theoryDotTheory, theoryDotData, T):
        """
        Compute the entropy for a given scale factor.
        """
        try:
            prior_type, prior_params = self.sf_priors[sf_group]
        except KeyError:
            prior_type, prior_params = 'uniform in sf', None

        if prior_type == 'uniform in sf':
            if theoryDotTheory != 0:
                entropy = scipy.log(scipy.sqrt(2*scipy.pi*T/theoryDotTheory))
            else:
                entropy = 0
        elif prior_type == 'gaussian in log sf':
            # Use the compiled integrand if it's available; it is
            # considerably faster than the pure-Python fallback below.
            try:
                import SloppyCell.misc_c
                integrand = SloppyCell.misc_c.log_gaussian_prior_integrand
            except ImportError:
                logger.warn('Falling back to python integrand on log gaussian '
                            'prior integration.')
                exp = scipy.exp
                def integrand(u, ak, bk, mulB, siglB, T, B_best, lB_best):
                    # u is the log of the scale factor, measured relative to
                    # the optimal scale factor B_best.
                    B = exp(u) * B_best
                    lB = u + lB_best
                    ret = exp(-ak/(2*T) * (B-B_best)**2
                              - (lB-mulB)**2/(2 * siglB**2))
                    return ret

            mulB, siglB = prior_params
            B_best = theoryDotData/theoryDotTheory
            lB_best = scipy.log(B_best)

            # The integrand can overflow far from its peak at u = 0, so
            # temporarily silence overflow warnings during the quadrature.
            prev_err = scipy.seterr(over='ignore')
            int_args = (theoryDotTheory, theoryDotData, mulB, siglB, T, B_best,
                        lB_best)
            ans, temp = scipy.integrate.quad(integrand, -scipy.inf, scipy.inf,
                                             args=int_args, limit=1000)
            scipy.seterr(**prev_err)
            entropy = scipy.log(ans)
        else:
            raise ValueError('Unrecognized prior type: %s.' % prior_type)

        return entropy
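    # The 'uniform in sf' branch above is the log of a Gaussian integral over
    # the scale factor B: the cost is quadratic in B with curvature
    # theoryDotTheory/T, so
    #     integral exp(-theoryDotTheory*(B - B_best)**2 / (2*T)) dB
    #         = sqrt(2*pi*T/theoryDotTheory).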

    def SetName(self, name):
        self.name = name

    def GetName(self):
        return self.name

    def set_data(self, data):
        self.data = copy.copy(data)

    def update_data(self, newData):
        self.data.update(newData)

    def get_data(self):
        return self.data

    def set_fixed_sf(self, fixed_sf):
        self.fixedScaleFactors = fixed_sf

    def set_shared_sf(self, shared_sf):
        self.shared_sf = shared_sf

    def get_shared_sf(self):
        return self.shared_sf

    def get_fixed_sf(self):
        return self.fixedScaleFactors

    # Aliases to maintain the older CamelCase interface.
    SetData = set_data
    GetData = get_data
    UpdateData = update_data
    SetFixedScaleFactors = set_fixed_sf
    GetFixedScaleFactors = get_fixed_sf

    def AddPeriodCheck(self, calcKey, chemical, period, sigma, startTime=0.0):
        """
        Constrain the period of the oscillations to a value (period)
        with the error (sigma). The period is found using the maximum-
        to-maximum distance of the first two maxima found between
        startTime and two periods after startTime.
        """
        self.periodChecks.append({'calcKey': calcKey, 'depVarKey': chemical,
                                  'period': period, 'sigma': sigma,
                                  'startTime': startTime})
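    # Example (hypothetical names): constrain 'A' in calculation 'net1' to
    # oscillate with period 24 +/- 1, searching from t = 0:
    #     expt.AddPeriodCheck('net1', 'A', 24.0, 1.0, startTime=0.0)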

    def GetPeriodChecks(self):
        return self.periodChecks

    def AddAmplitudeCheck(self, calcKey, chemical, startTime, testTime, period,
                          sigma):
        """
        Turn on a constraint that the area integrated over one period
        starting at startTime must equal the area integrated over one
        period starting at testTime.
        """
        self.amplitudeChecks.append({'calcKey': calcKey, 'depVarKey': chemical,
                                     'startTime': startTime,
                                     'testTime': testTime,
                                     'period': period, 'sigma': sigma})

    def GetAmplitudeChecks(self):
        return self.amplitudeChecks

    def GetIntegralDataSets(self):
        return self.integral_data

    def add_integral_data(self, calcKey, traj, uncert_traj, vars,
                          interval=None):
        """
        Add an integral data set to the experiment.

        calcKey     The id of the calculation this data corresponds to
        traj        The trajectory to compare against
        uncert_traj A trajectory of data uncertainties
        vars        Which variables to fit against
        interval    The time interval to fit over; defaults to the whole traj
        """
        traj.build_interpolated_traj()
        uncert_traj.build_interpolated_traj()
        if interval is None:
            interval = (traj.get_times()[0], traj.get_times()[-1])
        self.integral_data.append({'calcKey': calcKey, 'trajectory': traj,
                                   'uncert_traj': uncert_traj, 'vars': vars,
                                   'interval': interval})
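    # Example (a sketch; traj and uncert_traj are assumed to be SloppyCell
    # Trajectory objects, and 'net1'/'A' are hypothetical names):
    #     expt.add_integral_data('net1', traj, uncert_traj, vars=['A'],
    #                            interval=(0.0, 10.0))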

    def add_scaled_max(self, calcKey, var, maxval, sigma,
                       minTime=None, maxTime=None):
        self.scaled_extrema_data.append({'calcKey': calcKey, 'var': var,
                                         'val': maxval, 'sigma': sigma,
                                         'minTime': minTime,
                                         'maxTime': maxTime,
                                         'type': 'max'})

    def add_scaled_min(self, calcKey, var, minval, sigma,
                       minTime=None, maxTime=None):
        self.scaled_extrema_data.append({'calcKey': calcKey, 'var': var,
                                         'val': minval, 'sigma': sigma,
                                         'minTime': minTime,
                                         'maxTime': maxTime,
                                         'type': 'min'})
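    # Example (hypothetical names): require the scaled maximum of 'A' between
    # t = 0 and t = 10 to be 2.0 +/- 0.2:
    #     expt.add_scaled_max('net1', 'A', 2.0, 0.2, minTime=0.0, maxTime=10.0)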

class CalculationCollection(KeyedList):
    """
    CalculationCollection(calculation list)

    A CalculationCollection unites a collection of calculations. It is
    responsible for generating a master list of parameters and passing each
    Calculation its appropriate parameters.

    Individual calculations can be accessed via dictionary-type indexing.

    XXX: Note that the parameter shuffling has not been extensively tested.
    """

    def __init__(self, calcList = []):
        KeyedList.__init__(self)
        self.params = KeyedList()
        for calc in calcList:
            self.AddCalculation(calc)

    def AddCalculation(self, calc):
        """
        AddCalculation(calc)

        Adds the calculation to the collection and adds its parameters to
        the master parameter list.
        """
        if calc.GetName() in self:
            raise ValueError("Calculation already has name %s"
                             % str(calc.GetName()))

        self.set(calc.GetName(), calc)
        for pName, pValue in calc.GetParameters().items():
            self.params.setdefault(pName, pValue)

    def Calculate(self, varsByCalc, params = None):
        """
        Calculate model predictions for everything in varsByCalc.

        varsByCalc is a dictionary of the form:
            dict[calc name][dep var] = ind var

        The return dictionary is of the form:
            dictionary[calc name][dep var][ind var] = result
        """
        if params is not None:
            self.params.update(params)

        results = {}

        calcs_to_do = varsByCalc.keys()
        # Record which calculation each worker is assigned.
        calc_assigned = {}
        while calcs_to_do:
            # The number of calculations to dispatch in this block: no more
            # than the number of processors, no more than the number left.
            len_this_block = min(SloppyCell.num_procs, len(calcs_to_do))

            for worker in range(1, len_this_block):
                calc = calcs_to_do.pop()
                calc_assigned[worker] = calc
                logger.debug('Assigning calculation %s to worker %i.'
                             % (calc, worker))
                command = 'Network.calculate(net, vars, params)'
                args = {'net': self.get(calc), 'vars': varsByCalc[calc],
                        'params': self.params}
                pypar.send((command, args), worker)

            # The master does one calculation itself.
            calc = calcs_to_do.pop()

            # Use try/finally so the workers' results are still collected
            # even if the master's own calculation raises an exception.
            try:
                results[calc] = self.get(calc).calculate(varsByCalc[calc],
                                                         self.params)
            finally:
                # Collect the results from the workers.
                for worker in range(1, len_this_block):
                    logger.debug('Receiving result from worker %i.' % worker)
                    results[calc_assigned[worker]] = pypar.receive(worker)

        # Workers return exceptions rather than raising them; re-raise any
        # such exception here on the master.
        for val in results.values():
            if isinstance(val, Utility.SloppyCellException):
                raise val

        return results

    def CalculateSensitivity(self, varsByCalc, params = None):
        """
        Calculate sensitivities for model predictions of everything in
        varsByCalc.

        varsByCalc is a dictionary of the form:
            dict[calc name][dep var] = ind var

        The return dictionary is of the form:
            dictionary[calc name][dep var][ind var][param] = result
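
        For example (hypothetical names), the sensitivity of 'A' at time 1.0
        to parameter 'k1' in calculation 'net1' is
            calcSensVals['net1']['A'][1.0]['k1']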
473 """
474 if params is not None :
475 self.params.update(params)
476
477 calcSensVals, calcVals = {}, {}
478 for (calcName, vars) in varsByCalc.items():
479 calc = self.get(calcName)
480 vars = varsByCalc[calcName]
481 calcPOrder = calc.GetParameters().keys()
482 calc.CalculateSensitivity(varsByCalc[calcName], self.params)
483 calcSensVals[calcName] = calc.GetSensitivityResult(vars)
484 calcVals[calcName] = calc.GetResult(vars)
485
486 return calcVals, calcSensVals

    def GetParameters(self):
        """
        Return the collection's parameter KeyedList, rebuilt by gathering
        parameters from each calculation in the collection.
        """
        self.params = KeyedList()
        for calc in self.values():
            for pName, pValue in calc.GetParameters().items():
                self.params.setdefault(pName, pValue)
        return self.params