import logging
logger = logging.getLogger('Collections')

import sets, copy

import scipy
import scipy.integrate

import SloppyCell
import SloppyCell.Utility as Utility
import SloppyCell.KeyedList_mod as KeyedList_mod
KeyedList = KeyedList_mod.KeyedList

if SloppyCell.HAVE_PYPAR:
    import pypar

17 """
18 ExperimentCollection(experiment name list)
19
20 An ExperimentCollection unites a collection of experiments. For now, it's
21 most important function is to collect group the independent variables by
22 Calculation to avoid wasting computer effort redoing calculations.
23
24 Individual experiments can be accessed via dictionary-type indexing.
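
    For example (a sketch; the data layout is described under GetData()
    below, and the names are hypothetical):
        expt = Experiment('expt1', data={'net1': {'A': {1.0: (0.8, 0.1)}}})
        expts = ExperimentCollection([expt])
        expts['expt1'] is expt   # dictionary-style access by name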
    """
    def __init__(self, exptList = []):
        for expt in exptList:
            self.AddExperiment(expt)

    def AddExperiment(self, expt):
        """
        AddExperiment(expt)

        Adds the experiment to the collection, keyed by its name.
        """
        if expt.GetName() in self:
            raise ValueError("Experiment already has name %s"
                             % str(expt.GetName()))

        self[expt.GetName()] = expt

    def GetVarsByCalc(self):
        """
        GetVarsByCalc() -> dictionary

        Returns a dictionary of all the dependent and independent variables
        for all the calculations required to compare with the data in all the
        experiments. The dictionary is of the form:
            dictionary[calculation name][dependent variable]
                -> sorted list of unique independent variables
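
        For example (hypothetical names), if the experiments measure 'A' at
        times 0 and 5 and 'B' at time 10, all in calculation 'net1':
            {'net1': {'A': [0, 5], 'B': [10]}}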
51 """
        varsByCalc = {}
        for expt in self.values():
            data = expt.GetData()

            for calc in data:
                varsByCalc.setdefault(calc, {})
                for depVar in data[calc]:
                    # Using a set is a convenient way to make sure
                    # independent variables aren't repeated.
                    varsByCalc[calc].setdefault(depVar, sets.Set())
                    varsByCalc[calc][depVar].union_update(
                        sets.Set(data[calc][depVar].keys()))

            for period in expt.GetPeriodChecks():
                calc, depVar = period['calcKey'], period['depVarKey']
                start, period = period['startTime'], period['period']
                varsByCalc.setdefault(calc, {})
                varsByCalc[calc].setdefault(depVar, sets.Set())
                varsByCalc[calc][depVar].union_update([start,
                                                       start + 2.0*period])

            for amplitude in expt.GetAmplitudeChecks():
                calc, depVar = amplitude['calcKey'], amplitude['depVarKey']
                start, test, period = (amplitude['startTime'],
                                       amplitude['testTime'],
                                       amplitude['period'])
                varsByCalc.setdefault(calc, {})
                varsByCalc[calc].setdefault(depVar, sets.Set())
                varsByCalc[calc][depVar].union_update([start, start + period,
                                                       test, test + period])

            for data_set in expt.GetIntegralDataSets():
                calc = data_set['calcKey']
                varsByCalc.setdefault(calc, {})
                varsByCalc[calc]['full trajectory'] = data_set['interval']

            for ds in expt.scaled_extrema_data:
                calc = ds['calcKey']
                varsByCalc.setdefault(calc, {})
                if ds['type'] == 'max':
                    called = ds['var'] + '_maximum'
                elif ds['type'] == 'min':
                    called = ds['var'] + '_minimum'
                varsByCalc[calc][called] = (ds['minTime'], ds['maxTime'])

        # Convert the sets to sorted lists before returning.
        for calc in varsByCalc:
            for depVar in varsByCalc[calc]:
                varsByCalc[calc][depVar] = list(varsByCalc[calc][depVar])
                varsByCalc[calc][depVar].sort()

        return varsByCalc

    def GetData(self):
        """
        GetData() -> dictionary

        Returns a dictionary containing all the data for the experiments. The
        dictionary is of the form:
            dictionary[expt name][calc name][dependent var][independent var]
                = value

        Note that value may be an arbitrary object.
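
        For example (hypothetical names; values in this codebase are
        typically (value, uncertainty) tuples):
            data['expt1']['net1']['A'][1.0] = (0.8, 0.1)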
118 """
119
120 data = {}
121 for exptName, expt in self.items():
122 data[exptName] = expt.GetData()
123
124 return data
125
class Experiment:
    def __init__(self, name = '', data = {}, fixedScaleFactors = {},
                 longName = '', shared_sf = []):
        self.SetName(name)
        self.SetData(data)
        self.SetFixedScaleFactors(fixedScaleFactors)
        self.set_shared_sf(shared_sf)
        self.periodChecks = []
        self.amplitudeChecks = []
        self.integral_data = []
        self.sf_priors = {}
        self.scaled_extrema_data = []

    def _hashable_group(self, group):
        """
        Return a sorted tuple of the elements of group.
        """
        if isinstance(group, str):
            group = [group]
        hash_group = list(sets.Set(group))
        hash_group.sort()
        return tuple(hash_group)

    def get_sf_groups(self):
        """
        Return tuples representing all the scale factors in this experiment.

        A tuple will contain multiple entries if several variables share a
        scale factor.
        """
        # Collect every variable measured in this experiment, from the
        # time-series data, the integral data, and the scaled extrema data.
        measuredVars = sets.Set()
        for calcId in self.data:
            measuredVars.union_update(sets.Set(self.data[calcId].keys()))
        for dataset in self.integral_data:
            measuredVars.union_update(sets.Set(dataset['vars']))
        for dataset in self.scaled_extrema_data:
            measuredVars.add(dataset['var'])

        sf_groups = [self._hashable_group(group) for group
                     in self.get_shared_sf()]
        # Flatten out the list of shared groups.
        flattened = []
        for g in sf_groups:
            flattened.extend(g)
        # Variables that don't share a scale factor each get their own group.
        unshared = [self._hashable_group(var) for var in measuredVars
                    if var not in flattened]
        sf_groups.extend(unshared)
        return sf_groups
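    # For instance (hypothetical names): with shared_sf = [('A', 'B')] and
    # measured variables 'A', 'B', and 'C', get_sf_groups() returns
    # [('A', 'B'), ('C',)].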

    def set_sf_prior(self, group, prior_type, prior_params=None):
        """
        Set the type of prior to place on a given group of scale factors.

        The group contains a collection of variables that share a given
        scale factor; it may be just one variable. You can see what the
        current groups are with expt.get_sf_groups().

        Currently implemented prior types are:
        'uniform in sf': This is a uniform prior over scale factors. It is
            the simplest and fastest to compute, but it tends to weight
            parameter sets that yield large scale factors heavily. It takes
            no parameters.

        'gaussian in log sf': This is a Gaussian prior over the logarithm
            of the scale factor. This should avoid the problem of weighting
            large factors heavily. It takes two parameters: the mean of the
            normal distribution and its standard deviation. For example,
            parameters (log(3.0), log(10)) will place a prior that holds 95%
            of the probability between 3 / 10**2 and 3 * 10**2.
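
        For example (a sketch with a hypothetical shared group):
            expt.set_sf_prior(('A', 'B'), 'gaussian in log sf',
                              (scipy.log(3.0), scipy.log(10)))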
200 """
201 hash_group = self._hashable_group(group)
202
203 if hash_group not in self.get_sf_groups():
204 raise ValueError('Unrecognized group to set scale factor prior on. '
205 'If it is a shared scale factor, you need to '
206 'specify every member of the sharing group.')
207
208 available_types = ['uniform in sf', 'gaussian in log sf']
209 if prior_type not in available_types:
210 raise ValueError('Unknown prior type %s. Available types are %s.'
211 % (prior_type, available_types))
212 self.sf_priors[hash_group] = (prior_type, prior_params)

    def compute_sf_entropy(self, sf_group, theoryDotTheory, theoryDotData, T):
        """
        Compute the entropy for a given scale factor.
        """
        try:
            prior_type, prior_params = self.sf_priors[sf_group]
        except KeyError:
            prior_type, prior_params = 'uniform in sf', None

        if prior_type == 'uniform in sf':
            if theoryDotTheory != 0:
                entropy = scipy.log(scipy.sqrt(2*scipy.pi*T/theoryDotTheory))
            else:
                entropy = 0
        elif prior_type == 'gaussian in log sf':
            # Use the compiled integrand if it's available; it is
            # considerably faster than the pure-Python fallback below.
            try:
                import SloppyCell.misc_c
                integrand = SloppyCell.misc_c.log_gaussian_prior_integrand
            except ImportError:
                logger.warn('Falling back to python integrand on log gaussian '
                            'prior integration.')
                exp = scipy.exp
                def integrand(u, ak, bk, mulB, siglB, T, B_best, lB_best):
                    # u is the log of the scale factor, measured relative to
                    # the optimal scale factor B_best.
                    B = exp(u) * B_best
                    lB = u + lB_best
                    ret = exp(-ak/(2*T) * (B-B_best)**2
                              - (lB-mulB)**2/(2 * siglB**2))
                    return ret

            mulB, siglB = prior_params
            B_best = theoryDotData/theoryDotTheory
            lB_best = scipy.log(B_best)

            # The integrand can overflow far from its peak at u = 0, so
            # temporarily silence overflow warnings during the quadrature.
            prev_err = scipy.seterr(over='ignore')
            int_args = (theoryDotTheory, theoryDotData, mulB, siglB, T, B_best,
                        lB_best)
            ans, temp = scipy.integrate.quad(integrand, -scipy.inf, scipy.inf,
                                             args=int_args, limit=1000)
            scipy.seterr(**prev_err)
            entropy = scipy.log(ans)
        else:
            raise ValueError('Unrecognized prior type: %s.' % prior_type)

        return entropy
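    # The 'uniform in sf' branch above is the log of a Gaussian integral over
    # the scale factor B: the cost is quadratic in B with curvature
    # theoryDotTheory/T, so
    #     integral exp(-theoryDotTheory*(B - B_best)**2 / (2*T)) dB
    #         = sqrt(2*pi*T/theoryDotTheory).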

    def SetName(self, name):
        self.name = name

    def GetName(self):
        return self.name

    def set_data(self, data):
        self.data = copy.copy(data)

    def update_data(self, newData):
        self.data.update(newData)

    def get_data(self):
        return self.data

    def set_fixed_sf(self, fixed_sf):
        self.fixedScaleFactors = fixed_sf

    def set_shared_sf(self, shared_sf):
        self.shared_sf = shared_sf

    def get_shared_sf(self):
        return self.shared_sf

    def get_fixed_sf(self):
        return self.fixedScaleFactors

    # Aliases to maintain the older CamelCase interface.
    SetData = set_data
    GetData = get_data
    UpdateData = update_data
    SetFixedScaleFactors = set_fixed_sf
    GetFixedScaleFactors = get_fixed_sf

    def AddPeriodCheck(self, calcKey, chemical, period, sigma, startTime=0.0):
        """
        Constrain the period of the oscillations to a value (period)
        with the error (sigma). The period is found using the maximum-
        to-maximum distance of the first two maxima found between
        startTime and two periods after startTime.
        """
        self.periodChecks.append({'calcKey': calcKey, 'depVarKey': chemical,
                                  'period': period, 'sigma': sigma,
                                  'startTime': startTime})
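    # Example (hypothetical names): constrain 'A' in calculation 'net1' to
    # oscillate with period 24 +/- 1, searching from t = 0:
    #     expt.AddPeriodCheck('net1', 'A', 24.0, 1.0, startTime=0.0)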

    def GetPeriodChecks(self):
        return self.periodChecks

    def AddAmplitudeCheck(self, calcKey, chemical, startTime, testTime, period,
                          sigma):
        """
        Turn on a constraint that the area integrated over one period
        starting at startTime must equal the area integrated over one
        period starting at testTime.
        """
        self.amplitudeChecks.append({'calcKey': calcKey, 'depVarKey': chemical,
                                     'startTime': startTime,
                                     'testTime': testTime,
                                     'period': period, 'sigma': sigma})

    def GetAmplitudeChecks(self):
        return self.amplitudeChecks

    def GetIntegralDataSets(self):
        return self.integral_data

    def add_integral_data(self, calcKey, traj, uncert_traj, vars,
                          interval=None):
        """
        Add an integral data set to the experiment.

        calcKey     The id of the calculation this data corresponds to
        traj        The trajectory to compare against
        uncert_traj A trajectory of data uncertainties
        vars        Which variables to fit against
        interval    The time interval to fit over; defaults to the whole traj
        """
        traj.build_interpolated_traj()
        uncert_traj.build_interpolated_traj()
        if interval is None:
            interval = (traj.get_times()[0], traj.get_times()[-1])
        self.integral_data.append({'calcKey': calcKey, 'trajectory': traj,
                                   'uncert_traj': uncert_traj, 'vars': vars,
                                   'interval': interval})
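    # Example (a sketch; traj and uncert_traj are assumed to be SloppyCell
    # Trajectory objects, and 'net1'/'A' are hypothetical names):
    #     expt.add_integral_data('net1', traj, uncert_traj, vars=['A'],
    #                            interval=(0.0, 10.0))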

    def add_scaled_max(self, calcKey, var, maxval, sigma,
                       minTime=None, maxTime=None):
        self.scaled_extrema_data.append({'calcKey': calcKey, 'var': var,
                                         'val': maxval, 'sigma': sigma,
                                         'minTime': minTime,
                                         'maxTime': maxTime,
                                         'type': 'max'})

    def add_scaled_min(self, calcKey, var, minval, sigma,
                       minTime=None, maxTime=None):
        self.scaled_extrema_data.append({'calcKey': calcKey, 'var': var,
                                         'val': minval, 'sigma': sigma,
                                         'minTime': minTime,
                                         'maxTime': maxTime,
                                         'type': 'min'})
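    # Example (hypothetical names): require the scaled maximum of 'A' between
    # t = 0 and t = 10 to be 2.0 +/- 0.2:
    #     expt.add_scaled_max('net1', 'A', 2.0, 0.2, minTime=0.0, maxTime=10.0)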

class CalculationCollection(KeyedList):
    """
    CalculationCollection(calculation list)

    A CalculationCollection unites a collection of calculations. It is
    responsible for generating a master list of parameters and passing each
    Calculation its appropriate parameters.

    Individual calculations can be accessed via dictionary-type indexing.

    XXX: Note that the parameter shuffling has not been extensively tested.
    """

    def __init__(self, calcList = []):
        KeyedList.__init__(self)
        self.params = KeyedList()
        for calc in calcList:
            self.AddCalculation(calc)

    def AddCalculation(self, calc):
        """
        AddCalculation(calc)

        Adds the calculation to the collection and adds its parameters to
        the master parameter list.
        """
        if calc.GetName() in self:
            raise ValueError("Calculation already has name %s"
                             % str(calc.GetName()))

        self.set(calc.GetName(), calc)
        for pName, pValue in calc.GetParameters().items():
            self.params.setdefault(pName, pValue)

    def Calculate(self, varsByCalc, params = None):
        """
        Calculate model predictions for everything in varsByCalc.

        varsByCalc is a dictionary of the form:
            dict[calc name][dep var] = ind var

        The return dictionary is of the form:
            dictionary[calc name][dep var][ind var] = result
        """
        if params is not None:
            self.params.update(params)

        results = {}

        calcs_to_do = varsByCalc.keys()
        # Record which calculation each worker is assigned.
        calc_assigned = {}
        while calcs_to_do:
            # The number of calculations to dispatch in this block: no more
            # than the number of processors, no more than the number left.
            len_this_block = min(SloppyCell.num_procs, len(calcs_to_do))

            for worker in range(1, len_this_block):
                calc = calcs_to_do.pop()
                calc_assigned[worker] = calc
                logger.debug('Assigning calculation %s to worker %i.'
                             % (calc, worker))
                command = 'Network.calculate(net, vars, params)'
                args = {'net': self.get(calc), 'vars': varsByCalc[calc],
                        'params': self.params}
                pypar.send((command, args), worker)

            # The master does one calculation itself.
            calc = calcs_to_do.pop()

            # Use try/finally so the workers' results are still collected
            # even if the master's own calculation raises an exception.
            try:
                results[calc] = self.get(calc).calculate(varsByCalc[calc],
                                                         self.params)
            finally:
                # Collect the results from the workers.
                for worker in range(1, len_this_block):
                    logger.debug('Receiving result from worker %i.' % worker)
                    results[calc_assigned[worker]] = pypar.receive(worker)

        # Workers return exceptions rather than raising them; re-raise any
        # such exception here on the master.
        for val in results.values():
            if isinstance(val, Utility.SloppyCellException):
                raise val

        return results

    def CalculateSensitivity(self, varsByCalc, params = None):
        """
        Calculate sensitivities for model predictions of everything in
        varsByCalc.

        varsByCalc is a dictionary of the form:
            dict[calc name][dep var] = ind var

        The return dictionary is of the form:
            dictionary[calc name][dep var][ind var][param] = result
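
        For example (hypothetical names), the sensitivity of 'A' at time 1.0
        to parameter 'k1' in calculation 'net1' is
            calcSensVals['net1']['A'][1.0]['k1']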
473 """
474 if params is not None :
475 self.params.update(params)
476
477 calcSensVals, calcVals = {}, {}
478 for (calcName, vars) in varsByCalc.items():
479 calc = self.get(calcName)
480 vars = varsByCalc[calcName]
481 calcPOrder = calc.GetParameters().keys()
482 calc.CalculateSensitivity(varsByCalc[calcName], self.params)
483 calcSensVals[calcName] = calc.GetSensitivityResult(vars)
484 calcVals[calcName] = calc.GetResult(vars)
485
486 return calcVals, calcSensVals

    def GetParameters(self):
        """
        Return the collection's parameter KeyedList, rebuilt by gathering
        parameters from each calculation in the collection.
        """
        self.params = KeyedList()
        for calc in self.values():
            for pName, pValue in calc.GetParameters().items():
                self.params.setdefault(pName, pValue)
        return self.params