1 import logging
2 logger = logging.getLogger('Ensembles')
3 import copy
4 import shutil
5 import sys
6 import time
7
8 import scipy
9 import scipy.linalg
10 import scipy.stats
11 import scipy.fftpack
12
13 import SloppyCell.KeyedList_mod as KeyedList_mod
14 KeyedList = KeyedList_mod.KeyedList
15 import SloppyCell.Utility as Utility
16
17 from SloppyCell import HAVE_PYPAR, my_rank, my_host, num_procs
18 if HAVE_PYPAR:
19 import pypar
20
22 """
23 Return the normalized autocorrelation of a series using the FFT.
24 """
25
26
27 f = scipy.fftpack.rfft(scipy.asarray(series)-scipy.mean(series),
28 n = 2*len(series))
29
30 ac = scipy.fftpack.irfft(abs(f)**2)
31
32 ac = ac[:len(series)]
33
34 ac *= len(series)/(len(series) - 1.0*scipy.arange(len(series)))
35
36 return ac/ac[0]
37
38 -def ensemble(m, params, hess=None,
39 steps=scipy.inf, max_run_hours=scipy.inf,
40 temperature=1.0, step_scale=1.0,
41 sing_val_cutoff=0, seeds=None,
42 recalc_hess_alg=False, recalc_func=None,
43 save_hours=scipy.inf, skip_elems=0, save_to=None):
44 """
45 Generate a Bayesian ensemble of parameter sets consistent with the data in
46 the model. The sampling is done in terms of the bare parameters.
47
48 Inputs:
49 (All not listed here are identical to those of ensemble_log_params.)
50 recalc_func --- Function used to calculate the hessian matrix. It should
51 take only a parameters argument and return the matrix.
52 If this is None, default is to use m.GetJandJtJ.
53 """
54 return ensemble_log_params(m, params, hess, steps, max_run_hours,
55 temperature, step_scale, sing_val_cutoff, seeds,
56 recalc_hess_alg, recalc_func, save_hours,
57 save_to, skip_elems, log_params=False)
58
59 -def ensemble_log_params(m, params, hess=None,
60 steps=scipy.inf, max_run_hours=scipy.inf,
61 temperature=1.0, step_scale=1.0,
62 sing_val_cutoff=0, seeds=None,
63 recalc_hess_alg = False, recalc_func=None,
64 save_hours=scipy.inf, save_to=None,
65 skip_elems = 0, log_params=True):
66 """
67 Generate a Bayesian ensemble of parameter sets consistent with the data in
68 the model. The sampling is done in terms of the logarithm of the parameters.
69
70 Inputs:
71 m -- Model to generate the ensemble for
72 params -- Initial parameter KeyedList to start from
73 hess -- Hessian of the model
74 steps -- Maximum number of Monte Carlo steps to attempt
75 max_run_hours -- Maximum number of hours to run
76 temperature -- Temperature of the ensemble
77 step_scale -- Additional scale applied to each step taken. step_scale < 1
78 results in steps shorter than those dictated by the quadratic
79 approximation and may be useful if acceptance is low.
80 sing_val_cutoff -- Truncate the quadratic approximation at eigenvalues
81 smaller than this fraction of the largest.
82 seeds -- A tuple of two integers to seed the random number generator
83 recalc_hess_alg --- If True, the Monte-Carlo is done by recalculating the
84 hessian matrix every timestep. This signficantly
85 increases the computation requirements for each step,
86 but it may be worth it if it improves convergence.
87 recalc_func --- Function used to calculate the hessian matrix. It should
88 take only a log parameters argument and return the matrix.
89 If this is None, default is to use
90 m.GetJandJtJInLogParameteters
91 save_hours --- If save_to is not None, the ensemble will be saved to
92 that file every 'save_hours' hours.
93 save_to --- Filename to save ensemble to.
94 skip_elems --- If non-zero, skip_elems are skipped between each included
95 step in the returned ensemble. For example, skip_elems=1
96 will return every other member. Using this option can
97 reduce memory consumption.
98
99 Outputs:
100 ens, ens_fes, ratio
101 ens -- List of KeyedList parameter sets in the ensemble
102 ens_fes -- List of free energies for each parameter set
103 ratio -- Fraction of attempted moves that were accepted
104
105 The sampling is done by Markov Chain Monte Carlo, with a Metropolis-Hasting
106 update scheme. The canidate-generating density is a gaussian centered on the
107 current point, with axes determined by the hessian. For a useful
108 introduction see:
109 Chib and Greenberg. "Understanding the Metropolis-Hastings Algorithm"
110 _The_American_Statistician_ 49(4), 327-335
111 """
112 if scipy.isinf(steps) and scipy.isinf(max_run_hours):
113 logger.warn('Both steps and max_run_hours are infinite! '
114 'Code will not stop by itself!')
115
116 if seeds is None:
117 seeds = int(time.time()%1 * 1e6)
118 logger.debug('Seeding random number generator based on system time.')
119 logger.debug('Seed used: %s' % str(seeds))
120 scipy.random.seed(seeds)
121 if isinstance(params, KeyedList):
122 param_keys = params.keys()
123
124 curr_params = copy.deepcopy(params)
125 curr_F = m.free_energy(curr_params, temperature)
126 ens, ens_Fs = [curr_params], [curr_F]
127
128
129 curr_params = scipy.array(curr_params)
130
131 if recalc_func is None and log_params:
132 recalc_func = lambda p: m.GetJandJtJInLogParameters(scipy.log(p))[1]
133 else:
134 recalc_func = lambda p: m.GetJandJtJ(p)[1]
135
136 accepted_moves, attempt_exceptions, ratio = 0, 0, scipy.nan
137 start_time = last_save_time = time.time()
138
139
140 if hess is None:
141 hess = recalc_func(curr_params)
142
143 samp_mat = _sampling_matrix(hess, sing_val_cutoff, temperature, step_scale)
144
145 steps_attempted = 0
146 while steps_attempted < steps:
147
148 if (time.time() - start_time) >= max_run_hours*3600:
149 break
150
151
152 deltaParams = _trial_move(samp_mat)
153
154
155 scaled_step = deltaParams
156
157 if log_params:
158 next_params = curr_params * scipy.exp(scaled_step)
159 else:
160 next_params = curr_params + scaled_step
161
162 try:
163 next_F = m.free_energy(next_params, temperature)
164 except Utility.SloppyCellException, X:
165 logger.warn('SloppyCellException in free energy evaluation at step '
166 '%i, free energy set to infinity.' % len(ens))
167 logger.warn('Parameters tried: %s.' % str(next_params))
168 attempt_exceptions += 1
169 next_F = scipy.inf
170
171 if recalc_hess_alg and not scipy.isinf(next_F):
172 try:
173 next_hess = recalc_func(next_params)
174 next_samp_mat = _sampling_matrix(next_hess, sing_val_cutoff,
175 temperature, step_scale)
176 accepted = _accept_move_recalc_alg(curr_F, samp_mat,
177 next_F, next_samp_mat,
178 deltaParams, temperature)
179 except Utility.SloppyCellException, X:
180 logger.warn('SloppyCellException in JtJ evaluation at step '
181 '%i, move not accepted.' % len(ens))
182 logger.warn('Parameters tried: %s.' % str(next_params))
183 attempt_exceptions += 1
184 next_F = scipy.inf
185 accepted = False
186 else:
187 accepted = _accept_move(next_F - curr_F, temperature)
188
189 steps_attempted += 1
190 if accepted:
191 accepted_moves += 1.
192 curr_params = next_params
193 curr_F = next_F
194 if recalc_hess_alg:
195 hess = next_hess
196 samp_mat = next_samp_mat
197
198 if steps_attempted % (skip_elems + 1) == 0:
199 ens_Fs.append(curr_F)
200 if isinstance(params, KeyedList):
201 ens.append(KeyedList(zip(param_keys, curr_params)))
202 else:
203 ens.append(curr_params)
204 ratio = accepted_moves/steps_attempted
205
206
207 if save_to is not None\
208 and time.time() >= last_save_time + save_hours * 3600:
209 _save_ens(ens, ens_Fs, ratio, save_to, attempt_exceptions,
210 steps_attempted)
211 last_save_time = time.time()
212
213 if save_to is not None:
214 _save_ens(ens, ens_Fs, ratio, save_to, attempt_exceptions,
215 steps_attempted)
216
217 return ens, ens_Fs, ratio
218
219 -def _save_ens(ens, ens_Fs, ratio, save_to, attempt_exceptions, steps_attempted):
220 temp_name = save_to + '_temporary_SloppyCellFile'
221 Utility.save((ens, ens_Fs, ratio), temp_name)
222 shutil.move(temp_name, save_to)
223 logger.debug('Ensemble of length %i saved to %s.' % (len(ens), save_to))
224 logger.debug('Acceptance ratio so far is %f.' % ratio)
225 logger.debug('Attempted moves threw an exception %i times out of %i '
226 'attempts.' % (attempt_exceptions, steps_attempted))
227
229 """
230 Basic Metropolis accept/reject step.
231 """
232 p = scipy.rand()
233 return (p < scipy.exp(-delta_F/temperature))
234
237 """
238 Accept/reject when each the sampling matrix is recalculated each step.
239 """
240 pi_x = scipy.exp(-curr_F/T)
241
242 sigma_curr = scipy.dot(curr_samp_mat, scipy.transpose(curr_samp_mat))
243 sigma_curr_inv = scipy.linalg.inv(sigma_curr)
244
245 q_x_to_y = scipy.exp(-_quadratic_cost(step, sigma_curr_inv))\
246 / scipy.sqrt(scipy.linalg.det(sigma_curr))
247
248 pi_y = scipy.exp(-next_F/T)
249 sigma_next = scipy.dot(next_samp_mat, scipy.transpose(next_samp_mat))
250 sigma_next_inv = scipy.linalg.inv(sigma_next)
251 q_y_to_x = scipy.exp(-_quadratic_cost(-step, sigma_next_inv))\
252 / scipy.sqrt(scipy.linalg.det(sigma_next))
253
254 p = scipy.rand()
255 accepted = (pi_y*q_y_to_x)/(pi_x*q_x_to_y)
256 did_accepted = p<accepted
257
258 return p < accepted
259
260
262
263
264 u, sing_vals, vh = scipy.linalg.svd(0.5 * hessian)
265
266
267
268
269
270
271 cutoff_sing_val = cutoff * max(sing_vals)
272
273 D = 1.0/scipy.maximum(sing_vals, cutoff_sing_val)
274
275
276
277
278 samp_mat = scipy.transpose(vh) * scipy.sqrt(D)
279
280
281
282 cutoff_vals = scipy.compress(sing_vals < cutoff_sing_val, sing_vals)
283 if len(cutoff_vals):
284 scale = scipy.sqrt(len(sing_vals) - len(cutoff_vals)
285 + sum(cutoff_vals)/cutoff_sing_val)
286 else:
287 scale = scipy.sqrt(len(sing_vals))
288
289 samp_mat /= scale
290 samp_mat *= step_scale
291 samp_mat *= scipy.sqrt(temperature)
292
293 return samp_mat
294
296 randVec = scipy.randn(len(sampling_mat))
297 trialMove = scipy.dot(sampling_mat, randVec)
298
299 return trialMove
300
302 """
303 The cost from the quadratic approximation of a trialMove, given the hessian.
304
305 (Note: the hessian here is assumed to be the second derivative matrix of the
306 cost, without an additional factor of 1/2.)
307 """
308 quadratic = 0.5*scipy.dot(scipy.transpose(trialMove),
309 scipy.dot(hessian, trialMove))
310 return quadratic
311
313 """
314 Return the mean and standard deviation trajectory objects for the given
315 input list of trajectories. (All must be evaluated at the same points.)
316 """
317 all_values = [traj.values for traj in traj_set]
318
319 mean_values = scipy.mean(all_values, 0)
320 std_values = scipy.std(all_values, 0)
321
322 mean_traj = copy.deepcopy(traj_set[0])
323 mean_traj.values = mean_values
324
325 std_traj = copy.deepcopy(traj_set[0])
326 std_traj.values = std_values
327
328 return mean_traj, std_traj
329
344
346 """
347 Return a list of trajectories evaluated at times for all parameter sets
348 in ensemble.
349 """
350 traj_set = []
351 elems_assigned = [ensemble[node::num_procs] for node in range(num_procs)]
352 for worker in range(1, num_procs):
353 command = 'Ensembles.few_ensemble_trajs(net, times, elements)'
354 args = {'net': net, 'times': times, 'elements': elems_assigned[worker]}
355 pypar.send((command, args), worker)
356
357 traj_set = few_ensemble_trajs(net, times, elems_assigned[0])
358
359 for worker in range(1, num_procs):
360 traj_set.extend(pypar.receive(worker))
361
362 return traj_set
363
369
371 """
372 Return a list of trajectories, each one corresponding the a given passed-in
373 quantile.
374 """
375 all_values = scipy.array([traj.values for traj in traj_set])
376 sorted_values = scipy.sort(all_values, 0)
377
378 q_trajs = []
379 for q in quantiles:
380
381
382 index = q * (len(sorted_values) - 1)
383 below = int(scipy.floor(index))
384 above = int(scipy.ceil(index))
385 if above == below:
386 q_values = sorted_values[below]
387 else:
388
389 q_below = (1.0*below)/(len(sorted_values)-1)
390 q_above = (1.0*above)/(len(sorted_values)-1)
391 q_values = sorted_values[below] + (q - q_below)*(sorted_values[above] - sorted_values[below])/(q_above - q_below)
392 q_traj = copy.deepcopy(traj_set[0])
393 q_traj.values = q_values
394 q_trajs.append(q_traj)
395
396 return q_trajs
397
399 """
400 Return the Principle Component Analysis eigenvalues and eigenvectors (in
401 log parameters) of an ensemble. (This function takes the logs for you.)
402 """
403 return PCA_eig(scipy.log(scipy.asarray(ens)))
404
406 """
407 Return the Principle Component Analysis eigenvalues and eigenvectors
408 of an ensemble.
409 """
410 X = scipy.asarray(ens)
411 mean_vals = scipy.mean(X, 0)
412 X -= mean_vals
413 u, s, vh = scipy.linalg.svd(scipy.transpose(X))
414
415
416 X += mean_vals
417 return len(X)/s[::-1]**2, u[:,::-1]
418