
Source Code for Module SloppyCell.Ensembles

import logging
logger = logging.getLogger('Ensembles')
import copy
import shutil
import sys
import time

import scipy
import scipy.linalg
import scipy.stats
import scipy.fftpack

import SloppyCell.KeyedList_mod as KeyedList_mod
KeyedList = KeyedList_mod.KeyedList
import SloppyCell.Utility as Utility

from SloppyCell import HAVE_PYPAR, my_rank, my_host, num_procs
if HAVE_PYPAR:
    import pypar

def autocorrelation(series):
    """
    Return the normalized autocorrelation of a series using the FFT.
    """
    # We need to de-mean the series. Also, we want to pad with zeros to avoid
    # assuming our series is periodic.
    f = scipy.fftpack.rfft(scipy.asarray(series) - scipy.mean(series),
                           n=2*len(series))
    # The inverse FFT of |f|**2 is the autocorrelation.
    ac = scipy.fftpack.irfft(abs(f)**2)
    # But we padded with zeros, so it's too long.
    ac = ac[:len(series)]
    # And we need to adjust to remove the effect of the zeros we added.
    ac *= len(series)/(len(series) - 1.0*scipy.arange(len(series)))
    # Return the normalized ac.
    return ac/ac[0]
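
# A minimal usage sketch for autocorrelation(). The name `ens_Fs` and the 0.1
# threshold are illustrative assumptions; typically `ens_Fs` would be the list
# of free energies returned by ensemble_log_params() below.
def _example_correlation_length(ens_Fs, threshold=0.1):
    """
    Sketch: estimate how many ensemble steps separate roughly independent
    members, as the first lag at which the normalized autocorrelation of the
    sampled free energies drops below `threshold`.
    """
    ac = autocorrelation(ens_Fs)
    below = scipy.nonzero(ac < threshold)[0]
    if len(below):
        return int(below[0])
    return len(ac)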

def ensemble(m, params, hess=None,
             steps=scipy.inf, max_run_hours=scipy.inf,
             temperature=1.0, step_scale=1.0,
             sing_val_cutoff=0, seeds=None,
             recalc_hess_alg=False, recalc_func=None,
             save_hours=scipy.inf, skip_elems=0, save_to=None):
    """
    Generate a Bayesian ensemble of parameter sets consistent with the data in
    the model. The sampling is done in terms of the bare parameters.

    Inputs:
     (All arguments not listed here are identical to those of
      ensemble_log_params.)
     recalc_func --- Function used to calculate the hessian matrix. It should
                     take only a parameters argument and return the matrix.
                     If this is None, the default is to use m.GetJandJtJ.
    """
    return ensemble_log_params(m, params, hess, steps, max_run_hours,
                               temperature, step_scale, sing_val_cutoff, seeds,
                               recalc_hess_alg, recalc_func, save_hours,
                               save_to, skip_elems, log_params=False)

def ensemble_log_params(m, params, hess=None,
                        steps=scipy.inf, max_run_hours=scipy.inf,
                        temperature=1.0, step_scale=1.0,
                        sing_val_cutoff=0, seeds=None,
                        recalc_hess_alg=False, recalc_func=None,
                        save_hours=scipy.inf, save_to=None,
                        skip_elems=0, log_params=True):
    """
    Generate a Bayesian ensemble of parameter sets consistent with the data in
    the model. The sampling is done in terms of the logarithm of the
    parameters.

    Inputs:
     m -- Model to generate the ensemble for
     params -- Initial parameter KeyedList to start from
     hess -- Hessian of the model
     steps -- Maximum number of Monte Carlo steps to attempt
     max_run_hours -- Maximum number of hours to run
     temperature -- Temperature of the ensemble
     step_scale -- Additional scale applied to each step taken. step_scale < 1
                   results in steps shorter than those dictated by the
                   quadratic approximation and may be useful if acceptance is
                   low.
     sing_val_cutoff -- Truncate the quadratic approximation at eigenvalues
                        smaller than this fraction of the largest.
     seeds -- A tuple of two integers to seed the random number generator
     recalc_hess_alg --- If True, the Monte Carlo is done by recalculating the
                         hessian matrix every timestep. This significantly
                         increases the computation requirements for each step,
                         but it may be worth it if it improves convergence.
     recalc_func --- Function used to calculate the hessian matrix. It should
                     take only a log parameters argument and return the
                     matrix. If this is None, the default is to use
                     m.GetJandJtJInLogParameters.
     save_hours --- If save_to is not None, the ensemble will be saved to
                    that file every 'save_hours' hours.
     save_to --- Filename to save ensemble to.
     skip_elems --- If non-zero, skip_elems are skipped between each included
                    step in the returned ensemble. For example, skip_elems=1
                    will return every other member. Using this option can
                    reduce memory consumption.

    Outputs:
     ens, ens_fes, ratio
     ens -- List of KeyedList parameter sets in the ensemble
     ens_fes -- List of free energies for each parameter set
     ratio -- Fraction of attempted moves that were accepted

    The sampling is done by Markov Chain Monte Carlo, with a
    Metropolis-Hastings update scheme. The candidate-generating density is a
    gaussian centered on the current point, with axes determined by the
    hessian. For a useful introduction see:
     Chib and Greenberg. "Understanding the Metropolis-Hastings Algorithm"
     _The_American_Statistician_ 49(4), 327-335
    """
    if scipy.isinf(steps) and scipy.isinf(max_run_hours):
        logger.warn('Both steps and max_run_hours are infinite! '
                    'Code will not stop by itself!')

    if seeds is None:
        seeds = int(time.time()%1 * 1e6)
        logger.debug('Seeding random number generator based on system time.')
    logger.debug('Seed used: %s' % str(seeds))
    scipy.random.seed(seeds)
    if isinstance(params, KeyedList):
        param_keys = params.keys()

    curr_params = copy.deepcopy(params)
    curr_F = m.free_energy(curr_params, temperature)
    ens, ens_Fs = [curr_params], [curr_F]

    # We work with arrays of params through the rest of the code
    curr_params = scipy.array(curr_params)

    # Only supply a default hessian-calculating function if the caller did
    # not pass one in.
    if recalc_func is None:
        if log_params:
            recalc_func = lambda p: m.GetJandJtJInLogParameters(scipy.log(p))[1]
        else:
            recalc_func = lambda p: m.GetJandJtJ(p)[1]

    accepted_moves, attempt_exceptions, ratio = 0, 0, scipy.nan
    start_time = last_save_time = time.time()

    # Calculate our first hessian if necessary
    if hess is None:
        hess = recalc_func(curr_params)
    # Generate the sampling matrix used to generate candidate moves
    samp_mat = _sampling_matrix(hess, sing_val_cutoff, temperature, step_scale)

    steps_attempted = 0
    while steps_attempted < steps:
        # Have we run too long?
        if (time.time() - start_time) >= max_run_hours*3600:
            break

        # Generate the trial move from the quadratic approximation. The
        # step_scale and temperature factors are already folded into the
        # sampling matrix by _sampling_matrix.
        deltaParams = _trial_move(samp_mat)
        scaled_step = deltaParams

        if log_params:
            next_params = curr_params * scipy.exp(scaled_step)
        else:
            next_params = curr_params + scaled_step

        try:
            next_F = m.free_energy(next_params, temperature)
        except Utility.SloppyCellException, X:
            logger.warn('SloppyCellException in free energy evaluation at '
                        'step %i, free energy set to infinity.' % len(ens))
            logger.warn('Parameters tried: %s.' % str(next_params))
            attempt_exceptions += 1
            next_F = scipy.inf

        if recalc_hess_alg and not scipy.isinf(next_F):
            try:
                next_hess = recalc_func(next_params)
                next_samp_mat = _sampling_matrix(next_hess, sing_val_cutoff,
                                                 temperature, step_scale)
                accepted = _accept_move_recalc_alg(curr_F, samp_mat,
                                                   next_F, next_samp_mat,
                                                   deltaParams, temperature)
            except Utility.SloppyCellException, X:
                logger.warn('SloppyCellException in JtJ evaluation at step '
                            '%i, move not accepted.' % len(ens))
                logger.warn('Parameters tried: %s.' % str(next_params))
                attempt_exceptions += 1
                next_F = scipy.inf
                accepted = False
        else:
            accepted = _accept_move(next_F - curr_F, temperature)

        steps_attempted += 1
        if accepted:
            accepted_moves += 1.
            curr_params = next_params
            curr_F = next_F
            if recalc_hess_alg:
                hess = next_hess
                samp_mat = next_samp_mat

        if steps_attempted % (skip_elems + 1) == 0:
            ens_Fs.append(curr_F)
            if isinstance(params, KeyedList):
                ens.append(KeyedList(zip(param_keys, curr_params)))
            else:
                ens.append(curr_params)
        ratio = accepted_moves/steps_attempted

        # Save to a file
        if save_to is not None\
           and time.time() >= last_save_time + save_hours * 3600:
            _save_ens(ens, ens_Fs, ratio, save_to, attempt_exceptions,
                      steps_attempted)
            last_save_time = time.time()

    if save_to is not None:
        _save_ens(ens, ens_Fs, ratio, save_to, attempt_exceptions,
                  steps_attempted)

    return ens, ens_Fs, ratio
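
# A minimal usage sketch for ensemble_log_params(). The names `m`, `popt`,
# and `jtj` are assumptions standing in for an existing Model, its best-fit
# KeyedList of parameters, and a JtJ estimate of the hessian; the step count
# and filename are illustrative choices only.
def _example_run_log_ensemble(m, popt, jtj):
    """
    Sketch: generate a short log-parameter ensemble, saving progress roughly
    every hour, and return the results along with the acceptance ratio.
    """
    ens, ens_Fs, ratio = ensemble_log_params(m, popt, hess=jtj,
                                             steps=1000, temperature=1.0,
                                             save_to='ensemble.save',
                                             save_hours=1.0)
    logger.debug('Example run acceptance ratio: %f' % ratio)
    return ens, ens_Fs, ratio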

def _save_ens(ens, ens_Fs, ratio, save_to, attempt_exceptions,
              steps_attempted):
    temp_name = save_to + '_temporary_SloppyCellFile'
    Utility.save((ens, ens_Fs, ratio), temp_name)
    shutil.move(temp_name, save_to)
    logger.debug('Ensemble of length %i saved to %s.' % (len(ens), save_to))
    logger.debug('Acceptance ratio so far is %f.' % ratio)
    logger.debug('Attempted moves threw an exception %i times out of %i '
                 'attempts.' % (attempt_exceptions, steps_attempted))

def _accept_move(delta_F, temperature):
    """
    Basic Metropolis accept/reject step.
    """
    p = scipy.rand()
    return (p < scipy.exp(-delta_F/temperature))
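
# Worked illustration of the rule above (made-up numbers): with delta_F = 2.0
# and temperature = 1.0, _accept_move accepts with probability
# exp(-2.0/1.0) ~= 0.135. Downhill moves (delta_F <= 0) are always accepted,
# since exp(-delta_F/temperature) >= 1 > p.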

def _accept_move_recalc_alg(curr_F, curr_samp_mat, next_F, next_samp_mat,
                            step, T):
    """
    Accept/reject when the sampling matrix is recalculated each step.
    """
    pi_x = scipy.exp(-curr_F/T)
    # This is the current location's covariance sampling matrix.
    sigma_curr = scipy.dot(curr_samp_mat, scipy.transpose(curr_samp_mat))
    sigma_curr_inv = scipy.linalg.inv(sigma_curr)
    # This is the transition probability from the current point to the next.
    q_x_to_y = scipy.exp(-_quadratic_cost(step, sigma_curr_inv))\
            / scipy.sqrt(scipy.linalg.det(sigma_curr))

    pi_y = scipy.exp(-next_F/T)
    sigma_next = scipy.dot(next_samp_mat, scipy.transpose(next_samp_mat))
    sigma_next_inv = scipy.linalg.inv(sigma_next)
    q_y_to_x = scipy.exp(-_quadratic_cost(-step, sigma_next_inv))\
            / scipy.sqrt(scipy.linalg.det(sigma_next))

    p = scipy.rand()
    accept_prob = (pi_y*q_y_to_x)/(pi_x*q_x_to_y)

    return p < accept_prob
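
# In symbols, with Sigma = samp_mat * samp_mat^T as the proposal covariance
# and the factor of 1/2 supplied by _quadratic_cost, the candidate density is
#   q(x -> y)  proportional to  exp(-0.5 * step^T Sigma_x^-1 step) / sqrt(det Sigma_x),
# and a move is accepted with probability
#   min(1, [exp(-F(y)/T) * q(y -> x)] / [exp(-F(x)/T) * q(x -> y)]),
# which reduces to the plain Metropolis rule of _accept_move when the
# sampling matrix is held fixed.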

def _sampling_matrix(hessian, cutoff=0, temperature=1, step_scale=1):
    # We basically need the SVD of the hessian, i.e. its singular values and
    # eigenvectors: hessian = u * diag(sing_vals) * vh
    u, sing_vals, vh = scipy.linalg.svd(0.5 * hessian)

    # Clip the singular values whose inverses would be huge, so the sampling
    # matrix does not take enormous steps along very sloppy directions.
    cutoff_sing_val = cutoff * max(sing_vals)

    D = 1.0/scipy.maximum(sing_vals, cutoff_sing_val)

    ## Now fill in the sampling matrix ("square root" of the Hessian).
    ## Note that sqrt(D[i]) is taken here whereas Kevin took sqrt(D[j]);
    ## this is because vh is the transpose of his PT. -JJW
    samp_mat = scipy.transpose(vh) * scipy.sqrt(D)

    # Divide the sampling matrix by an additional factor such
    # that the expected quadratic increase in cost will be about 1.
    cutoff_vals = scipy.compress(sing_vals < cutoff_sing_val, sing_vals)
    if len(cutoff_vals):
        scale = scipy.sqrt(len(sing_vals) - len(cutoff_vals)
                           + sum(cutoff_vals)/cutoff_sing_val)
    else:
        scale = scipy.sqrt(len(sing_vals))

    samp_mat /= scale
    samp_mat *= step_scale
    samp_mat *= scipy.sqrt(temperature)

    return samp_mat

def _trial_move(sampling_mat):
    # Draw independent standard normal deviates and map them through the
    # sampling matrix to get a candidate step.
    randVec = scipy.randn(len(sampling_mat))
    trialMove = scipy.dot(sampling_mat, randVec)

    return trialMove
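
# A minimal sketch tying _sampling_matrix() and _trial_move() together. The
# argument `hess` is assumed to be a (log-parameter) JtJ estimate of the
# hessian; the cutoff, temperature, and number of draws are illustrative.
def _example_draw_steps(hess, n_steps=5):
    """
    Sketch: build the sampling matrix once and draw a few candidate steps
    from the quadratic approximation.
    """
    samp_mat = _sampling_matrix(hess, cutoff=1e-6, temperature=1.0,
                                step_scale=1.0)
    return [_trial_move(samp_mat) for _ in range(n_steps)]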

def _quadratic_cost(trialMove, hessian):
    """
    The cost from the quadratic approximation of a trialMove, given the
    hessian.

    (Note: the hessian here is assumed to be the second derivative matrix of
    the cost, without an additional factor of 1/2.)
    """
    quadratic = 0.5*scipy.dot(scipy.transpose(trialMove),
                              scipy.dot(hessian, trialMove))
    return quadratic

def traj_ensemble_stats(traj_set):
    """
    Return the mean and standard deviation trajectory objects for the given
    input list of trajectories. (All must be evaluated at the same points.)
    """
    all_values = [traj.values for traj in traj_set]

    mean_values = scipy.mean(all_values, 0)
    std_values = scipy.std(all_values, 0)

    mean_traj = copy.deepcopy(traj_set[0])
    mean_traj.values = mean_values

    std_traj = copy.deepcopy(traj_set[0])
    std_traj.values = std_values

    return mean_traj, std_traj

def few_ensemble_trajs(net, times, elements):
    """
    Integrate the network over times for each parameter set in elements,
    dropping any trajectories that fail or contain NaNs.
    """
    import SloppyCell.ReactionNetworks.Dynamics as Dynamics
    traj_set = []
    for params in elements:
        try:
            traj = Dynamics.integrate(net, times, params=params,
                                      fill_traj=False)
            if not scipy.any(scipy.isnan(traj.values)):
                traj_set.append(traj)
        except Utility.SloppyCellException:
            logger.warn('Exception in network integration on node %i.'
                        % my_rank)

    return traj_set

def ensemble_trajs(net, times, ensemble):
    """
    Return a list of trajectories evaluated at times for all parameter sets
    in ensemble.
    """
    # Deal the ensemble members out to the available processors.
    elems_assigned = [ensemble[node::num_procs] for node in range(num_procs)]
    for worker in range(1, num_procs):
        command = 'Ensembles.few_ensemble_trajs(net, times, elements)'
        args = {'net': net, 'times': times, 'elements': elems_assigned[worker]}
        pypar.send((command, args), worker)

    traj_set = few_ensemble_trajs(net, times, elems_assigned[0])

    for worker in range(1, num_procs):
        traj_set.extend(pypar.receive(worker))

    return traj_set

def net_ensemble_trajs(net, times, ensemble):
    """
    Return the trajectory of the first ensemble member along with the
    ensemble mean and standard deviation trajectories.
    """
    traj_set = ensemble_trajs(net, times, ensemble)
    best_traj = traj_set[0]
    mean_traj, std_traj = traj_ensemble_stats(traj_set)
    return best_traj, mean_traj, std_traj

def traj_ensemble_quantiles(traj_set, quantiles=(0.025, 0.5, 0.975)):
    """
    Return a list of trajectories, each one corresponding to a given passed-in
    quantile.
    """
    all_values = scipy.array([traj.values for traj in traj_set])
    sorted_values = scipy.sort(all_values, 0)

    q_trajs = []
    for q in quantiles:
        # Calculate the fractional index corresponding to this quantile. The
        # -1 is because Python arrays are 0-indexed.
        index = q * (len(sorted_values) - 1)
        below = int(scipy.floor(index))
        above = int(scipy.ceil(index))
        if above == below:
            q_values = sorted_values[below]
        else:
            # Linearly interpolate between the two bracketing members.
            q_below = (1.0*below)/(len(sorted_values)-1)
            q_above = (1.0*above)/(len(sorted_values)-1)
            q_values = sorted_values[below] + (q - q_below)\
                    *(sorted_values[above] - sorted_values[below])/(q_above - q_below)
        q_traj = copy.deepcopy(traj_set[0])
        q_traj.values = q_values
        q_trajs.append(q_traj)

    return q_trajs
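
# A minimal usage sketch combining ensemble_trajs(), traj_ensemble_stats(),
# and traj_ensemble_quantiles(). The names `net`, `times`, and `ens` are
# assumed to come from an earlier ensemble run; the thinning stride of 50 is
# an illustrative choice.
def _example_traj_uncertainty(net, times, ens):
    """
    Sketch: integrate a thinned ensemble and return the mean trajectory along
    with the 2.5%, 50%, and 97.5% quantile trajectories.
    """
    traj_set = ensemble_trajs(net, times, ens[::50])
    mean_traj, std_traj = traj_ensemble_stats(traj_set)
    lower, median, upper = traj_ensemble_quantiles(traj_set,
                                                   (0.025, 0.5, 0.975))
    return mean_traj, (lower, median, upper)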

def PCA_eig_log_params(ens):
    """
    Return the Principal Component Analysis eigenvalues and eigenvectors (in
    log parameters) of an ensemble. (This function takes the logs for you.)
    """
    return PCA_eig(scipy.log(scipy.asarray(ens)))

def PCA_eig(ens):
    """
    Return the Principal Component Analysis eigenvalues and eigenvectors
    of an ensemble.
    """
    X = scipy.asarray(ens)
    mean_vals = scipy.mean(X, 0)
    X -= mean_vals
    u, s, vh = scipy.linalg.svd(scipy.transpose(X))
    # This return adjusts things so that the results can be easily compared
    # with the JtJ eigensystem. Add the means back first, since asarray may
    # have aliased the caller's array and we modified it in place.
    X += mean_vals
    return len(X)/s[::-1]**2, u[:,::-1]
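
# A minimal usage sketch complementing PCA_eig_log_params(): per-parameter
# spread of the ensemble in log space. `ens` is assumed to be the list of
# parameter sets returned by ensemble_log_params(); this helper is purely
# illustrative.
def _example_log_param_spread(ens):
    """
    Sketch: return the standard deviation of each log parameter across the
    ensemble, a quick scalar complement to the PCA eigensystem above.
    """
    log_ens = scipy.log(scipy.asarray(ens))
    return scipy.std(log_ens, 0)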