SloppyCell.ReactionNetworks.OptDesign

13 """ Set up the quantities necessary to run the optimal design 14 algorithms. NOTE: This function needs to be called first 15 before any of the optimal design functions can be called. 16 17 paramfile: the name of a pickled file containing the 18 best fit parameters in KeyedList format 19 calcobject: the calculation object for which we are doing the 20 optimal design. (Note that in general, may be searching a 21 design over many different calculations, but here we only 22 consider one. Thus, we set design_sentraj equal to senstraj) 23 senstrajfile: the name of the file containing the pickled 24 sensitivity trajectory for the calculation, calcobject, 25 for the set of parameters in paramfile. 26 jtjfile: the name of the file containing the pickled Fisher 27 Information Matrix (J^t J) for the current set of data and 28 for the parameters in paramfile. 29 NOTE: The derivatives computed for J^tJ need to be with respect 30 to the *log* of the parameters 31 """ 32 import OptDesign as v 33 v.curp = load(paramfile) 34 v.jtj = load(jtjfile) 35 v.clc = calcobject 36 v.senstraj = load(senstrajfile) 37 v.design_senstraj = v.senstraj 38 v.p_names_ordered = v.curp.keys() 39 v.jtjdict = {} 40 for pindex1,pname1 in enumerate(v.p_names_ordered) : 41 for pindex2,pname2 in enumerate(v.p_names_ordered) : 42 v.jtjdict[(pname1,pname2)] = v.jtj[pindex1][pindex2] 43 44 v.ovvarnames = v.clc.optimizableVars.keys() 45 v.jtjtrunc = scipy.zeros((len(v.ovvarnames),len(v.ovvarnames)),scipy.float_) 46 47 # The number of optimizable variables for the calculation we are 48 # considering might be less than the number of parameters for the 49 # whole model. We are only working with this calculation so we 50 # need to trim down the J^t J (Fisher information) matrix 51 # accordingly 52 for pindex1,pname1 in enumerate(v.ovvarnames) : 53 for pindex2,pname2 in enumerate(v.ovvarnames) : 54 v.jtjtrunc[pindex1][pindex2] = v.jtjdict[(pname1,pname2)]

58 """ Make the sensitivity trajectory for the calculation 59 calcoject (same as in setup(...) above). 60 params: parameters as a KeyedList, sensitivity traj is 61 calculated at these parameters (should be same as in paramfile 62 in setup(...) above) 63 times: the timepoints in the sensitivity trajectory (1-d array) 64 senstrajfilename: the file to save the sensitivity trajectory to 65 66 Note that if times is very finely spaced, the 67 sensitivity trajectory will need a lot of storage space """ 68 senstraj = Dynamics.integrate_sensitivity(calcobject, times, params, 1.0e-6) 69 save(senstraj,senstrajfilename)

72 """ 73 chemnames = list of unmeasurable chemicals 74 designchemnames = list of measurable chemicals 75 logprior = prior on params, e.g. logprior = log(1000.0) means 76 parameter standard deviation will be less than a factor of 1000.0 77 78 Out of the list chemnames, find the best chemical and 79 best time point, that most reduces the integrated variance 80 over designchemnames """ 81 times = design_senstraj.timepoints 82 trunc_times = [times[i] for i in scipy.arange(0,len(times),1)] 83 best_change = 0.0 # the change should always be negative 84 best_chem = "None" 85 best_time = "None" 86 for dchemname in designchemnames : 87 print "On design chemical ", dchemname 88 for t in trunc_times : 89 sensvect_design = get_sens_vect(dchemname,t) 90 # NOTE: assuming a 10% error on the measurement --- use 10% of the 91 # maximum value in the trajectory 92 maxval = max(design_senstraj.get_var_traj(dchemname)) + 1.0 93 sensvect_design = sensvect_design/(.1*maxval) 94 intvar_change = integrated_var_change(chemnames,sensvect_design,logprior) 95 tot_change = 0.0 96 for id in chemnames : 97 tot_change = tot_change + intvar_change[id] 98 if tot_change < best_change : 99 best_change = tot_change 100 best_chem = dchemname 101 best_time = t 102 return best_change, best_chem, best_time

105 """ 106 sensvect : a sensitivity vector (length = # of params) of 107 unmeasurable quantity of interest 108 designchemnames : list of measurable chemicals 109 110 sensvect could be the sensitivity of a single chemical at a 111 single timepoint; then can use method get_sens_vect (see elsewhere 112 in this file) to compute this sensitivity vector. In that 113 case we are designing over the species variance at that single point 114 """ 115 times = senstraj.timepoints 116 trunc_times = [times[i] for i in scipy.arange(0,len(times),5)] 117 best_change = 0.0 # the change should always be negative 118 best_chem = "None" 119 best_time = "None" 120 for dchemname in designchemnames : 121 for t in trunc_times : 122 sensvect_design = get_sens_vect(dchemname,t) 123 var_change = single_variance_change(sensvect,sensvect_design,logprior) 124 if var_change < best_change : 125 best_change = var_change 126 best_chem = dchemname 127 best_time = t 128 return best_change, best_chem, best_time

131 """ chemnames : list of chemical names for which the 132 variance at all timepoints will be computed 133 logprior : prior on parameters. logprior = log(1000.0) 134 means params allowed to vary by about a factor of 1000.0 135 return values : 136 times: times of the trajectory 137 bestfit: a dictionary of best fit trajectories (keys are entries in chemnames) 138 var: a dictionary of variances (keys are entries in chemnames) 139 """ 140 #senstraj = load('EndogenousEGFR3T3sensNoPriors') 141 times = senstraj.timepoints 142 jtjinv = scipy.linalg.inv(jtjtrunc+1.0/logprior**2*scipy.eye( 143 len(jtjtrunc),len(jtjtrunc))) 144 var = {} 145 bestfit = {} 146 optvarkeys = clc.optimizableVars.keys() 147 first = optvarkeys[0] 148 last = optvarkeys[-1] 149 for name in chemnames : 150 var[name] = [] 151 bestfit[name] = [] 152 chemindex = senstraj.key_column.get(name) 153 index1sens = senstraj.key_column.get((name,first)) 154 index2sens = senstraj.key_column.get((name,last)) 155 sensarray_this_chem = copy.copy(senstraj.values[:,index1sens:(index2sens+1)]) 156 # Turn sensitivities into sensitivities with respect to log parameters 157 for j, pname in enumerate(ovvarnames) : 158 sensarray_this_chem[:,j] = sensarray_this_chem[:,j]*curp.get(pname) 159 160 tmp = scipy.dot(sensarray_this_chem,jtjinv) 161 for i in range(len(tmp[:,0])) : 162 var[name].append(scipy.dot(tmp[i,:],sensarray_this_chem[i,:])) 163 164 bestfit[name] = senstraj.values[:,chemindex] 165 var[name] = scipy.asarray(var[name]) 166 return times, bestfit, var

169 """ Same as above except the variances are now on the 170 logs of the chemicals trajectories. 171 """ 172 #senstraj = load('EndogenousEGFR3T3sensNoPriors') 173 times = senstraj.timepoints 174 jtjinv = scipy.linalg.inv(jtjtrunc+1.0/logprior**2*scipy.eye( 175 len(jtjtrunc),len(jtjtrunc))) 176 var = {} 177 bestfit = {} 178 optvarkeys = clc.optimizableVars.keys() 179 first = optvarkeys[0] 180 last = optvarkeys[-1] 181 for name in chemnames : 182 var[name] = [] 183 bestfit[name] = [] 184 chemindex = senstraj.key_column.get(name) 185 index1sens = senstraj.key_column.get((name,first)) 186 index2sens = senstraj.key_column.get((name,last)) 187 sensarray_this_chem = copy.copy(senstraj.values[:,index1sens:(index2sens+1)]) 188 traj_this_chem = copy.copy(senstraj.values[:,chemindex]) 189 for j, pname in enumerate(ovvarnames) : 190 sensarray_this_chem[:,j] = sensarray_this_chem[:,j]*curp.get(pname) 191 # need to scale each row by 1/chemvalue to mimic a derivative w.r.t. 192 # log chemicals. Add a small value to chemvalue to avoid divide by zero 193 for i in range(len(times)) : 194 sensarray_this_chem[i,:] = sensarray_this_chem[i,:]/(traj_this_chem[i]+1.0e-6) 195 196 tmp = scipy.dot(sensarray_this_chem,jtjinv) 197 for i in range(len(tmp[:,0])) : 198 var[name].append(scipy.dot(tmp[i,:],sensarray_this_chem[i,:])) 199 200 bestfit[name] = senstraj.values[:,chemindex] 201 var[name] = scipy.asarray(var[name]) 202 return times,bestfit,var

205 """ Get the variance for a single function of parameters 206 that has a sensitivity vector sensvect. Useful for looking at 207 variances in parameter combinations, or simple functions of 208 parameters. Note that if we are concerned with ratios and 209 products of parameters, it's often best to consider sensvect 210 as a sensitivity w.r.t. log parameters """ 211 jtjinv = scipy.linalg.inv(jtjtrunc+1.0/logprior**2*scipy.eye( 212 len(jtjtrunc),len(jtjtrunc))) 213 tmp = scipy.dot(jtjinv,sensvect) 214 var = scipy.dot(sensvect,tmp) 215 return var

218 """ 219 chemnames : list of chemical names at which we will look 220 at variance 221 sensvect_design : the sensitivity vector (one by no. params array) at 222 the new design point. 223 returns : (times, varchange) 224 the times and the change in variances at those times (should 225 be negative) for each of the chemicals in chemnames, after the 226 addition of the new timepoint. varchange is a dictionary 227 indexed by entries in chemnames. 228 """ 229 times = senstraj.timepoints 230 n = len(jtjtrunc) 231 jtjinv = scipy.linalg.inv(jtjtrunc+1.0/logprior**2*scipy.eye(n,n)) 232 233 #sensvect_design = scipy.resize(sensvect_design,(n,1)) 234 jtjinv_design = scipy.dot(jtjinv,sensvect_design) 235 #jtjinv_design = scipy.resize(jtjinv_design,(n,1)) # want a column vector 236 237 denominator = 1.0 + scipy.dot(sensvect_design,jtjinv_design) 238 239 varchange = {} 240 optvarkeys = clc.optimizableVars.keys() 241 first = optvarkeys[0] 242 last = optvarkeys[-1] 243 for name in chemnames : 244 varchange[name] = [] 245 chemindex = senstraj.key_column.get(name) 246 index1sens = senstraj.key_column.get((name,first)) 247 index2sens = senstraj.key_column.get((name,last)) 248 sensarray_this_chem = copy.copy(senstraj.values[:,index1sens:(index2sens+1)]) 249 for j, pname in enumerate(ovvarnames) : 250 sensarray_this_chem[:,j] = sensarray_this_chem[:,j]*curp.get(pname) 251 252 product = scipy.dot(sensarray_this_chem,jtjinv_design) 253 # this product is a number of timepoints by one vector, we need to 254 # square each element for the final formula 255 varchange[name] = -scipy.asarray(product**2/denominator) 256 257 return times, varchange

260 """ 261 sensvect : given a single function f(p) of parameters, this is the 262 derivative w.r.t. each of the parameters (in log parameters). For 263 ratios or products of rate constants, f(p) is a linear function 264 sensvect_design : the sensitivity vector of the new point in the 265 design you wish to add 266 returns: the variance change of the quantity f(p), given the 267 addition of the new data point, with sensitivity vector sensvect_design. 268 """ 269 n = len(jtjtrunc) 270 jtjinv = scipy.linalg.inv(jtjtrunc+1.0/logprior**2*scipy.eye(n,n)) 271 272 jtjinv_design = scipy.dot(jtjinv,sensvect_design) 273 denominator = 1.0 + scipy.dot(sensvect_design,jtjinv_design) 274 product = scipy.dot(sensvect,jtjinv_design) 275 return -product**2/denominator

278 """ get a sensitivity vector for a chemical "chemname" at a 279 time, time """ 280 tindex = design_senstraj._get_time_index(time,1.0e-4) 281 optvarkeys = clc.optimizableVars.keys() 282 first = optvarkeys[0] 283 last = optvarkeys[-1] 284 index1sens = design_senstraj.key_column.get((chemname,first)) 285 index2sens = design_senstraj.key_column.get((chemname,last)) 286 sens_vect = copy.copy( 287 design_senstraj.values[tindex,index1sens:(index2sens+1)]) 288 for j, pname in enumerate(ovvarnames) : 289 sens_vect[j] = sens_vect[j]*curp.get(pname) 290 return sens_vect

316 """ This is similar to var_change except now we pass in a sensarray 317 instead of sensvect --- this is a matrix of sensvects aligned rowwise. 318 Row i will be multiplied by sqrt(weights[i]) where sum(weights)=1 and 319 each weight is a number between zero and one. We will return the 320 change in variance for all the chemicals in chemnames """ 321 # we use the formula (Sherman-Woodbury-Morrison) 322 # (A+UV^t)^(-1) = A^(-1) - A^(-1)*U*(I + V^T*A^(-1)*U)^(-1)*V^t*A^(-1) 323 # where U = V and V^t = W^(1/2)*sensarray_design 324 times = senstraj.timepoints 325 ntimes = len(times) 326 k,n = sensarray_design.shape 327 jtjinv = scipy.linalg.inv(jtjtrunc+1.0/logprior**2*scipy.eye(n,n)) 328 Vt = scipy.zeros((k,n),scipy.float_) 329 for i in range(k) : 330 Vt[i,:] = scipy.sqrt(weights[i])*sensarray_design[i,:] 331 design_jtjinv = scipy.dot(Vt,jtjinv) 332 #jtjinv_design = scipy.resize(jtjinv_design,(n,1)) # want a column vector 333 334 denominator = scipy.eye(k,k) + \ 335 scipy.dot(design_jtjinv,scipy.transpose(Vt)) 336 inv_denom = scipy.linalg.inv(denominator) 337 338 varchange = {} 339 optvarkeys = clc.optimizableVars.keys() 340 first = optvarkeys[0] 341 last = optvarkeys[-1] 342 for name in chemnames : 343 varchange[name] = [] 344 chemindex = senstraj.key_column.get(name) 345 index1sens = senstraj.key_column.get((name,first)) 346 index2sens = senstraj.key_column.get((name,last)) 347 sensarray_this_chem = copy.copy(senstraj.values[:,index1sens:(index2sens+1)]) 348 for j, pname in enumerate(ovvarnames) : 349 sensarray_this_chem[:,j] = sensarray_this_chem[:,j]*curp.get(pname) 350 351 product = scipy.dot(design_jtjinv, 352 scipy.transpose(sensarray_this_chem)) 353 # each column vector of this matrix has to be dotted through the 354 # denominator matrix --- each column is a different time point 355 for j in range(ntimes) : 356 quadprod = scipy.dot(product[:,j],inv_denom) 357 quadprod = scipy.dot(quadprod,product[:,j]) 358 varchange[name].append(-quadprod) 359 varchange[name] = scipy.asarray(varchange[name]) 360 361 return times, varchange

372 """ For this cost function we're going to assume unconstrained 373 variables are being passed in, so we need to convert them to 374 a range between 0 and 1. The sum of the weights should also = 1 """ 375 weights0to1 = weights_trans(weights) 376 # now weights lie between 0 and 1 377 weights0to1 = weights0to1/scipy.sum(weights0to1) # this makes sure 378 # weights sum up to 1. 379 intvarchange = integrated_var_change_weighted(weights0to1,chemnames, 380 sensarray_design,logprior) 381 cost = 0.0 382 for n in intvarchange.keys() : 383 cost = cost + intvarchange[n] 384 return cost

395 """ 396 weights : a vector of positive numbers with length the same as the number of 397 rows of sensarray_design. The weights should sum to 1 398 chemnames: a list of unmeasurable chemical names over which we wish 399 to design experiments 400 sensarray_design: an array of sensitivities of measurable chemicals 401 or just an array of sensitivity vectors, each row a different 402 sensitivity vector 403 logprior : prior on parameters. logprior = log(1000.0) allows parameters 404 to fluctuate by a factor of 1000 """ 405 weights_trans = scipy.arcsin(2.0*weights-1.0) 406 # maxiter may need to be increased if convergence is not apparent 407 # or if the number of weights is increased 408 w = scipy.optimize.fmin(weight_cost,weights_trans,maxiter = 10000, 409 args=(chemnames,sensarray_design,logprior)) 410 woptnotnormed = (scipy.sin(w)+1.0)/2.0 411 wopt = woptnotnormed/scipy.sum(woptnotnormed) 412 return woptnotnormed,wopt

415 """ 416 chemnames: list of chemical names 417 logprior: prior on params. logprior = log(1000.0) means parameters 418 allowed to fluctuate by a factor of 1000 """ 419 times, bestfit, var = variances(chemnames,logprior) 420 for key in bestfit.keys() : 421 Plotting.figure() 422 Plotting.plot(times,bestfit[key]/scale) 423 Plotting.hold(True) 424 Plotting.plot(times,bestfit[key]/scale + scipy.sqrt(var[key])/scale,'r--') 425 Plotting.plot(times,bestfit[key]/scale - scipy.sqrt(var[key])/scale,'r--') 426 Plotting.title(key,fontsize=16) 427 Plotting.xlabel('time (minutes)',fontsize=16) 428 Plotting.ylabel('number of molecules',fontsize=16) 429 xtics = Plotting.gca().get_xticklabels() 430 ytics = Plotting.gca().get_yticklabels() 431 Plotting.setp(xtics,size=16) 432 Plotting.setp(ytics,size=16) 433 #Plotting.axis([0.0,40.0,-.01,1.2e4]) 434 Plotting.show() 435 if return_var : 436 return times, bestfit, var

439 """ 440 chemnames: list of chemical names 441 logprior: prior on params 442 Plots the standard deviation of the chemicals when the variance 443 is computed using logs of the chemical trajectories. This 444 makes sure the final plots do not have best_fit+-stddev that 445 do not become negative """ 446 times, bestfit, var = variances_log_chems(chemnames,logprior) 447 for key in bestfit.keys() : 448 Plotting.figure() 449 Plotting.plot(times,bestfit[key]) 450 Plotting.hold(True) 451 Plotting.plot(times,bestfit[key]*scipy.exp(scipy.sqrt(var[key])),'r-') 452 Plotting.plot(times,bestfit[key]*scipy.exp(-scipy.sqrt(var[key])),'r-') 453 Plotting.title(key,fontsize=14) 454 Plotting.xlabel('time') 455 Plotting.ylabel('arb. units') 456 #Plotting.axis([0.0,40.0,-.01,1.2e4]) 457 Plotting.show()

461 """ 462 chemnames: list of chemical names 463 sensvect_design: a sensivity vector of a quantity that is 464 measurable 465 This will plot the old and new variances of the chemicals in 466 chemnames, given a new measurement that has sensitivity vector 467 sensvect_design 468 """ 469 times,bestfit,var = variances(chemnames,logprior) 470 times,varchange = variance_change(chemnames,sensvect_design,logprior) 471 472 for key in bestfit.keys() : 473 Plotting.figure() 474 Plotting.plot(times,bestfit[key]) 475 Plotting.hold(True) 476 Plotting.plot(times,bestfit[key] + scipy.sqrt(var[key]),'r-') 477 Plotting.plot(times,bestfit[key] - scipy.sqrt(var[key]),'r-') 478 479 Plotting.plot(times,bestfit[key] + scipy.sqrt(var[key]+varchange[key]),'k--') 480 Plotting.plot(times,bestfit[key] - scipy.sqrt(var[key]+varchange[key]),'k--') 481 482 Plotting.title(key,fontsize=14) 483 Plotting.xlabel('time') 484 Plotting.ylabel('arb. units') 485 Plotting.axis([0.0,40.0,-.01,1.2e4]) 486 Plotting.show() 487 if return_data : 488 newvar = {} 489 for ky in var.keys() : 490 newvar[ky] = var[key] + varchange[key] 491 return times,bestfit,newvar

494 """ 495 weights : a proposed set of weights for each of the row vectors in 496 sensarray_design 497 chemnames : a list of chemicals for which we will plot the variance 498 logprior : as before 499 This will plot the old and new variances on chemnames, similar to 500 above. 501 NOTE: the weights that are passed in do not necessarily have to sum to 502 one. e.g. if the weights are normalized such that max(weights) = 1, then 503 by scaling all the weights by 1/sigma, you are then assuming that 504 the most accurate measurement has an error of size sigma. sigma for 505 example could be 20% of the maximum value of a trajectory. 506 """ 507 508 times,bestfit,var = variances(chemnames,logprior) 509 times,varchange = var_change_weighted(weights,chemnames,sensarray_design,logprior) 510 511 for key in bestfit.keys() : 512 Plotting.figure() 513 Plotting.plot(times,scale*bestfit[key]) 514 Plotting.hold(True) 515 Plotting.plot(times,scale*bestfit[key] + scale*scipy.sqrt(var[key]),'r-') 516 Plotting.plot(times,scale*bestfit[key] - scale*scipy.sqrt(var[key]),'r-') 517 518 Plotting.plot(times,scale*bestfit[key] + scale*scipy.sqrt(var[key]+varchange[key]),'k--') 519 Plotting.plot(times,scale*bestfit[key] - scale*scipy.sqrt(var[key]+varchange[key]),'k--') 520 521 Plotting.title(key,fontsize=14) 522 Plotting.xlabel('time') 523 Plotting.ylabel('arb. units') 524 Plotting.axis([0.0,40.0,-.01,1.2e4]) 525 Plotting.show() 526 527 if return_data : 528 newvar = {} 529 for ky in var.keys() : 530 newvar[ky] = var[key] + varchange[key] 531 return times,bestfit,newvar

534 times, bestfit, var = variances(chemnames,logprior) 535 nallplots = len(chemnames) 536 # 9 at a time 537 nfigs = nallplots/9 # integer division -- no fractional part 538 for figno in range(1,nfigs+1) : 539 Plotting.figure() 540 for i in range(0,9) : 541 Plotting.subplot(3,3,i+1) 542 chemind = i+(figno-1)*9 543 Plotting.plot(times,bestfit[chemnames[chemind]]) 544 Plotting.hold(True) 545 Plotting.plot(times,bestfit[chemnames[chemind]] 546 + scipy.sqrt(var[chemnames[chemind]]),'r-') 547 Plotting.plot(times,bestfit[chemnames[chemind]] 548 - scipy.sqrt(var[chemnames[chemind]]),'r-') 549 550 yt = Plotting.yticks() 551 Plotting.axis([0,100.0,yt[0],yt[-1]]) 552 Plotting.title(chemnames[chemind]) 553 Plotting.xlabel('time') 554 Plotting.ylabel('arb. units') 555 xt = Plotting.xticks() 556 Plotting.xticks([xt[0],xt[-1]]) 557 558 Plotting.savefig('./figs/variance_wt_'+i.__str__()+'.ps') 559 Plotting.show()

574 """ reduce_size takes an array of dimension m,n and 575 returns an array with every skipsize row sampled. 576 """ 577 size = array.shape 578 newsize = len(scipy.arange(0,size[0],skipsize)) 579 if len(size) == 1 : # a vector 580 newvect = scipy.zeros((newsize,),scipy.float_) 581 for iind,i in enumerate(scipy.arange(0,size[0],skipsize)) : 582 newvect[iind] = array[i] 583 return newvect 584 elif len(size) == 2 : # an array 585 newarray = scipy.zeros((newsize,size[1]),scipy.float_) 586 for iind,i in enumerate(scipy.arange(0,size[0],skipsize)) : 587 newarray[iind] = array[i] 588 return newarray

Source Code for Module SloppyCell.ReactionNetworks.OptDesign