Source Code for Module SloppyCell.lmopt

from __future__ import nested_scopes
# Levenberg-Marquardt minimization routines
"""
fmin_lm  : standard Levenberg-Marquardt
fmin_lmNoJ : Levenberg-Marquardt using a cost function instead of
             a residual function and a gradient/J^tJ pair instead
             of the derivative of the residual function. Useful
             in problems where the number of residuals is very large.
fmin_lm_scale : scale-invariant Levenberg-Marquardt

"""
import scipy
from scipy import absolute, sqrt, asarray, zeros, mat, transpose, ones, dot, sum
import scipy.linalg
import copy
import SloppyCell.Utility
save = SloppyCell.Utility.save # pickling-based save function from SloppyCell.Utility

import SloppyCell.KeyedList_mod as KeyedList_mod
KeyedList = KeyedList_mod.KeyedList

abs = absolute
_epsilon = sqrt(scipy.finfo(scipy.float_).eps)


def approx_fprime(xk,f,epsilon,*args):
    # Forward-difference approximation to the gradient of the scalar function f.
    f0 = apply(f,(xk,)+args)
    grad = scipy.zeros((len(xk),),scipy.float_)
    ei = scipy.zeros((len(xk),),scipy.float_)
    for k in range(len(xk)):
        ei[k] = epsilon
        grad[k] = (apply(f,(xk+ei,)+args) - f0)/epsilon
        ei[k] = 0.0
    return grad

def approx_fprime1(xk,f,epsilon,*args):
    """ centred difference formula to approximate fprime """
    #f0 = apply(f,(xk,)+args)
    grad = scipy.zeros((len(xk),),scipy.float_)
    ei = scipy.zeros((len(xk),),scipy.float_)
    epsilon = (epsilon**2.0)**(1.0/3.0) # should be macheps^(1/3)
    for k in range(len(xk)):
        ei[k] = epsilon
        grad[k] = (apply(f,(xk+ei,)+args) - apply(f,(xk-ei,)+args))/(2.0*epsilon)
        ei[k] = 0.0
    return grad

def approx_fprime2(xk,f,epsilon,*args):
    """ centred difference formula to approximate the jacobian, given the
    residual function """
    #f0 = apply(f,(xk,)+args)
    grad = scipy.zeros((len(xk),),scipy.float_)
    ei = scipy.zeros((len(xk),),scipy.float_)
    epsilon = (epsilon**2.0)**(1.0/3.0) # should be macheps^(1/3)
    ei[0] = epsilon
    resminus = asarray(apply(f,(xk-ei,)+args))
    resplus = asarray(apply(f,(xk+ei,)+args))
    m = len(resminus)
    jac = scipy.zeros((m,len(xk)),scipy.float_)
    jac[:,0] = (resplus-resminus)/(2.0*epsilon)
    ei[0] = 0.0
    for k in range(1,len(xk)):
        ei[k] = epsilon
        resplus = asarray(apply(f,(xk+ei,)+args))
        resminus = asarray(apply(f,(xk-ei,)+args))
        jac[:,k] = (resplus-resminus)/(2.0*epsilon)
        #jac[k,:] = mat(transpose(mat(apply(f,(xk+ei,)+args) - apply(f,(xk-ei,)+args))))/(2.0*epsilon)
        ei[k] = 0.0
    return jac

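# Illustrative sketch (added for documentation, not part of the original
# SloppyCell module): approx_fprime2 builds the central-difference Jacobian
# column by column, so it can stand in for an analytic fprime. The toy
# residual function below is hypothetical.
def _example_approx_fprime2():
    def toy_res(p):
        # two residuals, two parameters
        return asarray([p[0] - 1.0, 10.0*(p[1] - p[0]**2)])
    # finite-difference Jacobian, shape (number of residuals, number of parameters)
    return approx_fprime2(asarray([0.5, 0.5]), toy_res, _epsilon)
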
def check_grad(func, grad, x0, *args):
    approx_grad = approx_fprime(x0,func,_epsilon,*args)
    print "Finite difference gradient ", approx_grad
    analytic_grad = grad(x0,*args)
    print "Analytic gradient ", analytic_grad
    differencenorm = sqrt(sum((approx_grad-analytic_grad)**2))
    print "Norm of difference is ", differencenorm
    return differencenorm

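# Illustrative sketch (added for documentation, not part of the original
# module): check_grad compares the finite-difference gradient of a scalar
# cost function against an analytic gradient. The toy cost and gradient
# below are hypothetical.
def _example_check_grad():
    def toy_cost(p):
        return (p[0] - 1.0)**2 + 100.0*(p[1] - p[0]**2)**2
    def toy_grad(p):
        return asarray([2.0*(p[0] - 1.0) - 400.0*p[0]*(p[1] - p[0]**2),
                        200.0*(p[1] - p[0]**2)])
    # prints both gradients and returns the norm of their difference
    return check_grad(toy_cost, toy_grad, asarray([0.5, 0.5]))
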
def approx_fhess_p(x0,p,fprime,epsilon,*args):
    f2 = apply(fprime,(x0+epsilon*p,)+args)
    f1 = apply(fprime,(x0,)+args)
    return (f2 - f1)/epsilon

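# Illustrative sketch (added for documentation, not part of the original
# module): approx_fhess_p forward-differences a gradient function to get a
# Hessian-vector product. The toy gradient below is hypothetical.
def _example_approx_fhess_p():
    def toy_grad(p):
        # gradient of the quadratic cost p[0]**2 + 10.0*p[1]**2
        return asarray([2.0*p[0], 20.0*p[1]])
    # approximates H*p for the direction p = [1, 0]
    return approx_fhess_p(asarray([1.0, 1.0]), asarray([1.0, 0.0]),
                          toy_grad, _epsilon)
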
def fmin_lm(f, x0, fprime=None, args=(), avegtol=1e-5, epsilon=_epsilon,
            maxiter=None, full_output=0, disp=1, retall=0, lambdainit = None,
            jinit = None, trustradius = 1.0):
    """Minimizer for a nonlinear least squares problem. Allowed to
    have more residuals than parameters or vice versa.
    f : residual function (function of parameters)
    fprime : derivative of residual function with respect to parameters.
             Should return a matrix (J) with dimensions number of residuals
             by number of parameters.
    x0 : initial parameter set
    avegtol : convergence tolerance on the gradient vector
    epsilon : size of steps to use for finite differencing of f (if fprime
              not passed in)
    maxiter : maximum number of iterations
    full_output : 0 to get only the minimum set of parameters back
                  1 if you also want the best parameter set, the
                  lowest value of f, the number of function calls,
                  the number of gradient calls, the convergence flag,
                  the last Marquardt parameter used (lambda), and the
                  last evaluation of fprime (J matrix)
    disp : 0 for no display, 1 to give cost at each iteration and convergence
           conditions at the end
    retall : 0 for nothing extra to be returned, 1 for all the parameter
             sets during the optimization to be returned
    lambdainit : initial value of the Marquardt parameter to use (useful if
                 continuing from an old optimization run)
    jinit : initial evaluation of the residual sensitivity matrix (J).
    trustradius : set this to the maximum move you want to allow in a single
                  parameter direction.
                  If you are using log parameters, then setting this
                  to 1.0, for example, corresponds to a multiplicative
                  change of exp(1) = 2.718
    """

    app_fprime = 0
    if fprime is None:
        app_fprime = 1

    xcopy = copy.copy(x0)
    if isinstance(x0,KeyedList) :
        x0 = asarray(x0.values())
    else :
        x0 = asarray(x0)

    if lambdainit != None :
        Lambda = lambdainit
    else :
        Lambda = 1.0e-2
    Mult = 10.0
    n = len(x0)
    func_calls = 0
    grad_calls = 0
    res = asarray(apply(f,(x0,)+args))
    m = res.shape[0]
    if maxiter is None :
        maxiter = 200*n
    niters = 0
    x = x0

    gtol = n*avegtol

    if retall:
        allvecs = [x]
    x1 = x0
    x2 = x0
    d = zeros(n,scipy.float_)
    move = zeros(n,scipy.float_)
    finish = 0
    if jinit!=None :
        j = jinit
    else :
        if app_fprime :
            j = asarray(apply(approx_fprime2,(x,f,epsilon)+args))
            func_calls = func_calls + 2*len(x)
        else :
            j = asarray(apply(fprime,(x,)+args))
            grad_calls+=1

    res = asarray(apply(f,(x,)+args))
    func_calls+=1
    # NOTE: Below is actually *half* the gradient (because
    # we define the cost as the sum of squares of residuals)
    # However the equations defining the optimization move, dp,
    # are 2.0*J^tJ dp = -2.0*J^t r, where r is the residual
    # vector; therefore, the twos cancel both sides
    grad = mat(res)*mat(j)

    while (niters<maxiter) and (finish == 0):
        # note: grad, res and j will be available from the end of the
        # last iteration. They just need to be computed the zeroth
        # time as well (above)

        lmh = mat(transpose(j))*mat(j)
        # use more accurate way to get e-vals/dirns
        #[u,s,v] = scipy.linalg.svd(lmh)
        [u,ssqrt,vt] = scipy.linalg.svd(j)
        # want n singular values even if m<n and we have
        # more parameters than data points.
        if (len(ssqrt) == n) :
            s = ssqrt**2
        elif (len(ssqrt)<n) :
            s = zeros((n,),scipy.float_)
            s[0:len(ssqrt)] = ssqrt**2
        #print "s is (in original) ", s
        #rhsvect = -mat(transpose(u))*mat(transpose(grad))

        rhsvect = -mat(vt)*mat(transpose(grad))
        rhsvect = asarray(rhsvect)[:,0]
        move = abs(rhsvect)/(s+Lambda*scipy.ones(n)+1.0e-30*scipy.ones(n))
        move = list(move)
        maxindex = move.index(max(move))
        move = asarray(move)

        if max(move) > trustradius :
            Lambda = Mult*(1.0/trustradius*abs(rhsvect[maxindex])-s[maxindex])
            #print " Increasing lambda to ", Lambda
        # now do the matrix inversion

        for i in range(0,n) :
            if (s[i]+Lambda) < 1.0e-30 :
                d[i] = 0.0
            else :
                d[i] = 1.0/(s[i]+Lambda)
            move[i] = d[i]*rhsvect[i]
        move = asarray(move)
        # move = asarray(mat(transpose(v))*mat(transpose(mat(move))))[:,0]
        move = asarray(mat(transpose(vt))*mat(transpose(mat(move))))[:,0]
        # print move
        x1 = x + move
        moveold = move[:]

        for i in range(0,n) :
            if (s[i]+Lambda/Mult) < 1.0e-30 :
                d[i] = 0.0
            else :
                d[i] = 1.0/(s[i]+Lambda/Mult)
            move[i] = d[i]*rhsvect[i]
        move = asarray(mat(transpose(vt))*mat(transpose(mat(move))))[:,0]

        x2 = x + asarray(move)
        currentcost = sum(asarray(apply(f,(x,)+args))**2)
        func_calls+=1
        try:
            res2 = asarray(apply(f,(x2,)+args))
            costlambdasmaller = sum(res2**2)
        except SloppyCell.Utility.SloppyCellException:
            costlambdasmaller = scipy.inf
        func_calls+=1
        try:
            res1 = asarray(apply(f,(x1,)+args))
            costlambda = sum(res1**2)
        except SloppyCell.Utility.SloppyCellException:
            costlambda = scipy.inf
        func_calls+=1
        if disp :
            print 'Iteration number', niters
            print 'Current cost', currentcost
            print "Move 1 gives cost of" , costlambda
            print "Move 2 gives cost of ", costlambdasmaller
        #fp = open('LMoutfile','a')
        #fp.write('Iteration number ' + niters.__str__() + '\n')
        #fp.write('Current cost ' + currentcost.__str__() + '\n')
        #fp.write('Move 1 gives cost of ' + costlambda.__str__() + '\n')
        #fp.write('Move 2 gives cost of ' + costlambdasmaller.__str__() + '\n')
        #fp.close()

        oldcost = currentcost
        oldres = res
        oldjac = j

        if costlambdasmaller <= currentcost :
            Lambda = Lambda/Mult
            x = x2[:]
            if retall:
                allvecs.append(x)
            currentcost = costlambdasmaller
            if app_fprime :
                j = asarray(apply(approx_fprime2,(x2,f,epsilon)+args))
                func_calls = func_calls + 2*len(x2)
            else :
                j = asarray(apply(fprime,(x2,)+args))
                grad_calls+=1
            grad = mat(res2)*mat(j)
            if sum(abs(2.0*grad), axis=None) < gtol :
                finish = 2
        elif costlambda <= currentcost :
            currentcost = costlambda
            x = x1[:]
            move = moveold[:]
            if retall:
                allvecs.append(x)
            if app_fprime :
                j = asarray(apply(approx_fprime2,(x1,f,epsilon)+args))
                func_calls = func_calls + 2*len(x1)
            else :
                j = asarray(apply(fprime,(x1,)+args))
                grad_calls+=1

            grad = mat(res1)*mat(j)
            if sum(abs(2.0*grad), axis=None) < gtol :
                finish = 2
        else :
            Lambdamult = Lambda
            costmult = costlambda
            piOverFour = .78539816339744825
            NTrials = 0
            NTrials2 = 0
            move = moveold[:]
            while (costmult > currentcost) and (NTrials < 10) :
                num = -scipy.dot(grad,move)[0]
                den = scipy.linalg.norm(grad)*scipy.linalg.norm(move)
                gamma = scipy.arccos(num/den)
                NTrials = NTrials+1
                # was (gamma>piOverFour) below but that doesn't
                # make much sense to me. I don't think you should
                # cut back on a given step, I think the trust
                # region strategy is more successful
                if (gamma > 0) :
                    Lambdamult = Lambdamult*Mult
                    for i in range(0,n) :
                        if s[i]+Lambdamult < 1.0e-30 :
                            d[i] = 0.0
                        else :
                            d[i] = 1.0/(s[i]+Lambdamult)
                        move[i] = d[i]*rhsvect[i]
                    move = asarray(mat(transpose(vt))*mat(transpose(mat(move))))[:,0]
                    x1 = x + move
                    res1 = asarray(apply(f,(x1,)+args))
                    func_calls+=1
                    costmult = sum(res1**2)

                else :
                    NTrials2 = 0
                    while (costmult > currentcost) and (NTrials2 < 10) :
                        NTrials2 = NTrials2 + 1
                        if disp == 1:
                            print " Decreasing stepsize "
                        move = (.5)**NTrials2*moveold
                        x1 = x + asarray(move)
                        res1 = asarray(apply(f,(x1,)+args))
                        func_calls+=1
                        costmult = sum(res1**2)

            if (NTrials==10) or (NTrials2==10) :
                if disp == 1:
                    print " Failed to converge"
                finish = 1
            else :
                x = x1[:]
                if retall:
                    allvecs.append(x)
                Lambda = Lambdamult
                if app_fprime :
                    j = asarray(apply(approx_fprime2,(x,f,epsilon)+args))
                    func_calls = func_calls + 2*len(x)
                else :
                    j = asarray(apply(fprime,(x,)+args))
                    grad_calls+=1

                grad = mat(res1)*mat(j)
                currentcost = costmult
                if sum(abs(2.0*grad), axis=None) < gtol :
                    finish = 2
        niters = niters + 1

        # see if we need to reduce the trust region
        newmodelval = oldres+asarray(mat(oldjac)*mat(transpose(mat(move))))[:,0]
        oldmodelval = oldres
        #print oldcost-sum(newmodelval**2)
        #print trustradius
        if ((oldcost-sum(newmodelval**2))>1.0e-16) :
            ratio = (oldcost-currentcost)/(oldcost-sum(newmodelval**2))
            if ratio < .25 :
                trustradius = trustradius/2.0
            if ratio >.25 and ratio<=.75 :
                trustradius = trustradius
            if ratio > .75 and trustradius<10.0 :
                trustradius = 2.0*trustradius

        #save(x,'currentParamsLM')

    if disp :
        if (niters>=maxiter) and (finish != 2) :
            print " Current function value: %f" % currentcost
            print " Iterations: %d" % niters
            print " Function evaluations: %d" % func_calls
            print " Gradient evaluations: %d" % grad_calls
            print " Maximum number of iterations exceeded with no convergence "
        if (finish == 2) :
            print " Optimization terminated successfully."
            print " Current function value: %f" % currentcost
            print " Iterations: %d" % niters
            print " Function evaluations: %d" % func_calls
            print " Gradient evaluations: %d" % grad_calls

    if isinstance(xcopy,KeyedList) :
        xcopy.update(x)
    else :
        xcopy = x

    if full_output:
        retlist = xcopy, currentcost, func_calls, grad_calls, finish, Lambda, j
        if retall:
            retlist += (allvecs,)
    else :
        retlist = xcopy
        if retall :
            retlist = (xcopy,allvecs)

    return retlist

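# Illustrative sketch (added for documentation, not part of the original
# module): minimizing a hypothetical two-residual, two-parameter problem with
# fmin_lm. With full_output=1 the return value is the 7-tuple documented
# above; if fprime were omitted, the Jacobian would instead be estimated by
# approx_fprime2.
def _example_fmin_lm():
    def toy_res(p):
        return asarray([p[0] - 1.0, 10.0*(p[1] - p[0]**2)])
    def toy_jac(p):
        # d(residual_i)/d(parameter_k), shape (2, 2)
        return asarray([[1.0, 0.0], [-20.0*p[0], 10.0]])
    pbest, cost, fcalls, gcalls, flag, lam, j = fmin_lm(toy_res,
                                                        asarray([-1.0, 1.0]),
                                                        fprime=toy_jac,
                                                        full_output=1, disp=0)
    return pbest, cost
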
def fmin_lmNoJ(fcost, x0, fjtj, args=(), avegtol=1e-5, epsilon=_epsilon,
               maxiter=None, full_output=0, disp=1, retall=0, trustradius=1.0):
    """Minimizer for a nonlinear least squares problem. Allowed to
    have more residuals than parameters or vice versa.
    fcost : the cost function (*not* the residual function)
    fjtj : this function must return back an ordered pair, the first entry
           is the gradient of the cost and the second entry is the
           Levenberg-Marquardt (LM) approximation to the Hessian of the cost.
           NOTE: If the cost function = 1/2 * sum(residuals**2) then
           the LM approximation is the matrix matrix product J^t J
           where J = derivative of residual function with respect to parameters.
           However if cost = k*sum(residuals**2) for some constant k, then
           the LM approximation is 2*k*J^t J, so beware of this factor!!!
    x0 : initial parameter set
    avegtol : convergence tolerance on the gradient vector
    epsilon : size of steps to use for finite differencing of f (if fprime
              not passed in)
    maxiter : maximum number of iterations
    full_output : 0 to get only the minimum set of parameters back
                  1 if you also want the best parameter set, the
                  lowest value of f, the number of function calls,
                  the number of gradient calls, the convergence flag,
                  the last Marquardt parameter used (lambda), and the
                  last evaluation of fprime (J matrix)
    disp : 0 for no display, 1 to give cost at each iteration and convergence
           conditions at the end
    retall : 0 for nothing extra to be returned, 1 for all the parameter
             sets during the optimization to be returned
    trustradius : set this to the maximum move you want to allow in a single
                  parameter direction.
                  If you are using log parameters, then setting this
                  to 1.0, for example, corresponds to a multiplicative
                  change of exp(1) = 2.718


    This version requires fjtj to pass back an ordered pair with
    a gradient evaluation of the cost and JtJ, but not a function for J.
    This is important in problems where there are many residuals and J is too
    cumbersome to compute and pass around, but JtJ is a lot "slimmer".
    """

    xcopy = copy.copy(x0)
    if isinstance(x0,KeyedList) :
        x0 = asarray(x0.values())
    else :
        x0 = asarray(x0)
    Lambda = 1.0e-02
    Mult = 10.0
    n = len(x0)
    func_calls = 0
    grad_calls = 0
    if maxiter==None :
        maxiter = 200*n
    niters = 0
    x = x0

    gtol = n*avegtol

    if retall:
        allvecs = [x]
    x1 = x0
    x2 = x0
    d = zeros(n,scipy.float_)
    move = zeros(n,scipy.float_)
    finish = 0

    grad, lmh = apply(fjtj,(x,))
    grad_calls+=1


    while (niters<maxiter) and (finish == 0):
        # estimate what Lambda should be

        [u,s,v] = scipy.linalg.svd(lmh)
        #print "s is (in NoJ) ", s
        #s,u = scipy.linalg.eig(lmh)
        #s = real(s)
        #u = real(u)
        oldlmh = lmh[:,:]
        oldgrad = grad[:]

        rhsvect = -scipy.dot(transpose(u),grad)
        # rhsvect = asarray(rhsvect)[:,0]
        move = abs(rhsvect)/(s+Lambda*ones(n)+1.0e-30*ones(n))
        move = list(move)
        maxindex = move.index(max(move))
        move = asarray(move)
        if max(move) > trustradius :
            Lambda = Mult*(1.0/trustradius*abs(rhsvect[maxindex])-s[maxindex])
            #print " Increasing lambda to ", Lambda

##        lmhreg = lmh + Lambda*eye(n,n,typecode=scipy.float_)
##        [u,s,v] = scipy.linalg.svd(lmhreg)
        rhsvect = -scipy.dot(transpose(u),grad)
        # rhsvect = asarray(rhsvect)[:,0]

        for i in range(0,len(s)) :
            if (s[i]+Lambda) < 1.0e-30 :
                d[i] = 0.0
            else :
                d[i] = 1.0/(s[i]+Lambda)
            move[i] = d[i]*rhsvect[i]
        move = asarray(move)
        move = dot(asarray(u),move)
        x1 = x + move
        moveold = move[:]


        for i in range(0,len(s)) :
            if (s[i]+Lambda/Mult) < 1.0e-30 :
                d[i] = 0.0
            else :
                d[i] = 1.0/(s[i]+Lambda/Mult)
            move[i] = d[i]*rhsvect[i]
        move = asarray(move)
        move = dot(asarray(u),move)
        x2 = x + asarray(move)

        currentcost = apply(fcost,(x,))
        oldcost = currentcost
        func_calls+=1

        try:
            costlambdasmaller = apply(fcost,(x2,))
        except SloppyCell.Utility.SloppyCellException:
            costlambdasmaller = scipy.inf
        func_calls+=1

        try:
            costlambda = apply(fcost,(x1,))
        except SloppyCell.Utility.SloppyCellException:
            costlambda = scipy.inf
        func_calls+=1
        if disp :
            print 'Iteration number', niters
            print 'Current cost', currentcost
            print "Move 1 gives cost of" , costlambda
            print "Move 2 gives cost of ", costlambdasmaller

        #fp = open('LMoutfile','a')
        #fp.write('Iteration number ' + niters.__str__() + '\n')
        #fp.write('Current cost ' + currentcost.__str__() + '\n')
        #fp.write('Move 1 gives cost of ' + costlambda.__str__() + '\n')
        #fp.write('Move 2 gives cost of ' + costlambdasmaller.__str__() + '\n')
        #fp.close()

        if costlambdasmaller <= currentcost :
            Lambda = Lambda/Mult
            x = x2[:]
            if retall:
                allvecs.append(x)
            currentcost = costlambdasmaller
            grad, lmh = apply(fjtj,(x2,))
            grad_calls+=1

            #if scipy.linalg.norm(asarray(grad)) < avegtol :
            if sum(abs(2.0*grad), axis=None) < gtol :
                finish = 2
        elif costlambda <= currentcost :
            currentcost = costlambda
            move = moveold[:]
            x = x1[:]
            if retall:
                allvecs.append(x)
            grad, lmh = apply(fjtj,(x1,))
            grad_calls+=1

            # if scipy.linalg.norm(asarray(grad)) < avegtol :
            if sum(abs(2.0*grad), axis=None) < gtol :
                finish = 2
        else :
            Lambdamult = Lambda
            costmult = costlambda
            piOverFour = .78539816339744825
            NTrials2 = 0
            NTrials = 0

            while (costmult > currentcost) and (NTrials < 10) :
                # num = -dot(transpose(asarray(grad)),asarray(moveold) )
                # den = scipy.linalg.norm(grad)*scipy.linalg.norm(moveold)
                gamma = .1 # scipy.arccos(num/den)
                NTrials = NTrials+1
                if (gamma > 0) :
                    Lambdamult = Lambdamult*Mult

                    for i in range(0,len(s)) :
                        if s[i] + Lambdamult < 1.0e-30 :
                            d[i] = 0.0
                        else :
                            d[i] = 1.0/(s[i] + Lambdamult)
                        move[i] = d[i]*rhsvect[i]
                    move = asarray(move)
                    move = dot(asarray(u),move)
                    x1 = x + asarray(move)

                    func_calls+=1
                    costmult = apply(fcost,(x1,))
                else :
                    NTrials2 = 0
                    while (costmult > currentcost) and (NTrials2 < 10) :
                        NTrials2 = NTrials2 + 1
                        if disp :
                            print " Decreasing stepsize "
                        move = (.5)**NTrials2*moveold
                        x1 = x + asarray(move)
                        func_calls+=1
                        costmult = apply(fcost,(x1,))

            if (NTrials==10) or (NTrials2==10) :
                if disp :
                    print " Failed to converge"
                finish = 1
            else :
                x = x1[:]
                if retall:
                    allvecs.append(x)
                Lambda = Lambdamult
                grad, lmh = apply(fjtj,(x1,))
                grad_calls+=1
                currentcost = costmult
                # if scipy.linalg.norm(grad) < avegtol :
                if sum(abs(2.0*grad), axis=None) < gtol :
                    finish = 2
        niters = niters + 1

        # see if we need to reduce the trust region, compare the actual change in
        # cost to the linear and quadratic change in cost
        model_change = scipy.dot(scipy.transpose(oldgrad),move) + \
                .5*scipy.dot(scipy.transpose(move),scipy.dot(oldlmh,move) )
        #print oldcost-sum(newmodelval**2)
        #print trustradius
        if model_change>1.0e-16 :
            ratio = (oldcost-currentcost)/(model_change)
            if ratio < .25 :
                trustradius = trustradius/2.0
            if ratio >.25 and ratio<=.75 :
                trustradius = trustradius
            if ratio > .75 and trustradius<10.0 :
                trustradius = 2.0*trustradius

        #save(x,'currentParamsLM')

    if disp :
        if (niters>=maxiter) and (finish != 2) :
            print " Current function value: %f" % currentcost
            print " Iterations: %d" % niters
            print " Function evaluations: %d" % func_calls
            print " Gradient evaluations: %d" % grad_calls
            print " Maximum number of iterations exceeded with no convergence "
        if (finish == 2) :
            print "Optimization terminated successfully."
            print " Current function value: %f" % currentcost
            print " Iterations: %d" % niters
            print " Function evaluations: %d" % func_calls
            print " Gradient evaluations: %d" % grad_calls

    if isinstance(xcopy,KeyedList) :
        xcopy.update(x)
    else :
        xcopy = x

    if full_output:
        retlist = xcopy, currentcost, func_calls, grad_calls, finish, Lambda, lmh
        if retall:
            retlist += (allvecs,)
    else:
        retlist = xcopy
        if retall:
            retlist = (xcopy, allvecs)

    return retlist

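# Illustrative sketch (added for documentation, not part of the original
# module): the fcost/fjtj pair expected by fmin_lmNoJ for the same
# hypothetical toy problem. With cost = 1/2*sum(residuals**2), fjtj returns
# the gradient J^t r and the approximate Hessian J^t J, so J itself never
# needs to be passed around.
def _example_fmin_lmNoJ():
    def toy_res(p):
        return asarray([p[0] - 1.0, 10.0*(p[1] - p[0]**2)])
    def toy_jac(p):
        return asarray([[1.0, 0.0], [-20.0*p[0], 10.0]])
    def toy_cost(p):
        return 0.5*sum(toy_res(p)**2)
    def toy_fjtj(p):
        j = toy_jac(p)
        r = toy_res(p)
        return dot(transpose(j), r), dot(transpose(j), j)
    return fmin_lmNoJ(toy_cost, asarray([-1.0, 1.0]), toy_fjtj, disp=0)
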
def solve_lmsys(Lambda,s,g,rhsvect,currentcost,n) :
    # Take the full (Lambda = 0) Gauss-Newton step in the diagonalized
    # coordinates; directions with tiny singular values are simply dropped.
    # (Lambda, g and currentcost are accepted but not used here.)
    d = zeros(n,scipy.float_)
    move = zeros(n,scipy.float_)
    for i in range(0,n) :
        if s[i] < 1.0e-20 :
            d[i] = 0.0
        else :
            d[i] = 1.0/(s[i])
        move[i] = d[i]*rhsvect[i]
    return move

def fmin_lm_scale(f, x0, fprime=None, args=(), avegtol=1e-5, epsilon=_epsilon,
                  maxiter=None, full_output=0, disp=1, retall=0, trustradius=1.0):
    """
    Minimizer for a nonlinear least squares problem. Allowed to
    have more residuals than parameters or vice versa.

    f : residual function (function of parameters)
    fprime : derivative of residual function with respect to parameters.
             Should return a matrix (J) with dimensions number of residuals
             by number of parameters.
    x0 : initial parameter set
    avegtol : convergence tolerance on the gradient vector
    epsilon : size of steps to use for finite differencing of f (if fprime
              not passed in)
    maxiter : maximum number of iterations
    full_output : 0 to get only the minimum set of parameters back
                  1 if you also want the best parameter set, the
                  lowest value of f, the number of function calls,
                  the number of gradient calls, the convergence flag,
                  the last Marquardt parameter used (lambda), and the
                  last evaluation of fprime (J matrix)
    disp : 0 for no display, 1 to give cost at each iteration and convergence
           conditions at the end
    retall : 0 for nothing extra to be returned, 1 for all the parameter
             sets during the optimization to be returned
    trustradius : set this to the maximum length of move you want.
                  If you are using log parameters, then setting this
                  to 1.0, for example, corresponds to a multiplicative
                  change of exp(1) = 2.718 if the move is along a single
                  parameter direction

    This version is scale invariant. This means that under a change of
    scale of the parameters the direction the optimizer chooses to move
    in does not change. To achieve this, we don't use a Marquardt
    parameter to impose a trust region but rather take the infinite trust
    region step and just cut it back to the length given in the variable
    trustradius.
    """
    app_fprime = 0
    if fprime is None:
        app_fprime = 1

    xcopy = copy.copy(x0)
    if isinstance(x0,KeyedList) :
        x0 = asarray(x0.values())
    else :
        x0 = asarray(x0)

    Lambda = 1.0e-02
    Mult = 10.0
    n = len(x0)
    func_calls = 0
    grad_calls = 0
    res = asarray(apply(f,(x0,)))
    m = res.shape[0]
    if maxiter is None :
        maxiter = 200*n
    niters = 0
    x = x0

    gtol = n*avegtol

    if retall:
        allvecs = [x]
    x1 = x0
    x2 = x0
    d = zeros(n,scipy.float_)
    move = zeros(n,scipy.float_)
    finish = 0

    if app_fprime :
        j = asarray(apply(approx_fprime2,(x,f,epsilon)+args))
        func_calls = func_calls + 2*len(x)
    else :
        j = asarray(apply(fprime,(x,)))
        grad_calls+=1
    res = asarray(apply(f,(x,)))
    func_calls+=1
    grad = mat(res)*mat(j)

    while (niters<maxiter) and (finish == 0):
        # note: grad, res and j will be available from the end of the
        # last iteration. They just need to be computed the zeroth
        # time as well (above)

        lmh = mat(transpose(j))*mat(j)
        # use more accurate way to get e-vals/dirns
        #[u,s,v] = scipy.linalg.svd(lmh)
        [u,ssqrt,vt] = scipy.linalg.svd(j)
        # want n singular values even if m<n and we have
        # more parameters than data points.
        if (len(ssqrt) == n) :
            s = ssqrt**2
        elif (len(ssqrt)<n) :
            s = zeros((n,),scipy.float_)
            s[0:len(ssqrt)] = ssqrt**2
        #rhsvect = -mat(transpose(u))*mat(transpose(grad))
        rhsvect = -mat(vt)*mat(transpose(grad))
        rhsvect = asarray(rhsvect)[:,0]


        currentcost = sum(asarray(apply(f,(x,)))**2)
        g = asarray(grad)[0,:]
        Lambda = 0
        move = solve_lmsys(Lambda,s,g,rhsvect,currentcost,n)

        move = asarray(move)
        move = asarray(mat(transpose(vt))*mat(transpose(mat(move))))[:,0]
        unitmove = move/(scipy.linalg.norm(move))
        move1 = unitmove*trustradius

        # print move
        x1 = x + move1

        move2 = unitmove*trustradius*Mult
        x2 = x + asarray(move2)

        func_calls+=1
        try:
            res2 = asarray(apply(f,(x2,)))
            costlambdasmaller = sum(res2**2)
        except SloppyCell.Utility.SloppyCellException:
            costlambdasmaller = scipy.inf
        func_calls+=1
        try:
            res1 = asarray(apply(f,(x1,)))
            costlambda = sum(res1**2)
        except SloppyCell.Utility.SloppyCellException:
            costlambda = scipy.inf
        func_calls+=1
        if disp :
            print "Cost is ", currentcost
            print "Iteration is", niters

        oldcost = currentcost
        oldres = res
        oldjac = j

        if costlambdasmaller <= currentcost :
            trustradius = trustradius*Mult
            x = x2
            if retall:
                allvecs.append(x)
            currentcost = costlambdasmaller
            if app_fprime :
                j = asarray(apply(approx_fprime2,(x2,f,epsilon)+args))
                func_calls = func_calls + 2*len(x2)
            else :
                j = asarray(apply(fprime,(x2,)))
                grad_calls+=1
            grad = mat(res2)*mat(j)
            if sum(abs(2.0*grad), axis=None) < gtol :
                finish = 2
            move = move2
        elif costlambda <= currentcost :
            currentcost = costlambda
            x = x1
            if retall:
                allvecs.append(x)
            if app_fprime :
                j = asarray(apply(approx_fprime2,(x1,f,epsilon)+args))
                func_calls = func_calls + 2*len(x1)
            else :
                j = asarray(apply(fprime,(x1,)))
                grad_calls+=1

            grad = mat(res1)*mat(j)
            if sum(abs(2.0*grad), axis=None) < gtol :
                finish = 2
            move = move1
        else :
            trustradmult = trustradius
            costmult = costlambda
            NTrials = 0
            move = unitmove
            while (costmult > currentcost) and (NTrials < 100) :
                NTrials = NTrials + 1
                #print " Decreasing stepsize "
                trustradmult = trustradmult/2.0
                move = move*trustradmult
                x1 = x + asarray(move)
                res1 = asarray(apply(f,(x1,)))
                func_calls+=1
                costmult = sum(res1**2)

            if (NTrials==100) :
                if disp :
                    print " Failed to converge"
                finish = 1
            else :
                x = x1
                if retall:
                    allvecs.append(x)
                trustradius = trustradmult
                if app_fprime :
                    j = asarray(apply(approx_fprime2,(x,f,epsilon)+args))
                    func_calls = func_calls + 2*len(x)
                else :
                    j = asarray(apply(fprime,(x,)))
                    grad_calls+=1

                grad = mat(res1)*mat(j)
                currentcost = costmult
                if sum(abs(2.0*grad), axis=None) < gtol :
                    finish = 2
        niters = niters + 1

        # see if we need to reduce the trust region
        newmodelval = oldres+asarray(mat(oldjac)*mat(transpose(mat(move))))[:,0]
        oldmodelval = oldres
        #print oldcost-sum(newmodelval**2)
        #print trustradius
        if ((oldcost-sum(newmodelval**2))>1.0e-16) :
            ratio = (oldcost-currentcost)/(oldcost-sum(newmodelval**2))
            if ratio < .25 :
                trustradius = trustradius/2.0
            if ratio >.25 and ratio<=.75 :
                trustradius = trustradius
            if ratio > .75 and trustradius<10.0 :
                trustradius = 2.0*trustradius

    if disp :
        if (niters>=maxiter) and (finish != 2) :
            print " Current function value: %f" % currentcost
            print " Iterations: %d" % niters
            print " Function evaluations: %d" % func_calls
            print " Gradient evaluations: %d" % grad_calls
            print " Maximum number of iterations exceeded with no convergence "
        if (finish == 2) :
            print "Optimization terminated successfully."
            print " Current function value: %f" % currentcost
            print " Iterations: %d" % niters
            print " Function evaluations: %d" % func_calls
            print " Gradient evaluations: %d" % grad_calls

    if isinstance(xcopy,KeyedList) :
        xcopy.update(x)
    else :
        xcopy = x

    if full_output:
        retlist = xcopy, currentcost, func_calls, grad_calls, finish, Lambda, j
        if retall:
            retlist += (allvecs,)
    else:
        retlist = xcopy
        if retall:
            retlist = (xcopy, allvecs)

    return retlist

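# Illustrative sketch (added for documentation, not part of the original
# module): the scale-invariant variant run on the same hypothetical toy
# residual. No fprime is supplied, so the Jacobian is estimated by central
# differences via approx_fprime2.
def _example_fmin_lm_scale():
    def toy_res(p):
        return asarray([p[0] - 1.0, 10.0*(p[1] - p[0]**2)])
    return fmin_lm_scale(toy_res, asarray([-1.0, 1.0]), disp=0)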