#!/usr/bin/env python
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt
import matplotlib.animation as ani
from math import cos, sin, exp
from numpy.random import default_rng
def draw(frame, history, x, y):
"""
Draws a snapshot of the optimization process.
Used for animation.
Args:
frame (int): index of the frame to be drawn
history (list): steps of the optimization process
x (array): x coordinates
y (array): y coordinates
"""
meshsizex = len(x)
meshsizey = len(y)
X, Y = np.meshgrid(x, y)
Z = np.zeros([meshsizey, meshsizex])
for i in range(meshsizey):
for j in range(meshsizex):
Z[i,j] = target( [X[i,j], Y[i,j]] )
if alternative:
contours = np.linspace(0,2,21)**2
else:
contours = np.linspace(-1.25, 0, 20)
plt.clf()
ax = plt.axes()
ax.set_aspect('equal')
plt.contourf(X, Y, Z, levels=contours, cmap=plt.cm.ocean)
steparray = np.array(history[:frame+1])
plt.plot(steparray[:,0], steparray[:,1],'-',color='darkred',lw=0.5)
plt.plot(steparray[-1,0], steparray[-1,1],'ro',markersize=3)
def animate(history, minx, miny, maxx, maxy):
"""
Animates the simulation.
Args:
history (list): steps of the optimization process
minx (float): x axis minimum
        miny (float): y axis minimum
        maxx (float): x axis maximum
maxy (float): y axis maximum
"""
delta = 0.05
x = np.arange(minx, maxx, delta)
y = np.arange(miny, maxy, delta)
nframes = len(history)
print("animating "+str(nframes)+" frames")
fig = plt.figure()
motion = ani.FuncAnimation(fig, draw, nframes, fargs=( history, x, y ) )
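    # Keeping the FuncAnimation object in a variable ('motion') matters:
    # otherwise it may be garbage collected before the animation is drawn.
    # Optionally, the animation could also be written to a file, e.g.
    #   motion.save("optimization.gif", writer="pillow")
    # (assuming the Pillow writer is available).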
plt.show()
def show_convergence(steps, minx=-5, maxx=5, miny=-5, maxy=5):
"""
Plots the :meth:`target` function and the optimization path.
Args:
steps (array): sequence of coordinates representing optimization progress
minx (float): x axis minimum
maxx (float): x axis maximum
miny (float): y axis minimum
maxy (float): y axis maximum
"""
delta = 0.025
x = np.arange(minx, maxx, delta)
y = np.arange(miny, maxy, delta)
meshsizex = len(x)
meshsizey = len(y)
X, Y = np.meshgrid(x, y)
Z = np.zeros([meshsizey, meshsizex])
for i in range(meshsizey):
for j in range(meshsizex):
Z[i,j] = target( [X[i,j], Y[i,j]] )
plt.clf()
ax = plt.axes()
ax.set_aspect('equal')
if alternative:
contours = np.linspace(0,2,21)**2
else:
contours = np.linspace(-1.25, 0, 20)
plt.contourf(X, Y, Z, levels=contours, cmap=plt.cm.ocean) #cmap='Greys_r'
steparray = np.array(steps)
plt.plot(steparray[:,0], steparray[:,1],'-o',color='red',lw=1, markersize=2)
plt.show()
def target( coordinates ):
"""
The target function.
For this exercise, the function is chosen to be
.. math ::
f(x,y) = -0.9 e^{ -2.5 (x-2.2)^2 - 0.3(y-0.8)^2 }
- 1.1 e^{ -2.1(x-1.0)^2 - 0.4(y-2.1)^2 } - 0.1 x
or alternatively (if the global variable alternative = True)
.. math ::
        f(x,y) = (\\cos 3y - 2 e^{x/2} + 2)^2 + (x-y^3)^2
Our task is to find the minimum of this function.
Args:
coordinates (array): (x,y) coordinates
Returns:
float: function value :math:`f(x,y)`
"""
x = coordinates[0]
y = coordinates[1]
if alternative:
return ( ( cos(3*y) - 2*exp(x/2) + 2 )**2 + (x-y**3)**2 )
else:
return -0.9*exp( -2.5*(x-2.2)**2 - 0.3*(y-0.8)**2 ) \
-1.1*exp( -2.1*(x-1.0)**2 - 0.4*(y-2.1)**2 ) - 0.1*x
def gradient( coordinates ):
"""
Gradient of the target function :meth:`target`.
Args:
coordinates (array): (x,y) coordinates
Returns:
array: gradient :math:`\\vec{g} = \\nabla f` as a vector [gx, gy].
"""
x = coordinates[0]
y = coordinates[1]
if alternative:
gradx = -2*( cos(3*y) - 2*exp(x/2) + 2 )*exp(x/2) + 2*(x-y**3)
grady = -6*( cos(3*y) - 2*exp(x/2) + 2 )*sin(3*y) - 6*(x-y**3)*y**2
else:
gradx = -0.9*-2.5*2*(x-2.2)*exp( -2.5*(x-2.2)**2 - 0.3*(y-0.8)**2 ) \
-1.1*-2.1*2*(x-1.0)*exp( -2.1*(x-1.0)**2 - 0.4*(y-2.1)**2 ) - 0.1
grady = -0.9*-0.3*2*(y-0.8)*exp( -2.5*(x-2.2)**2 - 0.3*(y-0.8)**2 ) \
-1.1*-0.4*2*(y-2.1)*exp( -2.1*(x-1.0)**2 - 0.4*(y-2.1)**2 )
return np.array([gradx, grady])
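# A sanity check for the analytic gradient above, added for illustration
# (not part of the original exercise): compare :meth:`gradient` against a
# central finite-difference estimate of the :meth:`target` derivatives.
def check_gradient(coordinates, h=1e-6):
    """
    Compares :meth:`gradient` to a central finite-difference estimate.
    Args:
        coordinates (array): (x,y) coordinates
        h (float): finite-difference step
    Returns:
        float: largest absolute deviation between the two estimates
    """
    numeric = np.zeros(2)
    for k in range(2):
        plus = np.array(coordinates, dtype=float)
        minus = np.array(coordinates, dtype=float)
        plus[k] += h
        minus[k] -= h
        # central difference: df/dx_k ~ [ f(x + h e_k) - f(x - h e_k) ] / (2h)
        numeric[k] = ( target(plus) - target(minus) ) / (2*h)
    return np.max( np.abs( gradient(coordinates) - numeric ) )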
def linear_search( coordinates, direction, stepsize = 1, accuracy = 5 ):
"""
Searches for the minimum of the :meth:`target` function on a line.
Starting from given coordinates, the function
takes equally spaced steps in the given direction.
The function continues as long as the target function decreases.
(As a safety, there is a maximum number of steps allowed
after which the process is aborted.)
When the target starts to increase again, the step size is
    reduced and a new search is performed near the minimum of the previous search.
This allows the algorithm to find the minimum with better accuracy.
Args:
coordinates (array): starting position (x,y)
direction (array): vector pointing the search direction
stepsize (float): scaling for the initial step size
accuracy (int): how many times the step length is shortened
Returns:
array, float: (x,y) coordinates at the minimum, minimum target value
"""
# accuracy scales
scales = [1, 4**-1, 4**-2, 4**-3, 4**-4, 4**-5, 4**-6, 4**-7, 4**-8]
    accuracy = min(accuracy, len(scales))
previous_coordinates = coordinates
# repeat the search using more and more accuracy (shorter steps)
for scale in scales[:accuracy]:
# The first search is started from the given coordinates.
# The following searches are started from the coordinates
# that *preceded* the minimum found in the previous search.
# This is done because the true minimum could be before or after
# the minimum value of the previous search.
coordinates = previous_coordinates
current_value = target(coordinates)
        # flag monitoring whether we are still finding decreasing values
better = True
n_step = 0
# search as long as the values decrease, but do at most 100 steps
while better and n_step < 100:
n_step += 1
            # take a trial step of the current length in the given direction
new_coordinates = coordinates + scale*stepsize*direction
new_value = target(new_coordinates)
# if the new coordinates yield a smaller value, keep searching
if new_value < current_value:
current_value = new_value
previous_coordinates = coordinates
coordinates = new_coordinates
better = True
# If the target function is increasing, end the search
# for this scale. Currently the minimum location is stored in
            # 'coordinates' and the location that was checked before that
# is stored in 'previous_coordinates'.
else:
better = False
return coordinates, current_value
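# Example use (illustrative values, not prescribed by the exercise):
# starting from (1, 1), search downhill along the negative gradient.
#
#   start = np.array([1.0, 1.0])
#   spot, low = linear_search( start, -gradient(start), accuracy=4 )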
def random_displacement( coords, length ):
"""
Randomly changes the given coordinate vector.
Each coordinate is changed by a uniformly distributed random variable :math:`\\Delta x \\in [-L, L]`.
The function returns the new coordinates as a new vector. The original vector is not affected.
Args:
coords (array): coordinates as a vector
length (float): maximum change :math:`L`
Returns:
array: new coordinates
"""
new_coords = coords + ( 2.0*random.random( size=len(coords) )-1.0 )*length
return new_coords
def simulated_annealing( initial_guess,
initial_temperature,
final_temperature,
n_steps,
max_step_length,
recording_interval = 10 ):
"""
Searches for the minimum of the :meth:`target` function :math:`f`.
The simulated annealing algorithm starts from the initial guess and searches for better
solutions using a random walk. The process is controlled by a temperature-like quantity :math:`T`.
The algorithm repeats the following:
* Make a small random change in the current coordinates.
* Calculate the change in target function due to this change, :math:`\\Delta f`.
* If :math:`\\Delta f \\le 0` (new coordinates are better than the old ones), accept the change.
* If the new solution is the best one yet, record it.
* If :math:`\\Delta f > 0` (new coordinates are worse than the old ones), accept the change
with probability :math:`P = e^{- \\Delta f / T}`.
* Lower the temperature by a small amount.
    The algorithm has no specific stopping criterion. Instead, it continues searching for a
    predetermined number of steps.
Once finished, the algorithm returns the coordinates where the minimum was found,
the minimum target value and the path it took as a list of coordinates.
Args:
initial_guess (array): starting point (x,y)
initial_temperature (float): value of :math:`T` in the beginning
final_temperature (float): value of :math:`T` at the end
n_steps (int): number of steps
max_step_length (float): the maximum allowed change in each coordinate in one step
recording_interval (int): records the current coordinates after this many steps
Returns:
array, float, array: final coordinates, final target value, list of coordinates
"""
current_value = target(initial_guess)
best_value = current_value
current_coordinates = initial_guess
best_coordinates = initial_guess
steps = [initial_guess]
delta_temperature = ( final_temperature - initial_temperature ) / (n_steps - 1)
for i in range(n_steps):
temperature = initial_temperature + \
i*delta_temperature
new_coordinates = random_displacement( current_coordinates, max_step_length )
new_value = target( new_coordinates )
        randomizer = random.random()
        # Metropolis criterion: improvements are always accepted, worse
        # solutions with probability exp(-df/T). Checking for improvement
        # first avoids overflow in exp() for strongly negative df.
        if new_value <= current_value or \
           randomizer < exp( - (new_value - current_value) / temperature ):
            current_value = new_value
            current_coordinates = new_coordinates
if i%recording_interval == 0:
steps.append( current_coordinates )
if current_value < best_value:
best_value = current_value
best_coordinates = current_coordinates
return best_coordinates, best_value, steps
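# Example use (the same parameter values as in :meth:`main`): cool from
# T = 0.5 to T = 0.0001 over 1000 steps, allowing random moves of at most
# 0.1 per coordinate.
#
#   best, value, path = simulated_annealing( np.array([1.5, 1.5]),
#                                            0.5, 0.0001, 1000, 0.1 )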
def conjugate_gradients_algorithm(initial_guess, max_gradient = 0.1, max_steps = 1000):
"""
Searches for the minimum of the :meth:`target` function :math:`f`.
The conjugate gradients algorithm starts from the initial guess and repeats the following:
* Calculate the :meth:`gradient` :math:`\\vec{g}_i = \\nabla f` at current position.
* Check the length of the gradient vector :math:`g_i = | \\vec{g}_i |` and
stop if it is below a given threshold. If not, continue.
* Calculate a mixing parameter. There are many ways to do this.
This function uses
.. math ::
\\beta_i = \\frac{ \\vec{g}_i \\cdot (\\vec{g}_i - \\vec{g}_{i-1}) }
{ \\vec{g}_{i-1} \\cdot \\vec{g}_{i-1}}
* Do a :meth:`linear_search` in the direction
:math:`\\vec{d}_i = - \\vec{g}_i + \\beta_i \\vec{d}_{i-1}`
and move to the lowest point found.
(For the first step, :math:`\\vec{d}_i = - \\vec{g}_i`.)
Once finished, the algorithm returns the coordinates where the minimum was found,
the minimum target value and the path it took as a list of coordinates.
.. note ::
This function is incomplete!
Args:
initial_guess (array): starting point (x,y)
max_gradient (float): stop if :math:`g` is less than this
max_steps (int): stop if the iteration takes more than this many steps
Returns:
array, float, array: final coordinates, final target value, list of coordinates
"""
if max_gradient < 0.001:
accuracy = 6
else:
accuracy = 4
coords = initial_guess
n_step = 1
steps = [coords]
    grad = gradient(coords)
    direction = -grad
    # evaluate the starting point, in case it already satisfies
    # the convergence criterion and the loop is never entered
    value = target(coords)
    g_norm = np.sqrt( direction @ direction )
while g_norm > max_gradient and n_step < max_steps:
n_step += 1
        coords, value = linear_search( coords, direction, accuracy=accuracy )
        # todo: compute the new gradient, the mixing parameter beta,
        # and store the new search direction in 'direction'
        direction = np.zeros(2)
g_norm = np.sqrt( direction @ direction )
steps.append(coords)
return coords, value, steps
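# One possible way of completing the todo above, shown as a reference
# sketch (the exercise asks you to write this yourself). It uses the
# Polak-Ribiere style mixing parameter quoted in the docstring,
# beta = g_i . (g_i - g_{i-1}) / (g_{i-1} . g_{i-1}).
def conjugate_gradients_reference(initial_guess, max_gradient = 0.1, max_steps = 1000):
    """
    A reference sketch of :meth:`conjugate_gradients_algorithm`.
    """
    if max_gradient < 0.001:
        accuracy = 6
    else:
        accuracy = 4
    coords = initial_guess
    n_step = 1
    steps = [coords]
    grad = gradient(coords)
    direction = -grad
    value = target(coords)
    g_norm = np.sqrt( grad @ grad )
    while g_norm > max_gradient and n_step < max_steps:
        n_step += 1
        coords, value = linear_search( coords, direction, accuracy=accuracy )
        new_grad = gradient(coords)
        # mixing parameter from the docstring formula
        beta = ( new_grad @ (new_grad - grad) ) / ( grad @ grad )
        # mix the new gradient with the previous search direction
        direction = -new_grad + beta*direction
        grad = new_grad
        g_norm = np.sqrt( grad @ grad )
        steps.append(coords)
    return coords, value, steps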
def deepest_descent_algorithm(initial_guess, max_gradient = 0.1, max_steps = 1000):
"""
Searches for the minimum of the :meth:`target` function :math:`f`.
    The deepest descent algorithm (better known as steepest descent) starts from the initial guess and repeats the following:
* Calculate the :meth:`gradient` :math:`\\vec{g} = \\nabla f` at current position.
* Check the length of the gradient vector :math:`g = | \\vec{g} |` and
stop if it is below a given threshold. If not, continue.
* Do a :meth:`linear_search` in the direction :math:`\\vec{d} = - \\vec{g}`
and move to the lowest point found.
Once finished, the algorithm returns the coordinates where the minimum was found,
the minimum target value and the path it took as a list of coordinates.
.. note ::
This function is incomplete!
Args:
initial_guess (array): starting point (x,y)
max_gradient (float): stop if :math:`g` is less than this
max_steps (int): stop if the iteration takes more than this many steps
Returns:
array, float, array: final coordinates, final target value, list of coordinates
"""
coords = initial_guess
n_step = 1
steps = [coords]
g_norm = max_gradient+1
while g_norm > max_gradient and n_step < max_steps:
n_step += 1
value = 0.0
# todo:
# - calculate the direction for searching and save it in variable 'direction'
# - do a linear search in this direction
direction = np.zeros(2)
g_norm = np.sqrt( direction @ direction )
steps.append(coords)
return coords, value, steps
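# One possible way of completing the todo above, again as a reference
# sketch (the exercise asks you to write this yourself): always search
# straight downhill, along the negative gradient.
def deepest_descent_reference(initial_guess, max_gradient = 0.1, max_steps = 1000):
    """
    A reference sketch of :meth:`deepest_descent_algorithm`.
    """
    coords = initial_guess
    n_step = 1
    steps = [coords]
    value = target(coords)
    grad = gradient(coords)
    g_norm = np.sqrt( grad @ grad )
    while g_norm > max_gradient and n_step < max_steps:
        n_step += 1
        # line search along the direction of steepest descent
        coords, value = linear_search( coords, -grad )
        grad = gradient(coords)
        g_norm = np.sqrt( grad @ grad )
        steps.append(coords)
    return coords, value, steps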
def main(guess, algorithm = "dd"):
"""
Find a minimum of :meth:`target` function.
The function also visualizes the optimization process.
Args:
guess (array): Initial coordinates (x,y) as array.
algorithm (str): One of:
*"dd"*: deepest descent gradient search,
*"cg"*: conjugate gradients search,
*"acg"*: more accurate conjugate gradients search,
*"scipy"*: built-in scipy conjugate gradients search,
*"sa"*: simulated annealing
"""
if algorithm == "dd":
# use deepest descent algorithm
coords, value, steps = deepest_descent_algorithm(guess)
elif algorithm == "cg":
# use conjugate gradients algorithm
coords, value, steps = conjugate_gradients_algorithm(guess)
elif algorithm == "acg":
        # conjugate gradients with more accuracy
coords, value, steps = conjugate_gradients_algorithm(guess, 10**-4)
elif algorithm == "scipy":
# built-in scipy conjugate gradients optimizer
coords, steps = opt.fmin_cg(target, guess, gradient, retall=1)
value = target(coords)
elif algorithm == "sa":
# simulated annealing
coords, value, steps = simulated_annealing(guess,
0.5, 0.0001, 1000, 0.1 )
else:
print("not a valid algorithm")
quit()
# show solution
print( "solution: ", coords )
print( "value: ", value )
# show how the algorithm worked
if alternative:
show_convergence(steps, minx=-2, miny=-2, maxx=2, maxy=2)
animate(steps, minx=-2, miny=-2, maxx=2, maxy=2)
else:
show_convergence(steps, minx=0, miny=0, maxx=3, maxy=3)
animate(steps, minx=0, miny=0, maxx=3, maxy=3)
if __name__ == "__main__":
random = default_rng()
    # True selects the alternative target function, False the default one
alternative = True
# let the computer take a random guess
if alternative:
guess = random.random(size=2)*3-1.5
else:
guess = random.random(size=2)*3
# or specify the initial guess yourself
# guess = np.array([1.5, 1.5])
main(guess, "dd")