Source code for gradients

#!/usr/bin/env python
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt
import matplotlib.animation as ani
from math import *
from numpy.random import default_rng


def draw(frame, history, x, y):
    """
    Draws a snapshot of the optimization process. Used for animation.

    Args:
        frame (int): index of the frame to be drawn
        history (list): steps of the optimization process
        x (array): x coordinates
        y (array): y coordinates
    """
    meshsizex = len(x)
    meshsizey = len(y)
    X, Y = np.meshgrid(x, y)
    Z = np.zeros([meshsizey, meshsizex])
    for i in range(meshsizey):
        for j in range(meshsizex):
            Z[i,j] = target( [X[i,j], Y[i,j]] )

    if alternative:
        contours = np.linspace(0, 2, 21)**2
    else:
        contours = np.linspace(-1.25, 0, 20)

    plt.clf()
    ax = plt.axes()
    ax.set_aspect('equal')
    plt.contourf(X, Y, Z, levels=contours, cmap=plt.cm.ocean)
    steparray = np.array(history[:frame+1])
    plt.plot(steparray[:,0], steparray[:,1], '-', color='darkred', lw=0.5)
    plt.plot(steparray[-1,0], steparray[-1,1], 'ro', markersize=3)
def animate(history, minx, miny, maxx, maxy):
    """
    Animates the simulation.

    Args:
        history (list): steps of the optimization process
        minx (float): x axis minimum
        maxx (float): x axis maximum
        miny (float): y axis minimum
        maxy (float): y axis maximum
    """
    delta = 0.05
    x = np.arange(minx, maxx, delta)
    y = np.arange(miny, maxy, delta)
    nframes = len(history)
    print("animating "+str(nframes)+" frames")
    fig = plt.figure()
    motion = ani.FuncAnimation(fig, draw, nframes, fargs=( history, x, y ) )
    plt.show()
def show_convergence(steps, minx=-5, maxx=5, miny=-5, maxy=5):
    """
    Plots the :meth:`target` function and the optimization path.

    Args:
        steps (array): sequence of coordinates representing optimization progress
        minx (float): x axis minimum
        maxx (float): x axis maximum
        miny (float): y axis minimum
        maxy (float): y axis maximum
    """
    delta = 0.025
    x = np.arange(minx, maxx, delta)
    y = np.arange(miny, maxy, delta)
    meshsizex = len(x)
    meshsizey = len(y)
    X, Y = np.meshgrid(x, y)
    Z = np.zeros([meshsizey, meshsizex])
    for i in range(meshsizey):
        for j in range(meshsizex):
            Z[i,j] = target( [X[i,j], Y[i,j]] )

    plt.clf()
    ax = plt.axes()
    ax.set_aspect('equal')
    if alternative:
        contours = np.linspace(0, 2, 21)**2
    else:
        contours = np.linspace(-1.25, 0, 20)
    plt.contourf(X, Y, Z, levels=contours, cmap=plt.cm.ocean) #cmap='Greys_r'
    steparray = np.array(steps)
    plt.plot(steparray[:,0], steparray[:,1], '-o', color='red', lw=1, markersize=2)
    plt.show()
def target( coordinates ):
    """
    The target function.

    For this exercise, the function is chosen to be

    .. math ::
        f(x,y) = -0.9 e^{ -2.5 (x-2.2)^2 - 0.3(y-0.8)^2 } - 1.1 e^{ -2.1(x-1.0)^2 - 0.4(y-2.1)^2 } - 0.1 x

    or alternatively (if the global variable alternative = True)

    .. math ::
        f(x,y) = (\\cos 3y - 2 e^{x/2} + 2)^2 + (x-y^3)^2

    Our task is to find the minimum of this function.

    Args:
        coordinates (array): (x,y) coordinates

    Returns:
        float: function value :math:`f(x,y)`
    """
    x = coordinates[0]
    y = coordinates[1]
    if alternative:
        return ( ( cos(3*y) - 2*exp(x/2) + 2 )**2 + (x-y**3)**2 )
    else:
        return -0.9*exp( -2.5*(x-2.2)**2 - 0.3*(y-0.8)**2 ) \
               -1.1*exp( -2.1*(x-1.0)**2 - 0.4*(y-2.1)**2 ) - 0.1*x
def gradient( coordinates ):
    """
    Gradient of the target function :meth:`target`.

    Args:
        coordinates (array): (x,y) coordinates

    Returns:
        array: gradient :math:`\\vec{g} = \\nabla f` as a vector [gx, gy].
    """
    x = coordinates[0]
    y = coordinates[1]
    if alternative:
        gradx = -2*( cos(3*y) - 2*exp(x/2) + 2 )*exp(x/2) + 2*(x-y**3)
        grady = -6*( cos(3*y) - 2*exp(x/2) + 2 )*sin(3*y) - 6*(x-y**3)*y**2
    else:
        gradx = -0.9*-2.5*2*(x-2.2)*exp( -2.5*(x-2.2)**2 - 0.3*(y-0.8)**2 ) \
                -1.1*-2.1*2*(x-1.0)*exp( -2.1*(x-1.0)**2 - 0.4*(y-2.1)**2 ) - 0.1
        grady = -0.9*-0.3*2*(y-0.8)*exp( -2.5*(x-2.2)**2 - 0.3*(y-0.8)**2 ) \
                -1.1*-0.4*2*(y-2.1)*exp( -2.1*(x-1.0)**2 - 0.4*(y-2.1)**2 )
    return np.array([gradx, grady])
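# A minimal sanity check for gradient(), comparing the analytic result to a
# central finite-difference estimate of target(). The helper name
# check_gradient and the step size h are illustrative additions, not part of
# the original exercise.
def check_gradient(coordinates, h=1.0e-6):
    """
    Compare the analytic :meth:`gradient` to a numerical estimate.

    Args:
        coordinates (array): (x,y) coordinates
        h (float): finite-difference step

    Returns:
        array, array: numerical gradient, analytic gradient
    """
    coordinates = np.asarray(coordinates, dtype=float)
    numeric = np.zeros(2)
    for k in range(2):
        shift = np.zeros(2)
        shift[k] = h
        # central difference: ( f(x+h) - f(x-h) ) / (2h)
        numeric[k] = ( target(coordinates + shift) - target(coordinates - shift) ) / (2*h)
    return numeric, gradient(coordinates)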
def random_displacement( coords, length ):
    """
    Randomly changes the given coordinate vector.

    Each coordinate is changed by a uniformly distributed
    random variable :math:`\\Delta x \\in [-L, L]`.

    The function returns the new coordinates as a new vector.
    The original vector is not affected.

    Args:
        coords (array): coordinates as a vector
        length (float): maximum change :math:`L`

    Returns:
        array: new coordinates
    """
    # 'random' is the module-level Generator created in the main block
    new_coords = coords + ( 2.0*random.random( size=len(coords) ) - 1.0 )*length
    return new_coords
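# A quick illustration (hypothetical values): random_displacement() returns a
# new vector and leaves its input untouched, e.g.
#
#     p = np.array([1.0, 2.0])
#     q = random_displacement(p, 0.1)  # each q[i] lies in [p[i]-0.1, p[i]+0.1]
#     # p itself is unchanged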
def simulated_annealing( initial_guess,
                         initial_temperature,
                         final_temperature,
                         n_steps,
                         max_step_length,
                         recording_interval = 10 ):
    """
    Searches for the minimum of the :meth:`target` function :math:`f`.

    The simulated annealing algorithm starts from the initial guess and
    searches for better solutions using a random walk. The process is
    controlled by a temperature-like quantity :math:`T`.

    The algorithm repeats the following:

    * Make a small random change in the current coordinates.
    * Calculate the change in the target function due to this change, :math:`\\Delta f`.
    * If :math:`\\Delta f \\le 0` (new coordinates are better than the old ones),
      accept the change.
    * If the new solution is the best one yet, record it.
    * If :math:`\\Delta f > 0` (new coordinates are worse than the old ones),
      accept the change with probability :math:`P = e^{- \\Delta f / T}`.
    * Lower the temperature by a small amount.

    The algorithm has no specific stopping criteria. Instead, it continues
    searching for a predetermined period. Once finished, the algorithm
    returns the coordinates where the minimum was found, the minimum target
    value and the path it took as a list of coordinates.

    Args:
        initial_guess (array): starting point (x,y)
        initial_temperature (float): value of :math:`T` in the beginning
        final_temperature (float): value of :math:`T` at the end
        n_steps (int): number of steps
        max_step_length (float): the maximum allowed change in each coordinate in one step
        recording_interval (int): records the current coordinates after this many steps

    Returns:
        array, float, array: final coordinates, final target value, list of coordinates
    """
    current_value = target(initial_guess)
    best_value = current_value
    current_coordinates = initial_guess
    best_coordinates = initial_guess
    steps = [initial_guess]
    delta_temperature = ( final_temperature - initial_temperature ) / (n_steps - 1)

    for i in range(n_steps):
        # linear cooling schedule from initial to final temperature
        temperature = initial_temperature + i*delta_temperature
        new_coordinates = random_displacement( current_coordinates, max_step_length )
        new_value = target( new_coordinates )
        delta_f = new_value - current_value
        randomizer = random.random()

        # Metropolis criterion: always accept improvements, accept worse
        # solutions with probability exp(-df/T). Checking df <= 0 first
        # also avoids overflow in exp() for strongly negative df.
        if delta_f <= 0 or randomizer < exp( -delta_f / temperature ):
            current_value = new_value
            current_coordinates = new_coordinates

        if i % recording_interval == 0:
            steps.append( current_coordinates )

        if current_value < best_value:
            best_value = current_value
            best_coordinates = current_coordinates

    return best_coordinates, best_value, steps
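# The two gradient-based algorithms below call linear_search(), which is not
# included in this listing. The sketch below is an assumption inferred from
# the calls linear_search(coords, direction, accuracy): it samples target()
# along the search direction and shrinks the bracket until the step parameter
# t is known to within 10**-accuracy. It is a stand-in, not the original
# course implementation, and it only searches the segment t in [0, 1].
def linear_search(coords, direction, accuracy):
    """
    Find an approximate minimum of :meth:`target` on the line
    coords + t*direction, with t in [0, 1].

    Args:
        coords (array): starting point (x,y)
        direction (array): search direction
        accuracy (int): refine until t is known to within 10**-accuracy

    Returns:
        array, float: best coordinates found and the target value there
    """
    t_min, t_max = 0.0, 1.0
    while t_max - t_min > 10.0**(-accuracy):
        ts = np.linspace(t_min, t_max, 11)
        values = [ target( coords + t*direction ) for t in ts ]
        best = int(np.argmin(values))
        # shrink the bracket around the best sampled point
        t_min = ts[max(best-1, 0)]
        t_max = ts[min(best+1, 10)]
    t_best = 0.5*(t_min + t_max)
    best_coords = coords + t_best*direction
    return best_coords, target(best_coords)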
def conjugate_gradients_algorithm(initial_guess, max_gradient = 0.1, max_steps = 1000):
    """
    Searches for the minimum of the :meth:`target` function :math:`f`.

    The conjugate gradients algorithm starts from the initial guess and
    repeats the following:

    * Calculate the :meth:`gradient` :math:`\\vec{g}_i = \\nabla f` at the
      current position.
    * Check the length of the gradient vector :math:`g_i = | \\vec{g}_i |`
      and stop if it is below a given threshold. If not, continue.
    * Calculate a mixing parameter. There are many ways to do this.
      This function uses

      .. math ::
          \\beta_i = \\frac{ \\vec{g}_i \\cdot (\\vec{g}_i - \\vec{g}_{i-1}) }{ \\vec{g}_{i-1} \\cdot \\vec{g}_{i-1} }

    * Do a :meth:`linear_search` in the direction
      :math:`\\vec{d}_i = - \\vec{g}_i + \\beta_i \\vec{d}_{i-1}` and move to
      the lowest point found. (For the first step, :math:`\\vec{d}_i = - \\vec{g}_i`.)

    Once finished, the algorithm returns the coordinates where the minimum
    was found, the minimum target value and the path it took as a list of
    coordinates.

    .. note ::
        This function is incomplete!

    Args:
        initial_guess (array): starting point (x,y)
        max_gradient (float): stop if :math:`g` is less than this
        max_steps (int): stop if the iteration takes more than this many steps

    Returns:
        array, float, array: final coordinates, final target value, list of coordinates
    """
    # demand a more accurate line search when a small gradient is required
    if max_gradient < 0.001:
        accuracy = 6
    else:
        accuracy = 4

    coords = initial_guess
    n_step = 1
    steps = [coords]
    value = target(coords)  # make sure 'value' is defined even if the loop never runs
    grad = gradient(coords)
    direction = -grad
    g_norm = np.sqrt( direction @ direction )

    while g_norm > max_gradient and n_step < max_steps:
        n_step += 1
        coords, value = linear_search( coords, direction, accuracy )

        # todo
        direction = np.zeros(2)

        g_norm = np.sqrt( direction @ direction )
        steps.append(coords)

    return coords, value, steps
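# One possible way to complete the todo above, shown as a sketch (the
# exercise leaves the actual solution to the reader). It applies the mixing
# formula from the docstring and measures convergence from the new gradient:
#
#     new_grad = gradient(coords)
#     beta = ( new_grad @ (new_grad - grad) ) / ( grad @ grad )
#     direction = -new_grad + beta*direction
#     grad = new_grad
#     g_norm = np.sqrt( grad @ grad )
#
# Note that the stopping test should use the gradient norm; the placeholder
# direction = np.zeros(2) makes g_norm zero and ends the loop immediately.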
def deepest_descent_algorithm(initial_guess, max_gradient = 0.1, max_steps = 1000):
    """
    Searches for the minimum of the :meth:`target` function :math:`f`.

    The deepest descent algorithm starts from the initial guess and
    repeats the following:

    * Calculate the :meth:`gradient` :math:`\\vec{g} = \\nabla f` at the
      current position.
    * Check the length of the gradient vector :math:`g = | \\vec{g} |` and
      stop if it is below a given threshold. If not, continue.
    * Do a :meth:`linear_search` in the direction :math:`\\vec{d} = - \\vec{g}`
      and move to the lowest point found.

    Once finished, the algorithm returns the coordinates where the minimum
    was found, the minimum target value and the path it took as a list of
    coordinates.

    .. note ::
        This function is incomplete!

    Args:
        initial_guess (array): starting point (x,y)
        max_gradient (float): stop if :math:`g` is less than this
        max_steps (int): stop if the iteration takes more than this many steps

    Returns:
        array, float, array: final coordinates, final target value, list of coordinates
    """
    coords = initial_guess
    n_step = 1
    steps = [coords]
    g_norm = max_gradient + 1  # force at least one iteration

    while g_norm > max_gradient and n_step < max_steps:
        n_step += 1
        value = 0.0

        # todo:
        # - calculate the direction for searching and save it in variable 'direction'
        # - do a linear search in this direction
        direction = np.zeros(2)

        g_norm = np.sqrt( direction @ direction )
        steps.append(coords)

    return coords, value, steps
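# One possible way to complete the todo above, shown as a sketch (the
# exercise leaves the actual solution to the reader):
#
#     grad = gradient(coords)
#     direction = -grad
#     coords, value = linear_search( coords, direction, 4 )
#     g_norm = np.sqrt( grad @ grad )
#
# As in the conjugate gradients stub, the placeholder direction = np.zeros(2)
# makes g_norm zero, so the unmodified loop exits after a single pass.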
def main(guess, algorithm = "dd"):
    """
    Find a minimum of the :meth:`target` function.

    The function also visualizes the optimization process.

    Args:
        guess (array): Initial coordinates (x,y) as array.
        algorithm (str): One of:
            *"dd"*: deepest descent gradient search,
            *"cg"*: conjugate gradients search,
            *"acg"*: more accurate conjugate gradients search,
            *"scipy"*: built-in scipy conjugate gradients search,
            *"sa"*: simulated annealing
    """
    if algorithm == "dd":
        # use deepest descent algorithm
        coords, value, steps = deepest_descent_algorithm(guess)

    elif algorithm == "cg":
        # use conjugate gradients algorithm
        coords, value, steps = conjugate_gradients_algorithm(guess)

    elif algorithm == "acg":
        # conjugate gradients with higher accuracy
        coords, value, steps = conjugate_gradients_algorithm(guess, 10**-4)

    elif algorithm == "scipy":
        # built-in scipy conjugate gradients optimizer
        coords, steps = opt.fmin_cg(target, guess, gradient, retall=1)
        value = target(coords)

    elif algorithm == "sa":
        # simulated annealing
        coords, value, steps = simulated_annealing(guess, 0.5, 0.0001, 1000, 0.1)

    else:
        print("not a valid algorithm")
        quit()

    # show solution
    print( "solution: ", coords )
    print( "value: ", value )

    # show how the algorithm worked
    if alternative:
        show_convergence(steps, minx=-2, miny=-2, maxx=2, maxy=2)
        animate(steps, minx=-2, miny=-2, maxx=2, maxy=2)
    else:
        show_convergence(steps, minx=0, miny=0, maxx=3, maxy=3)
        animate(steps, minx=0, miny=0, maxx=3, maxy=3)
if __name__ == "__main__":

    random = default_rng()

    # set this to True to use the alternative target function
    alternative = True

    # let the computer take a random guess
    if alternative:
        guess = random.random(size=2)*3 - 1.5
    else:
        guess = random.random(size=2)*3

    # or specify the initial guess yourself
    # guess = np.array([1.5, 1.5])

    main(guess, "dd")