Source code for gradients

#!/usr/bin/env python
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt
import matplotlib.animation as ani
from math import *
from numpy.random import default_rng


def draw(frame, history, x, y):
    """
    Draws a snapshot of the optimization process. Used for animation.

    Args:
        frame (int): index of the frame to be drawn
        history (list): steps of the optimization process
        x (array): x coordinates
        y (array): y coordinates
    """
    meshsizex = len(x)
    meshsizey = len(y)
    X, Y = np.meshgrid(x, y)
    Z = np.zeros([meshsizey, meshsizex])
    for i in range(meshsizey):
        for j in range(meshsizex):
            Z[i,j] = target( [X[i,j], Y[i,j]] )

    if alternative:
        contours = np.linspace(0, 2, 21)**2
    else:
        contours = np.linspace(-1.25, 0, 20)

    plt.clf()
    ax = plt.axes()
    ax.set_aspect('equal')
    plt.contourf(X, Y, Z, levels=contours, cmap=plt.cm.ocean)
    steparray = np.array(history[:frame+1])
    plt.plot(steparray[:,0], steparray[:,1], '-', color='darkred', lw=0.5)
    plt.plot(steparray[-1,0], steparray[-1,1], 'ro', markersize=3)
def animate(history, minx, miny, maxx, maxy):
    """
    Animates the simulation.

    Args:
        history (list): steps of the optimization process
        minx (float): x axis minimum
        maxx (float): x axis maximum
        miny (float): y axis minimum
        maxy (float): y axis maximum
    """
    delta = 0.05
    x = np.arange(minx, maxx, delta)
    y = np.arange(miny, maxy, delta)
    nframes = len(history)
    print("animating "+str(nframes)+" frames")
    fig = plt.figure()
    motion = ani.FuncAnimation(fig, draw, nframes, fargs=( history, x, y ) )
    plt.show()
def show_convergence(steps, minx=-5, maxx=5, miny=-5, maxy=5):
    """
    Plots the :meth:`target` function and the optimization path.

    Args:
        steps (array): sequence of coordinates representing optimization progress
        minx (float): x axis minimum
        maxx (float): x axis maximum
        miny (float): y axis minimum
        maxy (float): y axis maximum
    """
    delta = 0.025
    x = np.arange(minx, maxx, delta)
    y = np.arange(miny, maxy, delta)
    meshsizex = len(x)
    meshsizey = len(y)
    X, Y = np.meshgrid(x, y)
    Z = np.zeros([meshsizey, meshsizex])
    for i in range(meshsizey):
        for j in range(meshsizex):
            Z[i,j] = target( [X[i,j], Y[i,j]] )

    plt.clf()
    ax = plt.axes()
    ax.set_aspect('equal')
    if alternative:
        contours = np.linspace(0, 2, 21)**2
    else:
        contours = np.linspace(-1.25, 0, 20)
    plt.contourf(X, Y, Z, levels=contours, cmap=plt.cm.ocean) #cmap='Greys_r'
    steparray = np.array(steps)
    plt.plot(steparray[:,0], steparray[:,1], '-o', color='red', lw=1, markersize=2)
    plt.show()
def target( coordinates ):
    """
    The target function.

    For this exercise, the function is chosen to be

    .. math ::
        f(x,y) = -0.9 e^{ -2.5 (x-2.2)^2 - 0.3(y-0.8)^2 } - 1.1 e^{ -2.1(x-1.0)^2 - 0.4(y-2.1)^2 } - 0.1 x

    or alternatively (if the global variable alternative = True)

    .. math ::
        f(x,y) = (\\cos 3y - 2 e^{x/2} + 2)^2 + (x-y^3)^2

    Our task is to find the minimum of this function.

    Args:
        coordinates (array): (x,y) coordinates

    Returns:
        float: function value :math:`f(x,y)`
    """
    x = coordinates[0]
    y = coordinates[1]
    if alternative:
        return ( ( cos(3*y) - 2*exp(x/2) + 2 )**2 + (x-y**3)**2 )
    else:
        return -0.9*exp( -2.5*(x-2.2)**2 - 0.3*(y-0.8)**2 ) \
               -1.1*exp( -2.1*(x-1.0)**2 - 0.4*(y-2.1)**2 ) - 0.1*x
def gradient( coordinates ):
    """
    Gradient of the target function :meth:`target`.

    Args:
        coordinates (array): (x,y) coordinates

    Returns:
        array: gradient :math:`\\vec{g} = \\nabla f` as a vector [gx, gy].
    """
    x = coordinates[0]
    y = coordinates[1]
    if alternative:
        gradx = -2*( cos(3*y) - 2*exp(x/2) + 2 )*exp(x/2) + 2*(x-y**3)
        grady = -6*( cos(3*y) - 2*exp(x/2) + 2 )*sin(3*y) - 6*(x-y**3)*y**2
    else:
        gradx = -0.9*-2.5*2*(x-2.2)*exp( -2.5*(x-2.2)**2 - 0.3*(y-0.8)**2 ) \
                -1.1*-2.1*2*(x-1.0)*exp( -2.1*(x-1.0)**2 - 0.4*(y-2.1)**2 ) - 0.1
        grady = -0.9*-0.3*2*(y-0.8)*exp( -2.5*(x-2.2)**2 - 0.3*(y-0.8)**2 ) \
                -1.1*-0.4*2*(y-2.1)*exp( -2.1*(x-1.0)**2 - 0.4*(y-2.1)**2 )
    return np.array([gradx, grady])
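# A minimal sanity check for gradient(), comparing the analytic result to a
# central finite-difference estimate of target(). The helper name
# check_gradient and the step size h are illustrative additions, not part of
# the original exercise.
def check_gradient(coordinates, h=1.0e-6):
    """
    Compare the analytic :meth:`gradient` to a numerical estimate.

    Args:
        coordinates (array): (x,y) coordinates
        h (float): finite-difference step

    Returns:
        array, array: numerical gradient, analytic gradient
    """
    coordinates = np.asarray(coordinates, dtype=float)
    numeric = np.zeros(2)
    for k in range(2):
        shift = np.zeros(2)
        shift[k] = h
        # central difference: ( f(x+h) - f(x-h) ) / (2h)
        numeric[k] = ( target(coordinates + shift) - target(coordinates - shift) ) / (2*h)
    return numeric, gradient(coordinates)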
def random_displacement( coords, length ):
    """
    Randomly changes the given coordinate vector.

    Each coordinate is changed by a uniformly distributed
    random variable :math:`\\Delta x \\in [-L, L]`.

    The function returns the new coordinates as a new vector.
    The original vector is not affected.

    Args:
        coords (array): coordinates as a vector
        length (float): maximum change :math:`L`

    Returns:
        array: new coordinates
    """
    # 'random' is the module-level Generator created in the main block
    new_coords = coords + ( 2.0*random.random( size=len(coords) ) - 1.0 )*length
    return new_coords
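# A quick illustration (hypothetical values): random_displacement() returns a
# new vector and leaves its input untouched, e.g.
#
#     p = np.array([1.0, 2.0])
#     q = random_displacement(p, 0.1)  # each q[i] lies in [p[i]-0.1, p[i]+0.1]
#     # p itself is unchanged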
def simulated_annealing( initial_guess,
                         initial_temperature,
                         final_temperature,
                         n_steps,
                         max_step_length,
                         recording_interval = 10 ):
    """
    Searches for the minimum of the :meth:`target` function :math:`f`.

    The simulated annealing algorithm starts from the initial guess and
    searches for better solutions using a random walk. The process is
    controlled by a temperature-like quantity :math:`T`.

    The algorithm repeats the following:

    * Make a small random change in the current coordinates.
    * Calculate the change in the target function due to this change, :math:`\\Delta f`.
    * If :math:`\\Delta f \\le 0` (new coordinates are better than the old ones),
      accept the change.
    * If the new solution is the best one yet, record it.
    * If :math:`\\Delta f > 0` (new coordinates are worse than the old ones),
      accept the change with probability :math:`P = e^{- \\Delta f / T}`.
    * Lower the temperature by a small amount.

    The algorithm has no specific stopping criteria. Instead, it continues
    searching for a predetermined period. Once finished, the algorithm
    returns the coordinates where the minimum was found, the minimum target
    value and the path it took as a list of coordinates.

    Args:
        initial_guess (array): starting point (x,y)
        initial_temperature (float): value of :math:`T` in the beginning
        final_temperature (float): value of :math:`T` at the end
        n_steps (int): number of steps
        max_step_length (float): the maximum allowed change in each coordinate in one step
        recording_interval (int): records the current coordinates after this many steps

    Returns:
        array, float, array: final coordinates, final target value, list of coordinates
    """
    current_value = target(initial_guess)
    best_value = current_value
    current_coordinates = initial_guess
    best_coordinates = initial_guess
    steps = [initial_guess]
    delta_temperature = ( final_temperature - initial_temperature ) / (n_steps - 1)

    for i in range(n_steps):
        # linear cooling schedule from initial to final temperature
        temperature = initial_temperature + i*delta_temperature
        new_coordinates = random_displacement( current_coordinates, max_step_length )
        new_value = target( new_coordinates )
        delta_f = new_value - current_value
        randomizer = random.random()

        # Metropolis criterion: always accept improvements, accept worse
        # solutions with probability exp(-df/T). Checking df <= 0 first
        # also avoids overflow in exp() for strongly negative df.
        if delta_f <= 0 or randomizer < exp( -delta_f / temperature ):
            current_value = new_value
            current_coordinates = new_coordinates

        if i % recording_interval == 0:
            steps.append( current_coordinates )

        if current_value < best_value:
            best_value = current_value
            best_coordinates = current_coordinates

    return best_coordinates, best_value, steps
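# The two gradient-based algorithms below call linear_search(), which is not
# included in this listing. The sketch below is an assumption inferred from
# the calls linear_search(coords, direction, accuracy): it samples target()
# along the search direction and shrinks the bracket until the step parameter
# t is known to within 10**-accuracy. It is a stand-in, not the original
# course implementation, and it only searches the segment t in [0, 1].
def linear_search(coords, direction, accuracy):
    """
    Find an approximate minimum of :meth:`target` on the line
    coords + t*direction, with t in [0, 1].

    Args:
        coords (array): starting point (x,y)
        direction (array): search direction
        accuracy (int): refine until t is known to within 10**-accuracy

    Returns:
        array, float: best coordinates found and the target value there
    """
    t_min, t_max = 0.0, 1.0
    while t_max - t_min > 10.0**(-accuracy):
        ts = np.linspace(t_min, t_max, 11)
        values = [ target( coords + t*direction ) for t in ts ]
        best = int(np.argmin(values))
        # shrink the bracket around the best sampled point
        t_min = ts[max(best-1, 0)]
        t_max = ts[min(best+1, 10)]
    t_best = 0.5*(t_min + t_max)
    best_coords = coords + t_best*direction
    return best_coords, target(best_coords)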
def conjugate_gradients_algorithm(initial_guess, max_gradient = 0.1, max_steps = 1000):
    """
    Searches for the minimum of the :meth:`target` function :math:`f`.

    The conjugate gradients algorithm starts from the initial guess and
    repeats the following:

    * Calculate the :meth:`gradient` :math:`\\vec{g}_i = \\nabla f` at the
      current position.
    * Check the length of the gradient vector :math:`g_i = | \\vec{g}_i |`
      and stop if it is below a given threshold. If not, continue.
    * Calculate a mixing parameter. There are many ways to do this.
      This function uses

      .. math ::
          \\beta_i = \\frac{ \\vec{g}_i \\cdot (\\vec{g}_i - \\vec{g}_{i-1}) }{ \\vec{g}_{i-1} \\cdot \\vec{g}_{i-1} }

    * Do a :meth:`linear_search` in the direction
      :math:`\\vec{d}_i = - \\vec{g}_i + \\beta_i \\vec{d}_{i-1}` and move to
      the lowest point found. (For the first step, :math:`\\vec{d}_i = - \\vec{g}_i`.)

    Once finished, the algorithm returns the coordinates where the minimum
    was found, the minimum target value and the path it took as a list of
    coordinates.

    .. note ::
        This function is incomplete!

    Args:
        initial_guess (array): starting point (x,y)
        max_gradient (float): stop if :math:`g` is less than this
        max_steps (int): stop if the iteration takes more than this many steps

    Returns:
        array, float, array: final coordinates, final target value, list of coordinates
    """
    # demand a more accurate line search when a small gradient is required
    if max_gradient < 0.001:
        accuracy = 6
    else:
        accuracy = 4

    coords = initial_guess
    n_step = 1
    steps = [coords]
    value = target(coords)  # make sure 'value' is defined even if the loop never runs
    grad = gradient(coords)
    direction = -grad
    g_norm = np.sqrt( direction @ direction )

    while g_norm > max_gradient and n_step < max_steps:
        n_step += 1
        coords, value = linear_search( coords, direction, accuracy )

        # todo
        direction = np.zeros(2)

        g_norm = np.sqrt( direction @ direction )
        steps.append(coords)

    return coords, value, steps
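# One possible way to complete the todo above, shown as a sketch (the
# exercise leaves the actual solution to the reader). It applies the mixing
# formula from the docstring and measures convergence from the new gradient:
#
#     new_grad = gradient(coords)
#     beta = ( new_grad @ (new_grad - grad) ) / ( grad @ grad )
#     direction = -new_grad + beta*direction
#     grad = new_grad
#     g_norm = np.sqrt( grad @ grad )
#
# Note that the stopping test should use the gradient norm; the placeholder
# direction = np.zeros(2) makes g_norm zero and ends the loop immediately.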
def deepest_descent_algorithm(initial_guess, max_gradient = 0.1, max_steps = 1000):
    """
    Searches for the minimum of the :meth:`target` function :math:`f`.

    The deepest descent algorithm starts from the initial guess and
    repeats the following:

    * Calculate the :meth:`gradient` :math:`\\vec{g} = \\nabla f` at the
      current position.
    * Check the length of the gradient vector :math:`g = | \\vec{g} |` and
      stop if it is below a given threshold. If not, continue.
    * Do a :meth:`linear_search` in the direction :math:`\\vec{d} = - \\vec{g}`
      and move to the lowest point found.

    Once finished, the algorithm returns the coordinates where the minimum
    was found, the minimum target value and the path it took as a list of
    coordinates.

    .. note ::
        This function is incomplete!

    Args:
        initial_guess (array): starting point (x,y)
        max_gradient (float): stop if :math:`g` is less than this
        max_steps (int): stop if the iteration takes more than this many steps

    Returns:
        array, float, array: final coordinates, final target value, list of coordinates
    """
    coords = initial_guess
    n_step = 1
    steps = [coords]
    g_norm = max_gradient + 1  # force at least one iteration

    while g_norm > max_gradient and n_step < max_steps:
        n_step += 1
        value = 0.0

        # todo:
        # - calculate the direction for searching and save it in variable 'direction'
        # - do a linear search in this direction
        direction = np.zeros(2)

        g_norm = np.sqrt( direction @ direction )
        steps.append(coords)

    return coords, value, steps
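# One possible way to complete the todo above, shown as a sketch (the
# exercise leaves the actual solution to the reader):
#
#     grad = gradient(coords)
#     direction = -grad
#     coords, value = linear_search( coords, direction, 4 )
#     g_norm = np.sqrt( grad @ grad )
#
# As in the conjugate gradients stub, the placeholder direction = np.zeros(2)
# makes g_norm zero, so the unmodified loop exits after a single pass.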
def main(guess, algorithm = "dd"):
    """
    Find a minimum of the :meth:`target` function.

    The function also visualizes the optimization process.

    Args:
        guess (array): Initial coordinates (x,y) as array.
        algorithm (str): One of:
            *"dd"*: deepest descent gradient search,
            *"cg"*: conjugate gradients search,
            *"acg"*: more accurate conjugate gradients search,
            *"scipy"*: built-in scipy conjugate gradients search,
            *"sa"*: simulated annealing
    """
    if algorithm == "dd":
        # use deepest descent algorithm
        coords, value, steps = deepest_descent_algorithm(guess)

    elif algorithm == "cg":
        # use conjugate gradients algorithm
        coords, value, steps = conjugate_gradients_algorithm(guess)

    elif algorithm == "acg":
        # conjugate gradients with higher accuracy
        coords, value, steps = conjugate_gradients_algorithm(guess, 10**-4)

    elif algorithm == "scipy":
        # built-in scipy conjugate gradients optimizer
        coords, steps = opt.fmin_cg(target, guess, gradient, retall=1)
        value = target(coords)

    elif algorithm == "sa":
        # simulated annealing
        coords, value, steps = simulated_annealing(guess, 0.5, 0.0001, 1000, 0.1)

    else:
        print("not a valid algorithm")
        quit()

    # show solution
    print( "solution: ", coords )
    print( "value: ", value )

    # show how the algorithm worked
    if alternative:
        show_convergence(steps, minx=-2, miny=-2, maxx=2, maxy=2)
        animate(steps, minx=-2, miny=-2, maxx=2, maxy=2)
    else:
        show_convergence(steps, minx=0, miny=0, maxx=3, maxy=3)
        animate(steps, minx=0, miny=0, maxx=3, maxy=3)
if __name__ == "__main__":

    random = default_rng()

    # set this to True to use the alternative target function
    alternative = True

    # let the computer take a random guess
    if alternative:
        guess = random.random(size=2)*3 - 1.5
    else:
        guess = random.random(size=2)*3

    # or specify the initial guess yourself
    # guess = np.array([1.5, 1.5])

    main(guess, "dd")