Ginzburg-Landau model energy minimization

Original

Optimized

In this example a basic Ginzburg-Landau model is solved. This example gives an idea of how the API together with ForwardDiff can be leveraged to performantly solve non standard problems on a FEM grid. A large portion of the code is there only for performance reasons, but since this usually really matters and is what takes the most time to optimize, it is included.

The key to using a method like this for minimizing a free energy function directly, rather than the weak form, as is usually done with FEM, is to split up the gradient and Hessian calculations. This means that they are performed for each cell separately instead of for the grid as a whole.

using ForwardDiff
import ForwardDiff: GradientConfig, HessianConfig, Chunk
using Ferrite
using Optim, LineSearches
using SparseArrays
using Tensors
using Base.Threads

Energy terms

4th order Landau free energy

function Fl(P::Vec{3, T}, α::Vec{3}) where {T}
    P2 = Vec{3, T}((P[1]^2, P[2]^2, P[3]^2))
    return α[1] * sum(P2) +
        α[2] * (P[1]^4 + P[2]^4 + P[3]^4) +
        α[3] * ((P2[1] * P2[2] + P2[2] * P2[3]) + P2[1] * P2[3])
end

Fl (generic function with 1 method)

Ginzburg free energy

@inline Fg(∇P, G) = 0.5(∇P ⊡ G) ⊡ ∇P

Fg (generic function with 1 method)

GL free energy

F(P, ∇P, params) = Fl(P, params.α) + Fg(∇P, params.G)

F (generic function with 1 method)

Parameters that characterize the model

struct ModelParams{V, T}
    α::V
    G::T
end

ThreadCache

This holds the values that each thread will use during the assembly.

struct ThreadCache{CV, T, DIM, F <: Function, GC <: GradientConfig, HC <: HessianConfig}
    cvP::CV
    element_indices::Vector{Int}
    element_dofs::Vector{T}
    element_gradient::Vector{T}
    element_hessian::Matrix{T}
    element_coords::Vector{Vec{DIM, T}}
    element_potential::F
    gradconf::GC
    hessconf::HC
end
function ThreadCache(dpc::Int, nodespercell, cvP::CellValues, modelparams, elpotential)
    element_indices = zeros(Int, dpc)
    element_dofs = zeros(dpc)
    element_gradient = zeros(dpc)
    element_hessian = zeros(dpc, dpc)
    element_coords = zeros(Vec{3, Float64}, nodespercell)
    potfunc = x -> elpotential(x, cvP, modelparams)
    gradconf = GradientConfig(potfunc, zeros(dpc), Chunk{12}())
    hessconf = HessianConfig(potfunc, zeros(dpc), Chunk{4}())
    return ThreadCache(cvP, element_indices, element_dofs, element_gradient, element_hessian, element_coords, potfunc, gradconf, hessconf)
end

Main.ThreadCache

The Model

everything is combined into a model.

mutable struct LandauModel{T, DH <: DofHandler, CH <: ConstraintHandler, TC <: ThreadCache}
    dofs::Vector{T}
    dofhandler::DH
    boundaryconds::CH
    threadindices::Vector{Vector{Int}}
    threadcaches::Vector{TC}
end

function LandauModel(α, G, gridsize, left::Vec{DIM, T}, right::Vec{DIM, T}, elpotential) where {DIM, T}
    grid = generate_grid(Tetrahedron, gridsize, left, right)
    threadindices = Ferrite.create_coloring(grid)

    qr = QuadratureRule{RefTetrahedron}(2)
    ipP = Lagrange{RefTetrahedron, 1}()^3
    cvP = CellValues(qr, ipP)

    dofhandler = DofHandler(grid)
    add!(dofhandler, :P, ipP)
    close!(dofhandler)

    dofvector = zeros(ndofs(dofhandler))
    startingconditions!(dofvector, dofhandler)
    boundaryconds = ConstraintHandler(dofhandler)
    #boundary conditions can be added but aren't necessary for optimization
    #add!(boundaryconds, Dirichlet(:P, getfacetset(grid, "left"), (x, t) -> [0.0,0.0,0.53], [1,2,3]))
    #add!(boundaryconds, Dirichlet(:P, getfacetset(grid, "right"), (x, t) -> [0.0,0.0,-0.53], [1,2,3]))
    close!(boundaryconds)
    update!(boundaryconds, 0.0)

    apply!(dofvector, boundaryconds)

    hessian = allocate_matrix(dofhandler)
    dpc = ndofs_per_cell(dofhandler)
    cpc = length(grid.cells[1].nodes)
    caches = [ThreadCache(dpc, cpc, copy(cvP), ModelParams(α, G), elpotential) for t in 1:nthreads()]
    return LandauModel(dofvector, dofhandler, boundaryconds, threadindices, caches)
end

Main.LandauModel

utility to quickly save a model

function save_landau(path, model, dofs = model.dofs)
    VTKGridFile(path, model.dofhandler) do vtk
        write_solution(vtk, model.dofhandler, dofs)
    end
    return
end

save_landau (generic function with 2 methods)

Assembly

This macro defines most of the assembly step, since the structure is the same for the energy, gradient and Hessian calculations.

macro assemble!(innerbody)
    return esc(
        quote
            dofhandler = model.dofhandler
            for indices in model.threadindices
                @threads for i in indices
                    cache = model.threadcaches[threadid()]
                    eldofs = cache.element_dofs
                    nodeids = dofhandler.grid.cells[i].nodes
                    for j in 1:length(cache.element_coords)
                        cache.element_coords[j] = dofhandler.grid.nodes[nodeids[j]].x
                    end
                    reinit!(cache.cvP, cache.element_coords)

                    celldofs!(cache.element_indices, dofhandler, i)
                    for j in 1:length(cache.element_dofs)
                        eldofs[j] = dofvector[cache.element_indices[j]]
                    end
                    $innerbody
                end
            end
        end
    )
end

@assemble! (macro with 1 method)

This calculates the total energy calculation of the grid

function F(dofvector::Vector{T}, model) where {T}
    outs = fill(zero(T), nthreads())
    @assemble! begin
        outs[threadid()] += cache.element_potential(eldofs)
    end
    return sum(outs)
end

F (generic function with 2 methods)

The gradient calculation for each dof

function ∇F!(∇f::Vector{T}, dofvector::Vector{T}, model::LandauModel{T}) where {T}
    fill!(∇f, zero(T))
    @assemble! begin
        ForwardDiff.gradient!(cache.element_gradient, cache.element_potential, eldofs, cache.gradconf)
        @inbounds assemble!(∇f, cache.element_indices, cache.element_gradient)
    end
    return
end

∇F! (generic function with 1 method)

The Hessian calculation for the whole grid

function ∇²F!(∇²f::SparseMatrixCSC, dofvector::Vector{T}, model::LandauModel{T}) where {T}
    assemblers = [start_assemble(∇²f) for t in 1:nthreads()]
    @assemble! begin
        ForwardDiff.hessian!(cache.element_hessian, cache.element_potential, eldofs, cache.hessconf)
        @inbounds assemble!(assemblers[threadid()], cache.element_indices, cache.element_hessian)
    end
    return
end

∇²F! (generic function with 1 method)

We can also calculate all things in one go!

function calcall(∇²f::SparseMatrixCSC, ∇f::Vector{T}, dofvector::Vector{T}, model::LandauModel{T}) where {T}
    outs = fill(zero(T), nthreads())
    fill!(∇f, zero(T))
    assemblers = [start_assemble(∇²f, ∇f) for t in 1:nthreads()]
    @assemble! begin
        outs[threadid()] += cache.element_potential(eldofs)
        ForwardDiff.hessian!(cache.element_hessian, cache.element_potential, eldofs, cache.hessconf)
        ForwardDiff.gradient!(cache.element_gradient, cache.element_potential, eldofs, cache.gradconf)
        @inbounds assemble!(assemblers[threadid()], cache.element_indices, cache.element_gradient, cache.element_hessian)
    end
    return sum(outs)
end

calcall (generic function with 1 method)

Minimization

Now everything can be combined to minimize the energy, and find the equilibrium configuration.

function minimize!(model; kwargs...)
    dh = model.dofhandler
    dofs = model.dofs
    ∇f = fill(0.0, length(dofs))
    ∇²f = allocate_matrix(dh)
    function g!(storage, x)
        ∇F!(storage, x, model)
        return apply_zero!(storage, model.boundaryconds)
    end
    function h!(storage, x)
        return ∇²F!(storage, x, model)
        # apply!(storage, model.boundaryconds)
    end
    f(x) = F(x, model)

    od = TwiceDifferentiable(f, g!, h!, model.dofs, 0.0, ∇f, ∇²f)

    # this way of minimizing is only beneficial when the initial guess is completely off,
    # then a quick couple of ConjuageGradient steps brings us easily closer to the minimum.
    # res = optimize(od, model.dofs, ConjugateGradient(linesearch=BackTracking()), Optim.Options(show_trace=true, show_every=1, g_tol=1e-20, iterations=10))
    # model.dofs .= res.minimizer
    # to get the final convergence, Newton's method is more ideal since the energy landscape should be almost parabolic
    ##+
    res = optimize(od, model.dofs, Newton(linesearch = BackTracking()), Optim.Options(show_trace = true, show_every = 1, g_tol = 1.0e-20))
    model.dofs .= res.minimizer
    return res
end

minimize! (generic function with 1 method)

Testing it

This calculates the contribution of each element to the total energy, it is also the function that will be put through ForwardDiff for the gradient and Hessian.

function element_potential(eldofs::AbstractVector{T}, cvP, params) where {T}
    energy = zero(T)
    for qp in 1:getnquadpoints(cvP)
        P = function_value(cvP, qp, eldofs)
        ∇P = function_gradient(cvP, qp, eldofs)
        energy += F(P, ∇P, params) * getdetJdV(cvP, qp)
    end
    return energy
end

element_potential (generic function with 1 method)

now we define some starting conditions

function startingconditions!(dofvector, dofhandler)
    for cell in CellIterator(dofhandler)
        globaldofs = celldofs(cell)
        it = 1
        for i in 1:3:length(globaldofs)
            dofvector[globaldofs[i]] = -2.0
            dofvector[globaldofs[i + 1]] = 2.0
            dofvector[globaldofs[i + 2]] = -2.0tanh(cell.coords[it][1] / 20)
            it += 1
        end
    end
    return
end

δ(i, j) = i == j ? one(i) : zero(i)
V2T(p11, p12, p44) = Tensor{4, 3}((i, j, k, l) -> p11 * δ(i, j) * δ(k, l) * δ(i, k) + p12 * δ(i, j) * δ(k, l) * (1 - δ(i, k)) + p44 * δ(i, k) * δ(j, l) * (1 - δ(i, j)))

G = V2T(1.0e2, 0.0, 1.0e2)
α = Vec{3}((-1.0, 1.0, 1.0))
left = Vec{3}((-75.0, -25.0, -2.0))
right = Vec{3}((75.0, 25.0, 2.0))
model = LandauModel(α, G, (50, 50, 2), left, right, element_potential)

save_landau("landauorig", model)
@time minimize!(model)
save_landau("landaufinal", model)

Iter     Function value   Gradient norm
     0     2.127588e+06     3.597094e+02
 * time: 0.00010800361633300781
     1     3.786155e+05     1.047687e+02
 * time: 2.874967098236084
     2     5.306125e+04     2.978953e+01
 * time: 5.700730085372925
     3    -2.642320e+03     7.943136e+00
 * time: 8.523725032806396
     4    -1.027484e+04     1.752693e+00
 * time: 11.346935987472534
     5    -1.084925e+04     2.157295e-01
 * time: 14.190430164337158
     6    -1.085880e+04     5.288877e-03
 * time: 17.008180141448975
     7    -1.085881e+04     3.478176e-06
 * time: 19.82645297050476
     8    -1.085881e+04     1.596091e-12
 * time: 22.68190097808838
     9    -1.085881e+04     1.444882e-12
 * time: 25.53494906425476
    10    -1.085881e+04     1.548831e-13
 * time: 28.352108001708984
    11    -1.085881e+04     1.548831e-13
 * time: 31.271126985549927
    12    -1.085881e+04     1.508030e-13
 * time: 31.717130184173584
 37.720729 seconds (10.40 M allocations: 3.280 GiB, 0.32% gc time, 9.01% compilation time)

as we can see this runs very quickly even for relatively large gridsizes. The key to get high performance like this is to minimize the allocations inside the threaded loops, ideally to 0.

This page was generated using Literate.jl.