Implement 2d Euler equations & rework `@with_signature`
Realized that the current `@with_signature` interface is not sufficient to conveniently operate with normal vectors in more than 1d, and it only seems to get in the way when working with numerical fluxes in 2d.
Because of that I started a rework: move away from the Riemann solver interface, where we are rather strict about the method signatures an `ApproxRiemannSolver` demands (e.g. `flux` and `speed` must both accept the same number of variables, etc.).
Instead, `@with_signature` can now use multiple `@accepts` statements, where the variable names can be prefixed to disambiguate them in the body, e.g.

```julia
@with_signature function numflux(eq::EulerEquation2d)
    @accepts Prefix(lhs), rho, qx, qy, E
    @accepts Prefix(rhs), rho, qx, qy, E
    @accepts nx, ny
    # compute something
    @returns nflx_rho, nflx_qx, nflx_qy, nflx_E
end
```
translates to

```julia
function numflux(args1, args2, args3, eq::EulerEquation2d)
    lhs_rho, lhs_qx, lhs_qy, lhs_E = args1
    rhs_rho, rhs_qx, rhs_qy, rhs_E = args2
    nx, ny = args3
    # compute something
    return nflx_rho, nflx_qx, nflx_qy, nflx_E
end
```
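For clarity, a caller then passes one tuple per `@accepts` group, in the order the groups are declared (the values below are placeholders for illustration):

```julia
# hypothetical call site; one tuple per @accepts group, in declaration order
numflux((lhs_rho, lhs_qx, lhs_qy, lhs_E), (rhs_rho, rhs_qx, rhs_qy, rhs_E), (nx, ny), eq)
```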
Doing it that way now requires that the implementer of an equation also writes a function to compute the numerical fluxes (which we probably wanted anyway for finer control). They must also be careful about what they hand over to `broadcast_faces!` and `broadcast_volume!`, especially `broadcast_faces!`, because we cut out the sanity checks from `ApproxRiemannSolver`.
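To make that concrete, here is a minimal sketch of what such an equation-specific numerical flux could look like, using a plain local Lax-Friedrichs flux. The function name, the `eq.gamma` field and the inner helper are assumptions for illustration and not the code added in this PR:

```julia
# hypothetical LLF numerical flux for the 2d Euler equations with conserved
# variables (rho, qx, qy, E); eq.gamma is an assumed field name
@with_signature function llf_numflux(eq::EulerEquation2d)
    @accepts Prefix(lhs), rho, qx, qy, E
    @accepts Prefix(rhs), rho, qx, qy, E
    @accepts nx, ny

    γ = eq.gamma

    # physical flux projected onto the face normal and max signal speed |v·n| + c
    function normal_flux_and_speed(rho, qx, qy, E)
        vx, vy = qx/rho, qy/rho
        p  = (γ - 1) * (E - 0.5*rho*(vx^2 + vy^2))
        vn = nx*vx + ny*vy
        c  = sqrt(γ*p/rho)
        return (rho*vn, qx*vn + p*nx, qy*vn + p*ny, (E + p)*vn), abs(vn) + c
    end

    (lf_rho, lf_qx, lf_qy, lf_E), ls = normal_flux_and_speed(lhs_rho, lhs_qx, lhs_qy, lhs_E)
    (rf_rho, rf_qx, rf_qy, rf_E), rs = normal_flux_and_speed(rhs_rho, rhs_qx, rhs_qy, rhs_E)
    C = max(ls, rs)

    nflx_rho = 0.5*(lf_rho + rf_rho) - 0.5*C*(rhs_rho - lhs_rho)
    nflx_qx  = 0.5*(lf_qx  + rf_qx)  - 0.5*C*(rhs_qx  - lhs_qx)
    nflx_qy  = 0.5*(lf_qy  + rf_qy)  - 0.5*C*(rhs_qy  - lhs_qy)
    nflx_E   = 0.5*(lf_E   + rf_E)   - 0.5*C*(rhs_E   - lhs_E)

    @returns nflx_rho, nflx_qx, nflx_qy, nflx_E
end
```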
But there are also benefits:
- Makes it tractable/convenient to implement 2d equations.
- The new interface allows reusing previously cached results, e.g. from running `broadcast_volume!(flux, ...)` before `broadcast_faces!(llf_rho, ...)` (this was also possible before, but needed a workaround with `State(..)` etc.); see the small call sketch below the list.
- The new interface provides a speedup in the RHS evaluation (only tested with `EulerEquation2d` and the very simple implementation I had added in the first commit of this PR), likely because of the previous point.
- We can likely remove the `RiemannSolver` interface for now.
- The implementation of `broadcast_volume!`, `broadcast_faces!` is simplified (at least it seemed so initially).
- In theory this system now allows reducing the number of times we have to traverse the volume/faces for function evaluations, e.g. instead of going over all faces four times for the numerical fluxes of `rho, qx, qy, E` in `EulerEquation2d`, we can now implement one `numflux` function that does it all at once. However, I will soon hit the maximum tuple length limit (= 32), beyond which inlining tuples becomes costly. This will need a workaround with some kind of `OnionTuple`; I add a draft of that below.
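As a small illustration of the cached-reuse pattern mentioned above, the intended call order could look like the following; only the function names `broadcast_volume!`/`broadcast_faces!` come from this PR, the argument lists and cached variable names are assumptions:

```julia
# hypothetical RHS snippet; argument lists and cache names are assumptions
broadcast_volume!(flux, equation, cache)    # compute and cache flx_rho, flx_qx, flx_qy, flx_E once per volume
broadcast_faces!(numflux, equation, cache)  # the face kernel can @accepts the cached flx_* instead of recomputing them
```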
Draft `OnionTuple`

```julia
using BenchmarkTools
const MAXLENGTH=32
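# Idea behind this sketch: store the variables in a tuple of tuples, each inner
# tuple at most MAXLENGTH entries long, so that unpacking stays cheap while the
# total number of variables can exceed the tuple length limit mentioned above.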
struct OnionTuple{T,Ntotal,Nrest,Nfilled}
    # TODO why not split this into rest and filled
    tpls::Tuple{NTuple{Nrest,T},Vararg{NTuple{MAXLENGTH,T},Nfilled}}
    # rest::NTuple{Nrest,T}
    # filled::NTuple{Nfilled,NTuple{MAXLENGTH,T}}
    function OnionTuple(rest::NTuple{Nrest,T}, filled::NTuple{MAXLENGTH,T}...) where {Nrest,T}
        Nfilled = length(filled)
        Ntotal = Nrest + Nfilled*MAXLENGTH
        @assert Nrest <= 32
        @assert 1 + Nfilled <= 32
        return new{T,Ntotal,Nrest,Nfilled}((rest, filled...))
        # return new{T,Ntotal,Nrest,Nfilled}(rest,filled)
    end
    function OnionTuple(rest::NTuple{Nrest,T}) where {Nrest,T}
        Nfilled = 0
        Ntotal = Nrest
        @assert Nrest <= 32
        return new{T,Ntotal,Nrest,Nfilled}((rest,))
        # return new{T,Ntotal,Nrest,Nfilled}(rest,())
    end
end
Base.length(tpl::OnionTuple{T,Ntotal}) where {T,Ntotal} = Int64(Ntotal)
# tuple unpacking
Base.indexed_iterate(o::OnionTuple, i::Int, state=1) = (@inline; (o[i], i+1))
# function index(Nfilled,Nrest,i)
# if i <= Nfilled*MAXLENGTH
# n = div(i,MAXLENGTH)
# j = i % MAXLENGTH
# j = j == 0 ? 1 : j
# return (n,j)
# else
# return (0,i-Nfilled*MAXLENGTH)
# end
# end
# function Base.getindex(tpl::OnionTuple{T,Ntotal,Nrest,Nfilled}, i::Integer) where {T,Ntotal,Nrest,Nfilled}
# n,j = index(Nfilled,Nrest,i)
# if n == 0
# tpl.rest[j]
# else
# tpl.filled[n][j]
# end
# end
function Base.getindex(tpl::OnionTuple{T,Ntotal,Nrest,Nfilled}, i::Integer) where {T,Ntotal,Nrest,Nfilled}
    n, j = if i <= Nfilled*MAXLENGTH
        # i falls into one of the filled MAXLENGTH-tuples; the first +1 converts the
        # 0-based block count to a 1-based index, the second +1 skips the leading rest tuple
        ii = i-1
        n = div(ii,MAXLENGTH)+1+1
        j = (ii % MAXLENGTH)+1
        n, j
    else
        # remaining indices map into the rest tuple stored at position 1
        1, i-Nfilled*MAXLENGTH
    end
    return tpl.tpls[n][j]
end
# function Base.getindex(tpl::OnionTuple{T,Ntotal,Nrest,Nfilled}, i::Integer) where {T,Ntotal,Nrest,Nfilled}
# # @assert 1 <= i <= Ntotal
# # el = if i <= Nfilled*MAXLENGTH
# # n = div(i,MAXLENGTH)
# # j = i % MAXLENGTH
# # j = j == 0 ? 1 : j
# # @inbounds tpl.filled[n][j]
# # else
# # @inbounds tpl.rest[i-Nfilled*MAXLENGTH]
# # end
# # return el
# # return tpl.filled[i]
# end
# function compute_something(tpl::OnionTuple{Int64})
# t1, t2, t3 = tpl[1], tpl[2], tpl[3]
# return t1 + t2 + t3
# end
function kernel(tpls)
    tpl1, tpl2 = tpls
    a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17,a18,a19,a20,a21,a22,a23,a24,a25,a26,a27,a28,a29,a30,a31,a32 = tpl1
    a33, = tpl2
    r = a1+a2+a3+a4+a5+a6+a7+a8+a9+a10+
        a11+a12+a13+a14+a15+a16+a17+a18+a19+a20+
        a21+a22+a23+a24+a25+a26+a27+a28+a29+a30+
        a31+a32+a33
    # r = 10*a1
    return (r,)
end
function kernel(tpl::OnionTuple)
    a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17,a18,a19,a20,a21,a22,a23,a24,a25,a26,a27,a28,a29,a30,a31,a32,a33 = tpl
    # a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17,a18,a19,a20,a21,a22,a23,a24,a25,a26,a27,a28,a29,a30,a31,a32,a33 =
    #     tpl[1],tpl[2],tpl[3],tpl[4],tpl[5],tpl[6],tpl[7],tpl[8],tpl[9],tpl[10],tpl[11],tpl[12],tpl[13],tpl[14],tpl[15],tpl[16],tpl[17],tpl[18],tpl[19],tpl[20],tpl[21],tpl[22],tpl[23],tpl[24],tpl[25],tpl[26],tpl[27],tpl[28],tpl[29],tpl[30],tpl[31],tpl[32],tpl[33]
    r = a1+a2+a3+a4+a5+a6+a7+a8+a9+a10+
        a11+a12+a13+a14+a15+a16+a17+a18+a19+a20+
        a21+a22+a23+a24+a25+a26+a27+a28+a29+a30+
        a31+a32+a33
    # r = 10*a1
    return (r,)
end
N = 100
tpl1 = Tuple( randn() for _ = 1:MAXLENGTH )
tpl2 = Tuple( randn() for _ = 1:1 )
tpls = (tpl1,tpl2)
kernel(tpls)
onion = OnionTuple(tpl2,tpl1)
kernel(onion)
b1 = @benchmark kernel($tpls)
display(b1)
b2 = @benchmark kernel($onion)
display(b2)
```