Skip to content
Snippets Groups Projects
Commit 625b8bd9 authored by Florian Atteneder's avatar Florian Atteneder
Browse files

'unroll' llf, av_nflux_2d methods to see how much perf can be gained over...

'unroll' llf, av_nflux_2d methods to see how much perf can be gained over current broadcast_faces_2! impl
parent ecadfab9
No related tags found
1 merge request!55Add LV to optimize `dg_rhs` loops
Pipeline #5850 failed
......@@ -483,3 +483,289 @@ end
@returns nflx_rho, nflx_qx, nflx_qy, nflx_E
end
function av_nflux_2d(eq::EulerEquation2d, mesh::Mesh2d)
@unpack ldg_rho_x, ldg_rho_y, ldg_qx_x, ldg_qx_y, ldg_qy_x, ldg_qy_y,
ldg_E_x, ldg_E_y, smoothed_mu = get_static_variables(mesh.cache)
@unpack nflx_rho, nflx_qx, nflx_qy, nflx_E, nx, ny = get_bdry_variables(mesh.cache)
@inbounds for icell in mesh.bulkidxs
cell_in = mesh.tree.cells[icell]
bulk_in = cellindices(mesh, icell)
rng_lhs, rng_rhs, rng_down, rng_up = dg1d.faceindices(mesh, icell)
if dg1d.has_neighbor(cell_in, Cart2d.Xmin)
cell_out = cell_in.neighbors[Cart2d.Xmin]
bulk_out = cellindices(mesh, cell_out)
for (i, fi) in enumerate(rng_lhs)
bo = bulk_out[end,i]
bi = bulk_in[1,i]
nxi, nyi = nx[fi], ny[fi]
lhs_nflx = nxi*ldg_rho_x[bi] + nyi*ldg_rho_y[bi]
rhs_nflx = nxi*ldg_rho_x[bo] + nyi*ldg_rho_y[bo]
nflx_rho[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_qx_x[bi] + nyi*ldg_qx_y[bi]
rhs_nflx = nxi*ldg_qx_x[bo] + nyi*ldg_qx_y[bo]
nflx_qx[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_qy_x[bi] + nyi*ldg_qy_y[bi]
rhs_nflx = nxi*ldg_qy_x[bo] + nyi*ldg_qy_y[bo]
nflx_qy[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_E_x[bi] + nyi*ldg_E_y[bi]
rhs_nflx = nxi*ldg_E_x[bo] + nyi*ldg_E_y[bo]
nflx_E[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
end
end
if dg1d.has_neighbor(cell_in, Cart2d.Xmax)
cell_out = cell_in.neighbors[Cart2d.Xmax]
bulk_out = cellindices(mesh, cell_out)
for (i, fi) in enumerate(rng_rhs)
bo = bulk_out[1,i]
bi = bulk_in[end,i]
nxi, nyi = nx[fi], ny[fi]
lhs_nflx = nxi*ldg_rho_x[bi] + nyi*ldg_rho_y[bi]
rhs_nflx = nxi*ldg_rho_x[bo] + nyi*ldg_rho_y[bo]
nflx_rho[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_qx_x[bi] + nyi*ldg_qx_y[bi]
rhs_nflx = nxi*ldg_qx_x[bo] + nyi*ldg_qx_y[bo]
nflx_qx[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_qy_x[bi] + nyi*ldg_qy_y[bi]
rhs_nflx = nxi*ldg_qy_x[bo] + nyi*ldg_qy_y[bo]
nflx_qy[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_E_x[bi] + nyi*ldg_E_y[bi]
rhs_nflx = nxi*ldg_E_x[bo] + nyi*ldg_E_y[bo]
nflx_E[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
end
end
if dg1d.has_neighbor(cell_in, Cart2d.Ymin)
cell_out = cell_in.neighbors[Cart2d.Ymin]
bulk_out = cellindices(mesh, cell_out)
for (i, fi) in enumerate(rng_down)
bo = bulk_out[i,end]
bi = bulk_in[i,1]
nxi, nyi = nx[fi], ny[fi]
lhs_nflx = nxi*ldg_rho_x[bi] + nyi*ldg_rho_y[bi]
rhs_nflx = nxi*ldg_rho_x[bo] + nyi*ldg_rho_y[bo]
nflx_rho[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_qx_x[bi] + nyi*ldg_qx_y[bi]
rhs_nflx = nxi*ldg_qx_x[bo] + nyi*ldg_qx_y[bo]
nflx_qx[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_qy_x[bi] + nyi*ldg_qy_y[bi]
rhs_nflx = nxi*ldg_qy_x[bo] + nyi*ldg_qy_y[bo]
nflx_qy[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_E_x[bi] + nyi*ldg_E_y[bi]
rhs_nflx = nxi*ldg_E_x[bo] + nyi*ldg_E_y[bo]
nflx_E[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
end
end
if dg1d.has_neighbor(cell_in, Cart2d.Ymax)
cell_out = cell_in.neighbors[Cart2d.Ymax]
bulk_out = cellindices(mesh, cell_out)
for (i, fi) in enumerate(rng_up)
bo = bulk_out[i,1]
bi = bulk_in[i,end]
nxi, nyi = nx[fi], ny[fi]
lhs_nflx = nxi*ldg_rho_x[bi] + nyi*ldg_rho_y[bi]
rhs_nflx = nxi*ldg_rho_x[bo] + nyi*ldg_rho_y[bo]
nflx_rho[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_qx_x[bi] + nyi*ldg_qx_y[bi]
rhs_nflx = nxi*ldg_qx_x[bo] + nyi*ldg_qx_y[bo]
nflx_qx[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_qy_x[bi] + nyi*ldg_qy_y[bi]
rhs_nflx = nxi*ldg_qy_x[bo] + nyi*ldg_qy_y[bo]
nflx_qy[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
lhs_nflx = nxi*ldg_E_x[bi] + nyi*ldg_E_y[bi]
rhs_nflx = nxi*ldg_E_x[bo] + nyi*ldg_E_y[bo]
nflx_E[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx)
end
end
end
end
# @with_signature function llf(equation::EulerEquation2d)
# @accepts Prefix(lhs), rho, flx_rho_x, flx_rho_y, qx, flx_qx_x, flx_qx_y, qy, flx_qy_y, E, flx_E_x, flx_E_y, v
# @accepts Prefix(rhs), rho, flx_rho_x, flx_rho_y, qx, flx_qx_x, flx_qx_y, qy, flx_qy_y, E, flx_E_x, flx_E_y, v
# @accepts nx, ny
#
# vmax = absmax(lhs_v, rhs_v)
#
# lhs_nflx = nx*lhs_flx_rho_x + ny*lhs_flx_rho_y
# rhs_nflx = nx*rhs_flx_rho_x + ny*rhs_flx_rho_y
# nflx_rho = LLF(lhs_nflx, rhs_nflx, lhs_rho, rhs_rho, vmax)
#
# lhs_nflx = nx*lhs_flx_qx_x + ny*lhs_flx_qx_y
# rhs_nflx = nx*rhs_flx_qx_x + ny*rhs_flx_qx_y
# nflx_qx = LLF(lhs_nflx, rhs_nflx, lhs_qx, rhs_qx, vmax)
#
# lhs_flx_qy_x = lhs_flx_qx_y
# rhs_flx_qy_x = rhs_flx_qx_y
# lhs_nflx = nx*lhs_flx_qy_x + ny*lhs_flx_qy_y
# rhs_nflx = nx*rhs_flx_qy_x + ny*rhs_flx_qy_y
# nflx_qy = LLF(lhs_nflx, rhs_nflx, lhs_qy, rhs_qy, vmax)
#
# lhs_nflx = nx*lhs_flx_E_x + ny*lhs_flx_E_y
# rhs_nflx = nx*rhs_flx_E_x + ny*rhs_flx_E_y
# nflx_E = LLF(lhs_nflx, rhs_nflx, lhs_E, rhs_E, vmax)
#
# @returns nflx_rho, nflx_qx, nflx_qy, nflx_E
# end
function llf(equation::EulerEquation2d, mesh::Mesh2d)
@unpack rho, qx, qy, E = get_dynamic_variables(mesh.cache)
@unpack flx_rho_x, flx_rho_y, flx_qx_x, flx_qx_y, flx_qy_y,
flx_E_x, flx_E_y, v = get_static_variables(mesh.cache)
@unpack nx, ny, nflx_rho, nflx_qx, nflx_qy, nflx_E = get_bdry_variables(mesh.cache)
flx_qy_x = flx_qx_y
@inbounds for icell in mesh.bulkidxs
cell_in = mesh.tree.cells[icell]
bulk_in = cellindices(mesh, icell)
rng_lhs, rng_rhs, rng_down, rng_up = dg1d.faceindices(mesh, icell)
if dg1d.has_neighbor(cell_in, Cart2d.Xmin)
cell_out = cell_in.neighbors[Cart2d.Xmin]
bulk_out = cellindices(mesh, cell_out)
for (i, fi) in enumerate(rng_lhs)
bo = bulk_out[end,i]
bi = bulk_in[1,i]
vmax = absmax(v[bi], v[bo])
nxi, nyi = nx[fi], ny[fi]
lhs_nflx = nxi*flx_rho_x[bi] + nyi*flx_rho_y[bi]
rhs_nflx = nxi*flx_rho_x[bo] + nyi*flx_rho_y[bo]
nflx_rho[fi] = LLF(lhs_nflx, rhs_nflx, rho[bi], rho[bo], vmax)
lhs_nflx = nxi*flx_qx_x[bi] + nyi*flx_qx_y[bi]
rhs_nflx = nxi*flx_qx_x[bo] + nyi*flx_qx_y[bo]
nflx_qx[fi] = LLF(lhs_nflx, rhs_nflx, qx[bi], qx[bo], vmax)
lhs_nflx = nxi*flx_qy_x[bi] + nyi*flx_qy_y[bi]
rhs_nflx = nxi*flx_qy_x[bo] + nyi*flx_qy_y[bo]
nflx_qy[fi] = LLF(lhs_nflx, rhs_nflx, qy[bi], qy[bo], vmax)
lhs_nflx = nxi*flx_E_x[bi] + nyi*flx_E_y[bi]
rhs_nflx = nxi*flx_E_x[bo] + nyi*flx_E_y[bo]
nflx_E[fi] = LLF(lhs_nflx, rhs_nflx, E[bi], E[bo], vmax)
end
end
if dg1d.has_neighbor(cell_in, Cart2d.Xmax)
cell_out = cell_in.neighbors[Cart2d.Xmax]
bulk_out = cellindices(mesh, cell_out)
for (i, fi) in enumerate(rng_rhs)
bo = bulk_out[1,i]
bi = bulk_in[end,i]
vmax = absmax(v[bi], v[bo])
nxi, nyi = nx[fi], ny[fi]
lhs_nflx = nxi*flx_rho_x[bi] + nyi*flx_rho_y[bi]
rhs_nflx = nxi*flx_rho_x[bo] + nyi*flx_rho_y[bo]
nflx_rho[fi] = LLF(lhs_nflx, rhs_nflx, rho[bi], rho[bo], vmax)
lhs_nflx = nxi*flx_qx_x[bi] + nyi*flx_qx_y[bi]
rhs_nflx = nxi*flx_qx_x[bo] + nyi*flx_qx_y[bo]
nflx_qx[fi] = LLF(lhs_nflx, rhs_nflx, qx[bi], qx[bo], vmax)
lhs_nflx = nxi*flx_qy_x[bi] + nyi*flx_qy_y[bi]
rhs_nflx = nxi*flx_qy_x[bo] + nyi*flx_qy_y[bo]
nflx_qy[fi] = LLF(lhs_nflx, rhs_nflx, qy[bi], qy[bo], vmax)
lhs_nflx = nxi*flx_E_x[bi] + nyi*flx_E_y[bi]
rhs_nflx = nxi*flx_E_x[bo] + nyi*flx_E_y[bo]
nflx_E[fi] = LLF(lhs_nflx, rhs_nflx, E[bi], E[bo], vmax)
end
end
if dg1d.has_neighbor(cell_in, Cart2d.Ymin)
cell_out = cell_in.neighbors[Cart2d.Ymin]
bulk_out = cellindices(mesh, cell_out)
for (i, fi) in enumerate(rng_down)
bo = bulk_out[i,end]
bi = bulk_in[i,1]
vmax = absmax(v[bi], v[bo])
nxi, nyi = nx[fi], ny[fi]
lhs_nflx = nxi*flx_rho_x[bi] + nyi*flx_rho_y[bi]
rhs_nflx = nxi*flx_rho_x[bo] + nyi*flx_rho_y[bo]
nflx_rho[fi] = LLF(lhs_nflx, rhs_nflx, rho[bi], rho[bo], vmax)
lhs_nflx = nxi*flx_qx_x[bi] + nyi*flx_qx_y[bi]
rhs_nflx = nxi*flx_qx_x[bo] + nyi*flx_qx_y[bo]
nflx_qx[fi] = LLF(lhs_nflx, rhs_nflx, qx[bi], qx[bo], vmax)
lhs_nflx = nxi*flx_qy_x[bi] + nyi*flx_qy_y[bi]
rhs_nflx = nxi*flx_qy_x[bo] + nyi*flx_qy_y[bo]
nflx_qy[fi] = LLF(lhs_nflx, rhs_nflx, qy[bi], qy[bo], vmax)
lhs_nflx = nxi*flx_E_x[bi] + nyi*flx_E_y[bi]
rhs_nflx = nxi*flx_E_x[bo] + nyi*flx_E_y[bo]
nflx_E[fi] = LLF(lhs_nflx, rhs_nflx, E[bi], E[bo], vmax)
end
end
if dg1d.has_neighbor(cell_in, Cart2d.Ymax)
cell_out = cell_in.neighbors[Cart2d.Ymax]
bulk_out = cellindices(mesh, cell_out)
for (i, fi) in enumerate(rng_up)
bo = bulk_out[i,1]
bi = bulk_in[i,end]
vmax = absmax(v[bi], v[bo])
nxi, nyi = nx[fi], ny[fi]
lhs_nflx = nxi*flx_rho_x[bi] + nyi*flx_rho_y[bi]
rhs_nflx = nxi*flx_rho_x[bo] + nyi*flx_rho_y[bo]
nflx_rho[fi] = LLF(lhs_nflx, rhs_nflx, rho[bi], rho[bo], vmax)
lhs_nflx = nxi*flx_qx_x[bi] + nyi*flx_qx_y[bi]
rhs_nflx = nxi*flx_qx_x[bo] + nyi*flx_qx_y[bo]
nflx_qx[fi] = LLF(lhs_nflx, rhs_nflx, qx[bi], qx[bo], vmax)
lhs_nflx = nxi*flx_qy_x[bi] + nyi*flx_qy_y[bi]
rhs_nflx = nxi*flx_qy_x[bo] + nyi*flx_qy_y[bo]
nflx_qy[fi] = LLF(lhs_nflx, rhs_nflx, qy[bi], qy[bo], vmax)
lhs_nflx = nxi*flx_E_x[bi] + nyi*flx_E_y[bi]
rhs_nflx = nxi*flx_E_x[bo] + nyi*flx_E_y[bo]
nflx_E[fi] = LLF(lhs_nflx, rhs_nflx, E[bi], E[bo], vmax)
end
end
end
end
......@@ -248,12 +248,14 @@ function rhs!(env, P::Project2d, hrsc::HRSC.AbstractArtificialViscosity, mesh::M
## compute rhs of regularized equation: ∂t u + ∂x f + ∂x μ q = 0
broadcast_volume_2!(flux, equation, mesh)
broadcast_faces_2!(llf, equation, mesh)
# broadcast_faces_2!(llf, equation, mesh)
llf(equation, mesh)
# TODO Need to revise Bdry conditions
# broadcast_bdry_2!(bdryllf, equation, mesh)
broadcast_volume_2!(av_flux_2d, equation, mesh)
broadcast_faces_2!(av_nflux_2d, equation, mesh)
# broadcast_faces_2!(av_nflux_2d, equation, mesh)
av_nflux_2d(equation, mesh)
# TODO don't we also need av_bdrylff here?
compute_rhs_weak_form!(rhs_rho, flx_rho_x, flx_rho_y, nflx_rho, mesh)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment