From 625b8bd90db38dc8c63f13e6ec05bd9592754faf Mon Sep 17 00:00:00 2001 From: Florian Atteneder <florian.atteneder@uni-jena.de> Date: Mon, 4 Sep 2023 23:56:31 +0200 Subject: [PATCH] 'unroll' llf, av_nflux_2d methods to see how much perf can be gained over current broadcast_faces_2! impl --- src/EulerEq/equation.jl | 286 ++++++++++++++++++++++++++++++++++++++++ src/EulerEq/rhs.jl | 6 +- 2 files changed, 290 insertions(+), 2 deletions(-) diff --git a/src/EulerEq/equation.jl b/src/EulerEq/equation.jl index 7e8a3f3c..f65676f6 100644 --- a/src/EulerEq/equation.jl +++ b/src/EulerEq/equation.jl @@ -483,3 +483,289 @@ end @returns nflx_rho, nflx_qx, nflx_qy, nflx_E end + + + +function av_nflux_2d(eq::EulerEquation2d, mesh::Mesh2d) + + @unpack ldg_rho_x, ldg_rho_y, ldg_qx_x, ldg_qx_y, ldg_qy_x, ldg_qy_y, + ldg_E_x, ldg_E_y, smoothed_mu = get_static_variables(mesh.cache) + @unpack nflx_rho, nflx_qx, nflx_qy, nflx_E, nx, ny = get_bdry_variables(mesh.cache) + + @inbounds for icell in mesh.bulkidxs + + cell_in = mesh.tree.cells[icell] + bulk_in = cellindices(mesh, icell) + rng_lhs, rng_rhs, rng_down, rng_up = dg1d.faceindices(mesh, icell) + + if dg1d.has_neighbor(cell_in, Cart2d.Xmin) + cell_out = cell_in.neighbors[Cart2d.Xmin] + bulk_out = cellindices(mesh, cell_out) + for (i, fi) in enumerate(rng_lhs) + bo = bulk_out[end,i] + bi = bulk_in[1,i] + + nxi, nyi = nx[fi], ny[fi] + + lhs_nflx = nxi*ldg_rho_x[bi] + nyi*ldg_rho_y[bi] + rhs_nflx = nxi*ldg_rho_x[bo] + nyi*ldg_rho_y[bo] + nflx_rho[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_qx_x[bi] + nyi*ldg_qx_y[bi] + rhs_nflx = nxi*ldg_qx_x[bo] + nyi*ldg_qx_y[bo] + nflx_qx[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_qy_x[bi] + nyi*ldg_qy_y[bi] + rhs_nflx = nxi*ldg_qy_x[bo] + nyi*ldg_qy_y[bo] + nflx_qy[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_E_x[bi] + nyi*ldg_E_y[bi] + rhs_nflx = nxi*ldg_E_x[bo] + nyi*ldg_E_y[bo] + nflx_E[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + end + end + + if dg1d.has_neighbor(cell_in, Cart2d.Xmax) + cell_out = cell_in.neighbors[Cart2d.Xmax] + bulk_out = cellindices(mesh, cell_out) + for (i, fi) in enumerate(rng_rhs) + bo = bulk_out[1,i] + bi = bulk_in[end,i] + + nxi, nyi = nx[fi], ny[fi] + + lhs_nflx = nxi*ldg_rho_x[bi] + nyi*ldg_rho_y[bi] + rhs_nflx = nxi*ldg_rho_x[bo] + nyi*ldg_rho_y[bo] + nflx_rho[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_qx_x[bi] + nyi*ldg_qx_y[bi] + rhs_nflx = nxi*ldg_qx_x[bo] + nyi*ldg_qx_y[bo] + nflx_qx[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_qy_x[bi] + nyi*ldg_qy_y[bi] + rhs_nflx = nxi*ldg_qy_x[bo] + nyi*ldg_qy_y[bo] + nflx_qy[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_E_x[bi] + nyi*ldg_E_y[bi] + rhs_nflx = nxi*ldg_E_x[bo] + nyi*ldg_E_y[bo] + nflx_E[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + end + end + + if dg1d.has_neighbor(cell_in, Cart2d.Ymin) + cell_out = cell_in.neighbors[Cart2d.Ymin] + bulk_out = cellindices(mesh, cell_out) + for (i, fi) in enumerate(rng_down) + bo = bulk_out[i,end] + bi = bulk_in[i,1] + + nxi, nyi = nx[fi], ny[fi] + + lhs_nflx = nxi*ldg_rho_x[bi] + nyi*ldg_rho_y[bi] + rhs_nflx = nxi*ldg_rho_x[bo] + nyi*ldg_rho_y[bo] + nflx_rho[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_qx_x[bi] + nyi*ldg_qx_y[bi] + rhs_nflx = nxi*ldg_qx_x[bo] + nyi*ldg_qx_y[bo] + nflx_qx[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_qy_x[bi] + nyi*ldg_qy_y[bi] + rhs_nflx = nxi*ldg_qy_x[bo] + nyi*ldg_qy_y[bo] + nflx_qy[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_E_x[bi] + nyi*ldg_E_y[bi] + rhs_nflx = nxi*ldg_E_x[bo] + nyi*ldg_E_y[bo] + nflx_E[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + end + end + + if dg1d.has_neighbor(cell_in, Cart2d.Ymax) + cell_out = cell_in.neighbors[Cart2d.Ymax] + bulk_out = cellindices(mesh, cell_out) + for (i, fi) in enumerate(rng_up) + bo = bulk_out[i,1] + bi = bulk_in[i,end] + + nxi, nyi = nx[fi], ny[fi] + + lhs_nflx = nxi*ldg_rho_x[bi] + nyi*ldg_rho_y[bi] + rhs_nflx = nxi*ldg_rho_x[bo] + nyi*ldg_rho_y[bo] + nflx_rho[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_qx_x[bi] + nyi*ldg_qx_y[bi] + rhs_nflx = nxi*ldg_qx_x[bo] + nyi*ldg_qx_y[bo] + nflx_qx[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_qy_x[bi] + nyi*ldg_qy_y[bi] + rhs_nflx = nxi*ldg_qy_x[bo] + nyi*ldg_qy_y[bo] + nflx_qy[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + + lhs_nflx = nxi*ldg_E_x[bi] + nyi*ldg_E_y[bi] + rhs_nflx = nxi*ldg_E_x[bo] + nyi*ldg_E_y[bo] + nflx_E[fi] += 0.5 * (smoothed_mu[bi]*lhs_nflx + smoothed_mu[bo]*rhs_nflx) + end + end + end + +end + + +# @with_signature function llf(equation::EulerEquation2d) +# @accepts Prefix(lhs), rho, flx_rho_x, flx_rho_y, qx, flx_qx_x, flx_qx_y, qy, flx_qy_y, E, flx_E_x, flx_E_y, v +# @accepts Prefix(rhs), rho, flx_rho_x, flx_rho_y, qx, flx_qx_x, flx_qx_y, qy, flx_qy_y, E, flx_E_x, flx_E_y, v +# @accepts nx, ny +# +# vmax = absmax(lhs_v, rhs_v) +# +# lhs_nflx = nx*lhs_flx_rho_x + ny*lhs_flx_rho_y +# rhs_nflx = nx*rhs_flx_rho_x + ny*rhs_flx_rho_y +# nflx_rho = LLF(lhs_nflx, rhs_nflx, lhs_rho, rhs_rho, vmax) +# +# lhs_nflx = nx*lhs_flx_qx_x + ny*lhs_flx_qx_y +# rhs_nflx = nx*rhs_flx_qx_x + ny*rhs_flx_qx_y +# nflx_qx = LLF(lhs_nflx, rhs_nflx, lhs_qx, rhs_qx, vmax) +# +# lhs_flx_qy_x = lhs_flx_qx_y +# rhs_flx_qy_x = rhs_flx_qx_y +# lhs_nflx = nx*lhs_flx_qy_x + ny*lhs_flx_qy_y +# rhs_nflx = nx*rhs_flx_qy_x + ny*rhs_flx_qy_y +# nflx_qy = LLF(lhs_nflx, rhs_nflx, lhs_qy, rhs_qy, vmax) +# +# lhs_nflx = nx*lhs_flx_E_x + ny*lhs_flx_E_y +# rhs_nflx = nx*rhs_flx_E_x + ny*rhs_flx_E_y +# nflx_E = LLF(lhs_nflx, rhs_nflx, lhs_E, rhs_E, vmax) +# +# @returns nflx_rho, nflx_qx, nflx_qy, nflx_E +# end +function llf(equation::EulerEquation2d, mesh::Mesh2d) + @unpack rho, qx, qy, E = get_dynamic_variables(mesh.cache) + @unpack flx_rho_x, flx_rho_y, flx_qx_x, flx_qx_y, flx_qy_y, + flx_E_x, flx_E_y, v = get_static_variables(mesh.cache) + @unpack nx, ny, nflx_rho, nflx_qx, nflx_qy, nflx_E = get_bdry_variables(mesh.cache) + + flx_qy_x = flx_qx_y + + @inbounds for icell in mesh.bulkidxs + + cell_in = mesh.tree.cells[icell] + bulk_in = cellindices(mesh, icell) + rng_lhs, rng_rhs, rng_down, rng_up = dg1d.faceindices(mesh, icell) + + if dg1d.has_neighbor(cell_in, Cart2d.Xmin) + cell_out = cell_in.neighbors[Cart2d.Xmin] + bulk_out = cellindices(mesh, cell_out) + for (i, fi) in enumerate(rng_lhs) + bo = bulk_out[end,i] + bi = bulk_in[1,i] + + vmax = absmax(v[bi], v[bo]) + + nxi, nyi = nx[fi], ny[fi] + + lhs_nflx = nxi*flx_rho_x[bi] + nyi*flx_rho_y[bi] + rhs_nflx = nxi*flx_rho_x[bo] + nyi*flx_rho_y[bo] + nflx_rho[fi] = LLF(lhs_nflx, rhs_nflx, rho[bi], rho[bo], vmax) + + lhs_nflx = nxi*flx_qx_x[bi] + nyi*flx_qx_y[bi] + rhs_nflx = nxi*flx_qx_x[bo] + nyi*flx_qx_y[bo] + nflx_qx[fi] = LLF(lhs_nflx, rhs_nflx, qx[bi], qx[bo], vmax) + + lhs_nflx = nxi*flx_qy_x[bi] + nyi*flx_qy_y[bi] + rhs_nflx = nxi*flx_qy_x[bo] + nyi*flx_qy_y[bo] + nflx_qy[fi] = LLF(lhs_nflx, rhs_nflx, qy[bi], qy[bo], vmax) + + lhs_nflx = nxi*flx_E_x[bi] + nyi*flx_E_y[bi] + rhs_nflx = nxi*flx_E_x[bo] + nyi*flx_E_y[bo] + nflx_E[fi] = LLF(lhs_nflx, rhs_nflx, E[bi], E[bo], vmax) + + end + end + + if dg1d.has_neighbor(cell_in, Cart2d.Xmax) + cell_out = cell_in.neighbors[Cart2d.Xmax] + bulk_out = cellindices(mesh, cell_out) + for (i, fi) in enumerate(rng_rhs) + bo = bulk_out[1,i] + bi = bulk_in[end,i] + + vmax = absmax(v[bi], v[bo]) + + nxi, nyi = nx[fi], ny[fi] + + lhs_nflx = nxi*flx_rho_x[bi] + nyi*flx_rho_y[bi] + rhs_nflx = nxi*flx_rho_x[bo] + nyi*flx_rho_y[bo] + nflx_rho[fi] = LLF(lhs_nflx, rhs_nflx, rho[bi], rho[bo], vmax) + + lhs_nflx = nxi*flx_qx_x[bi] + nyi*flx_qx_y[bi] + rhs_nflx = nxi*flx_qx_x[bo] + nyi*flx_qx_y[bo] + nflx_qx[fi] = LLF(lhs_nflx, rhs_nflx, qx[bi], qx[bo], vmax) + + lhs_nflx = nxi*flx_qy_x[bi] + nyi*flx_qy_y[bi] + rhs_nflx = nxi*flx_qy_x[bo] + nyi*flx_qy_y[bo] + nflx_qy[fi] = LLF(lhs_nflx, rhs_nflx, qy[bi], qy[bo], vmax) + + lhs_nflx = nxi*flx_E_x[bi] + nyi*flx_E_y[bi] + rhs_nflx = nxi*flx_E_x[bo] + nyi*flx_E_y[bo] + nflx_E[fi] = LLF(lhs_nflx, rhs_nflx, E[bi], E[bo], vmax) + end + end + + if dg1d.has_neighbor(cell_in, Cart2d.Ymin) + cell_out = cell_in.neighbors[Cart2d.Ymin] + bulk_out = cellindices(mesh, cell_out) + for (i, fi) in enumerate(rng_down) + bo = bulk_out[i,end] + bi = bulk_in[i,1] + + vmax = absmax(v[bi], v[bo]) + + nxi, nyi = nx[fi], ny[fi] + + lhs_nflx = nxi*flx_rho_x[bi] + nyi*flx_rho_y[bi] + rhs_nflx = nxi*flx_rho_x[bo] + nyi*flx_rho_y[bo] + nflx_rho[fi] = LLF(lhs_nflx, rhs_nflx, rho[bi], rho[bo], vmax) + + lhs_nflx = nxi*flx_qx_x[bi] + nyi*flx_qx_y[bi] + rhs_nflx = nxi*flx_qx_x[bo] + nyi*flx_qx_y[bo] + nflx_qx[fi] = LLF(lhs_nflx, rhs_nflx, qx[bi], qx[bo], vmax) + + lhs_nflx = nxi*flx_qy_x[bi] + nyi*flx_qy_y[bi] + rhs_nflx = nxi*flx_qy_x[bo] + nyi*flx_qy_y[bo] + nflx_qy[fi] = LLF(lhs_nflx, rhs_nflx, qy[bi], qy[bo], vmax) + + lhs_nflx = nxi*flx_E_x[bi] + nyi*flx_E_y[bi] + rhs_nflx = nxi*flx_E_x[bo] + nyi*flx_E_y[bo] + nflx_E[fi] = LLF(lhs_nflx, rhs_nflx, E[bi], E[bo], vmax) + end + end + + if dg1d.has_neighbor(cell_in, Cart2d.Ymax) + cell_out = cell_in.neighbors[Cart2d.Ymax] + bulk_out = cellindices(mesh, cell_out) + for (i, fi) in enumerate(rng_up) + bo = bulk_out[i,1] + bi = bulk_in[i,end] + + vmax = absmax(v[bi], v[bo]) + + nxi, nyi = nx[fi], ny[fi] + + lhs_nflx = nxi*flx_rho_x[bi] + nyi*flx_rho_y[bi] + rhs_nflx = nxi*flx_rho_x[bo] + nyi*flx_rho_y[bo] + nflx_rho[fi] = LLF(lhs_nflx, rhs_nflx, rho[bi], rho[bo], vmax) + + lhs_nflx = nxi*flx_qx_x[bi] + nyi*flx_qx_y[bi] + rhs_nflx = nxi*flx_qx_x[bo] + nyi*flx_qx_y[bo] + nflx_qx[fi] = LLF(lhs_nflx, rhs_nflx, qx[bi], qx[bo], vmax) + + lhs_nflx = nxi*flx_qy_x[bi] + nyi*flx_qy_y[bi] + rhs_nflx = nxi*flx_qy_x[bo] + nyi*flx_qy_y[bo] + nflx_qy[fi] = LLF(lhs_nflx, rhs_nflx, qy[bi], qy[bo], vmax) + + lhs_nflx = nxi*flx_E_x[bi] + nyi*flx_E_y[bi] + rhs_nflx = nxi*flx_E_x[bo] + nyi*flx_E_y[bo] + nflx_E[fi] = LLF(lhs_nflx, rhs_nflx, E[bi], E[bo], vmax) + end + end + end +end diff --git a/src/EulerEq/rhs.jl b/src/EulerEq/rhs.jl index d2c5808b..60769016 100644 --- a/src/EulerEq/rhs.jl +++ b/src/EulerEq/rhs.jl @@ -248,12 +248,14 @@ function rhs!(env, P::Project2d, hrsc::HRSC.AbstractArtificialViscosity, mesh::M ## compute rhs of regularized equation: ∂t u + ∂x f + ∂x μ q = 0 broadcast_volume_2!(flux, equation, mesh) - broadcast_faces_2!(llf, equation, mesh) + # broadcast_faces_2!(llf, equation, mesh) + llf(equation, mesh) # TODO Need to revise Bdry conditions # broadcast_bdry_2!(bdryllf, equation, mesh) broadcast_volume_2!(av_flux_2d, equation, mesh) - broadcast_faces_2!(av_nflux_2d, equation, mesh) + # broadcast_faces_2!(av_nflux_2d, equation, mesh) + av_nflux_2d(equation, mesh) # TODO don't we also need av_bdrylff here? compute_rhs_weak_form!(rhs_rho, flx_rho_x, flx_rho_y, nflx_rho, mesh) -- GitLab