68 BL_PROFILE_REGION(
"erf_fast_rhs_T()");
70 const Box& domain = geom.Domain();
71 auto const domlo = lbound(domain);
72 auto const domhi = ubound(domain);
74 Real beta_1 = 0.5 * (1.0 - beta_s);
75 Real beta_2 = 0.5 * (1.0 + beta_s);
80 const Real* dx = geom.CellSize();
81 const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
87 const auto& dm = S_stage_data[
IntVars::cons].DistributionMap();
89 MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
90 MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
91 MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
92 MultiFab Delta_rho ( ba , dm, 1, 1);
93 MultiFab Delta_rho_theta( ba , dm, 1, 1);
95 MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
96 MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
98 MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
99 MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
100 MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
101 MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
102 MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
106 const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
107 const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
116 #pragma omp parallel if (Gpu::notInLaunchRegion())
118 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
120 const Array4<Real> & cur_cons = S_data[
IntVars::cons].array(mfi);
121 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
122 const Array4<const Real>& stage_cons = S_stage_data[
IntVars::cons].const_array(mfi);
123 const Array4<Real>& lagged_delta_rt = S_scratch[
IntVars::cons].array(mfi);
125 const Array4<Real>& old_drho = Delta_rho.array(mfi);
126 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
127 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
128 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
129 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
131 const Array4<const Real>& prev_xmom = S_prev[
IntVars::xmom].const_array(mfi);
132 const Array4<const Real>& prev_ymom = S_prev[
IntVars::ymom].const_array(mfi);
133 const Array4<const Real>& prev_zmom = S_prev[
IntVars::zmom].const_array(mfi);
135 const Array4<const Real>& stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
136 const Array4<const Real>& stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
137 const Array4<const Real>& stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
139 Box bx = mfi.validbox();
140 Box gbx = mfi.tilebox(); gbx.grow(1);
143 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
149 Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
150 Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
151 Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
153 const auto& bx_lo = lbound(bx);
154 const auto& bx_hi = ubound(bx);
156 ParallelFor(gtbx, gtby, gtbz,
157 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
158 old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
159 if (k == bx_lo.z && k != domlo.z) {
160 old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
161 }
else if (k == bx_hi.z) {
162 old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
165 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
166 old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
167 if (k == bx_lo.z && k != domlo.z) {
168 old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
169 }
else if (k == bx_hi.z) {
170 old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
173 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
174 old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
177 const Array4<Real>& theta_extrap = extrap.array(mfi);
179 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
183 theta_extrap(i,j,k) = old_drho_theta(i,j,k);
185 theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
186 ( old_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,
RhoTheta_comp) );
192 #pragma omp parallel if (Gpu::notInLaunchRegion())
194 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
197 Box gbx = mfi.tilebox(); gbx.grow(1);
198 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
199 const Array4<Real>& lagged_delta_rt = S_scratch[
IntVars::cons].array(mfi);
200 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
201 lagged_delta_rt(i,j,k,
RhoTheta_comp) = old_drho_theta(i,j,k);
210 #pragma omp parallel if (Gpu::notInLaunchRegion())
212 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
214 Box bx = mfi.validbox();
215 Box tbx = mfi.nodaltilebox(0);
216 Box tby = mfi.nodaltilebox(1);
218 const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
219 const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
221 const Array4<const Real> & stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
222 const Array4<const Real> & stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
223 const Array4<const Real> & qt_arr =
qt.const_array(mfi);
225 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
226 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
228 const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[
IntVars::xmom].const_array(mfi);
229 const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[
IntVars::ymom].const_array(mfi);
231 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
232 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
234 const Array4<Real>& cur_xmom = S_data[
IntVars::xmom].array(mfi);
235 const Array4<Real>& cur_ymom = S_data[
IntVars::ymom].array(mfi);
238 const Array4<Real>& avg_xmom = S_scratch[
IntVars::xmom].array(mfi);
239 const Array4<Real>& avg_ymom = S_scratch[
IntVars::ymom].array(mfi);
241 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
243 const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
245 const Array4<Real>& theta_extrap = extrap.array(mfi);
248 const Array4<const Real>& mf_ux = mapfac[
MapFacType::u_x]->const_array(mfi);
249 const Array4<const Real>& mf_vy = mapfac[
MapFacType::v_y]->const_array(mfi);
261 BL_PROFILE(
"fast_rhs_xymom_T");
263 const auto& bx_lo = lbound(bx);
264 const auto& bx_hi = ubound(bx);
266 ParallelFor(tbx, tby,
267 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
272 Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
273 Real gp_zeta_on_iface = (k == 0) ?
274 0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
275 -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
276 0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
277 -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
278 Real
gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
281 Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) : 0.0;
283 Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
284 Real fast_rhs_rho_u = -
Gamma *
R_d * pi_c *
gpx / (1.0 + q);
286 new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
287 + dtau * slow_rhs_rho_u(i,j,k)
288 + dtau * xmom_src_arr(i,j,k);
289 if (k == bx_lo.z && k != domlo.z) {
290 new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
291 }
else if (k == bx_hi.z) {
292 new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
295 avg_xmom(i,j,k) += facinv*new_drho_u(i,j,k);
297 cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
299 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
304 Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
305 Real gp_zeta_on_jface = (k == 0) ?
306 0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
307 -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
308 0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
309 -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
310 Real
gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
313 Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) : 0.0;
315 Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
316 Real fast_rhs_rho_v = -
Gamma *
R_d * pi_c *
gpy / (1.0 + q);
318 new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
319 + dtau * slow_rhs_rho_v(i,j,k)
320 + dtau * ymom_src_arr(i,j,k);
322 if (k == bx_lo.z && k != domlo.z) {
323 new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
324 }
else if (k == bx_hi.z) {
325 new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
328 avg_ymom(i,j,k) += facinv*new_drho_v(i,j,k);
330 cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
338 #pragma omp parallel if (Gpu::notInLaunchRegion())
341 std::array<FArrayBox,AMREX_SPACEDIM> flux;
344 Box bx = mfi.tilebox();
345 Box tbz = surroundingNodes(bx,2);
347 Box vbx = mfi.validbox();
348 const auto& vbx_hi = ubound(vbx);
350 const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
351 const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
353 const Array4<const Real> & stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
354 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
355 const Array4<const Real> & qt_arr =
qt.const_array(mfi);
357 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
358 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
359 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
360 const Array4<Real>& old_drho = Delta_rho.array(mfi);
361 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
363 const Array4<const Real>& slow_rhs_cons = S_slow_rhs[
IntVars::cons].const_array(mfi);
364 const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[
IntVars::zmom].const_array(mfi);
366 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
367 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
369 const Array4<Real>& cur_cons = S_data[
IntVars::cons].array(mfi);
370 const Array4<Real>& cur_zmom = S_data[
IntVars::zmom].array(mfi);
373 const Array4<Real>& avg_zmom = S_scratch[
IntVars::zmom].array(mfi);
375 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
376 const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
378 const Array4< Real>& omega_arr = Omega.array(mfi);
381 const Array4<const Real>& mf_mx = mapfac[
MapFacType::m_x]->const_array(mfi);
382 const Array4<const Real>& mf_my = mapfac[
MapFacType::m_y]->const_array(mfi);
383 const Array4<const Real>& mf_ux = mapfac[
MapFacType::u_x]->const_array(mfi);
384 const Array4<const Real>& mf_uy = mapfac[
MapFacType::u_y]->const_array(mfi);
385 const Array4<const Real>& mf_vx = mapfac[
MapFacType::v_x]->const_array(mfi);
386 const Array4<const Real>& mf_vy = mapfac[
MapFacType::v_y]->const_array(mfi);
394 FArrayBox temp_rhs_fab;
398 RHS_fab.resize (tbz,1,The_Async_Arena());
399 soln_fab.resize (tbz,1,The_Async_Arena());
400 temp_rhs_fab.resize(tbz,2,The_Async_Arena());
402 auto const& RHS_a = RHS_fab.array();
403 auto const& soln_a = soln_fab.array();
404 auto const& temp_rhs_arr = temp_rhs_fab.array();
406 auto const& coeffA_a = coeff_A_mf.array(mfi);
407 auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
408 auto const& coeffC_a = coeff_C_mf.array(mfi);
409 auto const& coeffP_a = coeff_P_mf.array(mfi);
410 auto const& coeffQ_a = coeff_Q_mf.array(mfi);
415 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
416 flux[dir].resize(surroundingNodes(bx,dir),2);
417 flux[dir].setVal<RunOn::Device>(0.);
419 const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
420 flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
424 BL_PROFILE(
"fast_T_making_rho_rhs");
425 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
426 Real h_zeta_cc_xface_hi = 0.5 * dzi *
427 ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
428 -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
430 Real h_zeta_cc_xface_lo = 0.5 * dzi *
431 ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
432 -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
434 Real h_zeta_cc_yface_hi = 0.5 * dzi *
435 ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
436 -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
438 Real h_zeta_cc_yface_lo = 0.5 * dzi *
439 ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
440 -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
442 Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_uy(i ,j,0);
443 Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_uy(i+1,j,0);
444 Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_vx(i,j ,0);
445 Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_vx(i,j+1,0);
447 Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
450 temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
451 ( yflux_hi - yflux_lo ) * dyi * mfsq;
452 temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
453 xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
454 ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
455 yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
458 (flx_arr[0])(i,j,k,0) = xflux_lo;
459 (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
461 (flx_arr[1])(i,j,k,0) = yflux_lo;
462 (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
465 (flx_arr[0])(i+1,j,k,0) = xflux_hi;
466 (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
469 (flx_arr[1])(i,j+1,k,0) = yflux_hi;
470 (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
478 Box gbxo = mfi.nodaltilebox(2);
481 if (gbxo.smallEnd(2) == domlo.z) {
482 Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
483 gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
484 ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
485 omega_arr(i,j,k) = 0.;
488 if (gbxo.bigEnd(2) == domhi.z+1) {
489 Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
490 gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
491 ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
492 omega_arr(i,j,k) = old_drho_w(i,j,k);
495 ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
496 omega_arr(i,j,k) =
OmegaFromW(i,j,k,old_drho_w(i,j,k),
497 old_drho_u,old_drho_v,
498 mf_ux,mf_vy,z_nd,dxInv);
503 Box bx_shrunk_in_k = bx;
504 int klo = tbz.smallEnd(2);
505 int khi = tbz.bigEnd(2);
506 bx_shrunk_in_k.setSmall(2,klo+1);
507 bx_shrunk_in_k.setBig(2,khi-1);
512 Real halfg = std::abs(0.5 * grav_gpu[2]);
515 BL_PROFILE(
"fast_loop_on_shrunk_t");
517 ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
519 Real detJ_on_kface = 0.5 * (detJ(i,j,k) + detJ(i,j,k-1));
521 Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j,k-1)) : 0.0;
523 Real coeff_P = coeffP_a(i,j,k) / (1.0 + q);
524 Real coeff_Q = coeffQ_a(i,j,k) / (1.0 + q);
531 Real R0_tmp = (-halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )) * detJ(i,j,k )
532 + (-halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1)) * detJ(i,j,k-1);
535 Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,
Rho_comp ) * detJ(i,j,k ) +
536 slow_rhs_cons(i,j,k-1,
Rho_comp ) * detJ(i,j,k-1) )
537 + ( coeff_P * slow_rhs_cons(i,j,k ,
RhoTheta_comp) * detJ(i,j,k ) +
538 coeff_Q * slow_rhs_cons(i,j,k-1,
RhoTheta_comp) * detJ(i,j,k-1) );
540 Real Omega_kp1 = omega_arr(i,j,k+1);
541 Real Omega_k = omega_arr(i,j,k );
542 Real Omega_km1 = omega_arr(i,j,k-1);
545 R1_tmp += ( halfg ) *
546 ( beta_1 * dzi * (Omega_kp1 - Omega_km1) + temp_rhs_arr(i,j,k,
Rho_comp) + temp_rhs_arr(i,j,k-1,
Rho_comp));
550 coeff_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,
RhoTheta_comp) ) +
551 coeff_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,
RhoTheta_comp) ) );
554 RHS_a(i,j,k) = detJ_on_kface * old_drho_w(i,j,k) + dtau * (
555 detJ_on_kface * (slow_rhs_rho_w(i,j,k)+zmom_src_arr(i,j,k)) + R0_tmp + dtau*beta_2*R1_tmp);
558 RHS_a(i,j,k) += detJ_on_kface *
OmegaFromW(i,j,k,0.,
559 new_drho_u,new_drho_v,
560 mf_ux,mf_vy,z_nd,dxInv);
567 auto const lo = lbound(bx);
568 auto const hi = ubound(bx);
571 BL_PROFILE(
"fast_rhs_b2d_loop_t");
573 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
576 RHS_a(i,j,lo.z ) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
577 RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
580 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
582 for (
int k = lo.z+1; k <= hi.z+1; k++) {
583 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
586 cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
587 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
589 for (
int k = hi.z; k >= lo.z; k--) {
590 soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
594 for (
int j = lo.y; j <= hi.y; ++j) {
596 for (
int i = lo.x; i <= hi.x; ++i)
598 RHS_a(i,j,lo.z) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
599 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
603 for (
int i = lo.x; i <= hi.x; ++i)
605 RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
606 soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
610 for (
int k = lo.z+1; k <= hi.z; ++k) {
611 for (
int j = lo.y; j <= hi.y; ++j) {
613 for (
int i = lo.x; i <= hi.x; ++i) {
614 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
618 for (
int k = hi.z; k > lo.z; --k) {
619 for (
int j = lo.y; j <= hi.y; ++j) {
621 for (
int i = lo.x; i <= hi.x; ++i) {
622 soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
626 if (hi.z == domhi.z) {
627 for (
int j = lo.y; j <= hi.y; ++j) {
629 for (
int i = lo.x; i <= hi.x; ++i) {
630 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
637 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
639 cur_zmom(i,j,k) = stage_zmom(i,j,k);
642 if (lo.z == domlo.z) {
643 tbz.setSmall(2,domlo.z+1);
645 if (hi.z == domhi.z) {
646 tbz.setBig(2,domhi.z);
648 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
651 new_drho_u,new_drho_v,
652 mf_ux,mf_vy,z_nd,dxInv);
653 cur_zmom(i,j,k) += wpp;
660 BL_PROFILE(
"fast_rho_final_update");
661 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
663 Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
664 Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
668 avg_zmom(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
670 (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
674 avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
676 (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
677 (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
681 Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
682 cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
684 Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
685 ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
686 zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
688 cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
691 (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
702 int strt_comp_reflux = 0;
704 int num_comp_reflux = 1;
705 if (level < finest_level) {
706 fr_as_crse->CrseAdd(mfi,
707 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
708 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
711 fr_as_fine->FineAdd(mfi,
712 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
713 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
719 Gpu::streamSynchronize();
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
@ v_x
Definition: ERF_DataStruct.H:22
@ u_y
Definition: ERF_DataStruct.H:23
@ v_y
Definition: ERF_DataStruct.H:23
@ m_y
Definition: ERF_DataStruct.H:23
@ u_x
Definition: ERF_DataStruct.H:22
@ m_x
Definition: ERF_DataStruct.H:22
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int &i, int &j, int &k, amrex::Real w, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:415
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:110
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:96
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:139
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:167
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:465
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ gpy
Definition: ERF_IndexDefines.H:151
@ gpx
Definition: ERF_IndexDefines.H:150
@ ymom
Definition: ERF_IndexDefines.H:160
@ cons
Definition: ERF_IndexDefines.H:158
@ zmom
Definition: ERF_IndexDefines.H:161
@ xmom
Definition: ERF_IndexDefines.H:159
@ qt
Definition: ERF_Kessler.H:27