76 BL_PROFILE_REGION(
"erf_substep_T()");
78 const Box& domain = geom.Domain();
79 auto const domlo = lbound(domain);
80 auto const domhi = ubound(domain);
83 int ihi = domhi.x + 1;
85 int jhi = domhi.y + 1;
95 bool l_rayleigh_impl_for_w = (sinesq_stag_d !=
nullptr);
97 const Real*
dx = geom.CellSize();
98 const GpuArray<Real, AMREX_SPACEDIM>
dxInv = geom.InvCellSizeArray();
104 const auto& dm = S_stage_data[
IntVars::cons].DistributionMap();
106 MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
107 MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
108 MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
109 MultiFab Delta_rho ( ba , dm, 1, 1);
110 MultiFab Delta_rho_theta( ba , dm, 1, 1);
112 MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
113 MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
115 MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
116 MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
117 MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
118 MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
119 MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
123 const Array<Real,AMREX_SPACEDIM> grav{
zero,
zero, -gravity};
124 const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
133 #pragma omp parallel if (Gpu::notInLaunchRegion())
135 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
137 const Array4<Real> & cur_cons = S_data[
IntVars::cons].array(mfi);
138 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
139 const Array4<const Real>& stage_cons = S_stage_data[
IntVars::cons].const_array(mfi);
140 const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
142 const Array4<Real>& old_drho = Delta_rho.array(mfi);
143 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
144 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
145 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
146 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
148 const Array4<const Real>& prev_xmom = S_prev[
IntVars::xmom].const_array(mfi);
149 const Array4<const Real>& prev_ymom = S_prev[
IntVars::ymom].const_array(mfi);
150 const Array4<const Real>& prev_zmom = S_prev[
IntVars::zmom].const_array(mfi);
152 const Array4<const Real>& stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
153 const Array4<const Real>& stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
154 const Array4<const Real>& stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
156 Box bx = mfi.validbox();
157 Box gbx = mfi.tilebox(); gbx.grow(1);
160 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
166 Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
167 Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
168 Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
170 const auto& bx_lo = lbound(bx);
171 const auto& bx_hi = ubound(bx);
174 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
175 old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
176 if (k == bx_lo.z && k != domlo.z) {
177 old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
178 }
else if (k == bx_hi.z) {
179 old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
182 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
183 old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
184 if (k == bx_lo.z && k != domlo.z) {
185 old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
186 }
else if (k == bx_hi.z) {
187 old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
190 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
191 old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
194 const Array4<Real>& theta_extrap = extrap.array(mfi);
195 const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
197 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
201 theta_extrap(i,j,k) = old_drho_theta(i,j,k);
203 theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
204 ( old_drho_theta(i,j,k) - lagged_arr(i,j,k) );
209 theta_extrap(i,j,k) *= (
one + RvOverRd*
qv);
214 #pragma omp parallel if (Gpu::notInLaunchRegion())
216 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
219 Box gbx = mfi.tilebox(); gbx.grow(1);
220 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
221 const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
222 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
223 lagged_arr(i,j,k) = old_drho_theta(i,j,k);
232 #pragma omp parallel if (Gpu::notInLaunchRegion())
234 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
236 Box bx = mfi.validbox();
237 Box tbx = mfi.nodaltilebox(0);
238 Box tby = mfi.nodaltilebox(1);
240 const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
241 const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
243 const Array4<const Real> & stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
244 const Array4<const Real> & stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
245 const Array4<const Real> & qt_arr =
qt.const_array(mfi);
247 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
248 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
250 const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[
IntVars::xmom].const_array(mfi);
251 const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[
IntVars::ymom].const_array(mfi);
253 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
254 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
256 const Array4<Real>& cur_xmom = S_data[
IntVars::xmom].array(mfi);
257 const Array4<Real>& cur_ymom = S_data[
IntVars::ymom].array(mfi);
260 const Array4<Real>& avg_xmom_arr = avg_xmom.array(mfi);
261 const Array4<Real>& avg_ymom_arr = avg_ymom.array(mfi);
263 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
265 const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
267 const Array4<Real>& theta_extrap = extrap.array(mfi);
270 const Array4<const Real>& mf_ux = mapfac[
MapFacType::u_x]->const_array(mfi);
271 const Array4<const Real>& mf_vy = mapfac[
MapFacType::v_y]->const_array(mfi);
283 BL_PROFILE(
"substep_xymom_T");
285 const auto& bx_lo = lbound(bx);
286 const auto& bx_hi = ubound(bx);
289 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
294 Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
295 Real gp_zeta_on_iface = (k == 0) ?
296 myhalf * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
297 - theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
298 fourth * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
299 - theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
300 Real gpx = (l_real_bc && (level==0) && (i==ilo || i==ihi)) ?
Real(0.) :
301 gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
305 Real q = (l_use_moisture) ?
myhalf * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) :
zero;
307 Real pi_c =
myhalf * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
310 new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
311 + dtau * slow_rhs_rho_u(i,j,k)
312 + dtau * xmom_src_arr(i,j,k);
313 if (k == bx_lo.z && k != domlo.z) {
314 new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
315 }
else if (k == bx_hi.z) {
316 new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
319 avg_xmom_arr(i,j,k) += facinv*new_drho_u(i,j,k);
321 cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
323 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
328 Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
329 Real gp_zeta_on_jface = (k == 0) ?
330 myhalf * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
331 - theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
332 fourth * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
333 - theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
334 Real gpy = (l_real_bc && (level==0) && (j==jlo || j==jhi)) ?
Real(0.) :
335 gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
339 Real q = (l_use_moisture) ?
myhalf * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) :
zero;
341 Real pi_c =
myhalf * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
344 new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
345 + dtau * slow_rhs_rho_v(i,j,k)
346 + dtau * ymom_src_arr(i,j,k);
348 if (k == bx_lo.z && k != domlo.z) {
349 new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
350 }
else if (k == bx_hi.z) {
351 new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
354 avg_ymom_arr(i,j,k) += facinv*new_drho_v(i,j,k);
356 cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
364 #pragma omp parallel if (Gpu::notInLaunchRegion())
367 std::array<FArrayBox,AMREX_SPACEDIM> flux;
370 Box bx = mfi.tilebox();
371 Box tbz = surroundingNodes(bx,2);
373 Box vbx = mfi.validbox();
374 const auto& vbx_hi = ubound(vbx);
376 const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
377 const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
379 const Array4<const Real> & stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
380 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
382 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
383 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
384 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
385 const Array4<Real>& old_drho = Delta_rho.array(mfi);
386 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
388 const Array4<const Real>& slow_rhs_cons = S_slow_rhs[
IntVars::cons].const_array(mfi);
389 const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[
IntVars::zmom].const_array(mfi);
391 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
392 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
394 const Array4<Real>& cur_cons = S_data[
IntVars::cons].array(mfi);
395 const Array4<Real>& cur_zmom = S_data[
IntVars::zmom].array(mfi);
398 const Array4<Real>& avg_zmom_arr = avg_zmom.array(mfi);
400 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
401 const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
403 const Array4< Real>& omega_arr = Omega.array(mfi);
406 const Array4<const Real>& mf_mx = mapfac[
MapFacType::m_x]->const_array(mfi);
407 const Array4<const Real>& mf_my = mapfac[
MapFacType::m_y]->const_array(mfi);
408 const Array4<const Real>& mf_ux = mapfac[
MapFacType::u_x]->const_array(mfi);
409 const Array4<const Real>& mf_uy = mapfac[
MapFacType::u_y]->const_array(mfi);
410 const Array4<const Real>& mf_vx = mapfac[
MapFacType::v_x]->const_array(mfi);
411 const Array4<const Real>& mf_vy = mapfac[
MapFacType::v_y]->const_array(mfi);
419 FArrayBox temp_rhs_fab;
423 RHS_fab.resize (tbz,1,The_Async_Arena());
424 soln_fab.resize (tbz,1,The_Async_Arena());
425 temp_rhs_fab.resize(tbz,2,The_Async_Arena());
427 auto const& RHS_a = RHS_fab.array();
428 auto const& soln_a = soln_fab.array();
429 auto const& temp_rhs_arr = temp_rhs_fab.array();
431 auto const& coeffA_a = coeff_A_mf.array(mfi);
432 auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
433 auto const& coeffC_a = coeff_C_mf.array(mfi);
434 auto const& coeffP_a = coeff_P_mf.array(mfi);
435 auto const& coeffQ_a = coeff_Q_mf.array(mfi);
440 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
441 flux[dir].resize(surroundingNodes(bx,dir),2,The_Async_Arena());
442 flux[dir].setVal<RunOn::Device>(0);
444 const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
445 flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
449 BL_PROFILE(
"fast_T_making_rho_rhs");
450 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
452 ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
453 -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
456 ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
457 -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
460 ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
461 -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
464 ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
465 -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
467 Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_uy(i ,j,0);
468 Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_uy(i+1,j,0);
469 Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_vx(i,j ,0);
470 Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_vx(i,j+1,0);
472 Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
475 temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
476 ( yflux_hi - yflux_lo ) * dyi * mfsq;
477 temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
478 xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
479 ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
480 yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) *
myhalf;
483 (flx_arr[0])(i,j,k,0) = xflux_lo;
484 (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) *
myhalf * (prim(i,j,k,0) + prim(i-1,j,k,0));
486 (flx_arr[1])(i,j,k,0) = yflux_lo;
487 (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) *
myhalf * (prim(i,j,k,0) + prim(i,j-1,k,0));
490 (flx_arr[0])(i+1,j,k,0) = xflux_hi;
491 (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) *
myhalf * (prim(i,j,k,0) + prim(i+1,j,k,0));
494 (flx_arr[1])(i,j+1,k,0) = yflux_hi;
495 (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) *
myhalf * (prim(i,j,k,0) + prim(i,j+1,k,0));
503 Box gbxo = mfi.nodaltilebox(2);
506 if (gbxo.smallEnd(2) == domlo.z) {
507 Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
508 gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
509 ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
510 omega_arr(i,j,k) =
zero;
513 if (gbxo.bigEnd(2) == domhi.z+1) {
514 Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
515 gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
516 ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
517 omega_arr(i,j,k) = old_drho_w(i,j,k);
520 ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
521 omega_arr(i,j,k) =
OmegaFromW(i,j,k,old_drho_w(i,j,k),
522 old_drho_u,old_drho_v,
523 mf_ux,mf_vy,z_nd,
dxInv);
528 Box bx_shrunk_in_k = bx;
529 int klo = tbz.smallEnd(2);
530 int khi = tbz.bigEnd(2);
531 bx_shrunk_in_k.setSmall(2,klo+1);
532 bx_shrunk_in_k.setBig(2,
khi-1);
540 BL_PROFILE(
"fast_loop_on_shrunk_t");
542 ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
544 Real coeff_P = coeffP_a(i,j,k);
545 Real coeff_Q = coeffQ_a(i,j,k);
552 Real R0_tmp = -halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )
553 -halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1);
561 Real Omega_kp1 = omega_arr(i,j,k+1);
562 Real Omega_k = omega_arr(i,j,k );
563 Real Omega_km1 = omega_arr(i,j,k-1);
565 Real detJdiff = (detJ(i,j,k) - detJ(i,j,k-1)) / (detJ(i,j,k)*detJ(i,j,k-1));
568 R1_tmp += halfg * ( beta_1 * dzi * (Omega_kp1/detJ(i,j,k) + detJdiff*Omega_k - Omega_km1/detJ(i,j,k-1))
569 + temp_rhs_arr(i,j,k,
Rho_comp)/detJ(i,j,k) + temp_rhs_arr(i,j,k-1,
Rho_comp)/detJ(i,j,k-1) );
572 R1_tmp += -( coeff_P/detJ(i,j,k ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,
RhoTheta_comp) )
573 + coeff_Q/detJ(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,
RhoTheta_comp) ) );
576 RHS_a(i,j,k) = old_drho_w(i,j,k) + dtau * (slow_rhs_rho_w(i,j,k) + zmom_src_arr(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
580 new_drho_u,new_drho_v,
581 mf_ux,mf_vy,z_nd,
dxInv);
588 auto const lo = lbound(bx);
589 auto const hi = ubound(bx);
592 BL_PROFILE(
"substep_b2d_loop_t");
594 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
597 RHS_a(i,j,lo.z ) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
598 RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
601 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
604 for (
int k = lo.z+1; k <= hi.z+1; k++) {
605 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
608 cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
609 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
612 for (
int k = hi.z; k >= lo.z; k--) {
613 soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
617 for (
int j = lo.y; j <= hi.y; ++j) {
619 for (
int i = lo.x; i <= hi.x; ++i)
621 RHS_a(i,j,lo.z) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
622 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
626 for (
int i = lo.x; i <= hi.x; ++i)
628 RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
629 soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
633 for (
int k = lo.z+1; k <= hi.z; ++k) {
634 for (
int j = lo.y; j <= hi.y; ++j) {
636 for (
int i = lo.x; i <= hi.x; ++i) {
637 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
641 for (
int k = hi.z; k > lo.z; --k) {
642 for (
int j = lo.y; j <= hi.y; ++j) {
644 for (
int i = lo.x; i <= hi.x; ++i) {
645 soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
649 if (hi.z == domhi.z) {
650 for (
int j = lo.y; j <= hi.y; ++j) {
652 for (
int i = lo.x; i <= hi.x; ++i) {
653 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
660 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
662 cur_zmom(i,j,k) = stage_zmom(i,j,k);
665 if (lo.z == domlo.z) {
666 tbz.setSmall(2,domlo.z+1);
668 if (hi.z == domhi.z) {
669 tbz.setBig(2,domhi.z);
671 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
674 new_drho_u,new_drho_v,
675 mf_ux,mf_vy,z_nd,
dxInv);
677 cur_zmom(i,j,k) += wpp;
679 if (l_rayleigh_impl_for_w) {
680 Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k];
681 cur_zmom(i,j,k) /= (
one + damping_coeff);
689 BL_PROFILE(
"fast_rho_final_update");
690 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
692 Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
693 Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
697 avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
699 (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
703 avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
705 (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
706 (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) *
myhalf * (prim(i,j,k) + prim(i,j,k+1));
710 Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
712 cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
714 Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) +
myhalf *
715 ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
716 zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
718 cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
721 (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) *
myhalf * (prim(i,j,k) + prim(i,j,k-1));
732 int strt_comp_reflux = 0;
734 int num_comp_reflux = 1;
735 if (level < finest_level) {
736 fr_as_crse->CrseAdd(mfi,
737 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
738 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
741 fr_as_fine->FineAdd(mfi,
742 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
743 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
749 Gpu::streamSynchronize();
constexpr amrex::Real R_v
Definition: ERF_Constants.H:30
constexpr amrex::Real one
Definition: ERF_Constants.H:7
constexpr amrex::Real fourth
Definition: ERF_Constants.H:12
constexpr amrex::Real zero
Definition: ERF_Constants.H:6
constexpr amrex::Real myhalf
Definition: ERF_Constants.H:11
constexpr amrex::Real R_d
Definition: ERF_Constants.H:29
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:38
@ v_x
Definition: ERF_DataStruct.H:24
@ u_y
Definition: ERF_DataStruct.H:25
@ v_y
Definition: ERF_DataStruct.H:25
@ m_y
Definition: ERF_DataStruct.H:25
@ u_x
Definition: ERF_DataStruct.H:24
@ m_x
Definition: ERF_DataStruct.H:24
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:58
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:55
amrex::GpuArray< Real, AMREX_SPACEDIM > dxInv
Definition: ERF_InitCustomPertVels_ParticleTests.H:17
const Real dx
Definition: ERF_InitCustomPert_ABL.H:23
const int khi
Definition: ERF_InitCustomPert_Bubble.H:21
ParallelFor(grown_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) { qrcuten_arr(i, j, k)=Real(0);qscuten_arr(i, j, k)=Real(0);qicuten_arr(i, j, k)=Real(0);})
amrex::Real Real
Definition: ERF_ShocInterface.H:19
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int &i, int &j, int &k, amrex::Real w, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:414
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:117
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:104
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:144
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:170
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:464
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ gpy
Definition: ERF_IndexDefines.H:185
@ gpx
Definition: ERF_IndexDefines.H:184
@ ymom
Definition: ERF_IndexDefines.H:194
@ cons
Definition: ERF_IndexDefines.H:192
@ zmom
Definition: ERF_IndexDefines.H:195
@ xmom
Definition: ERF_IndexDefines.H:193
@ qt
Definition: ERF_Kessler.H:28
@ qv
Definition: ERF_Kessler.H:29
@ q
Definition: ERF_WSM6.H:169