75 BL_PROFILE_REGION(
"erf_substep_T()");
77 const Box& domain = geom.Domain();
78 auto const domlo = lbound(domain);
79 auto const domhi = ubound(domain);
81 Real beta_1 = 0.5 * (1.0 - beta_s);
82 Real beta_2 = 0.5 * (1.0 + beta_s);
89 bool l_rayleigh_impl_for_w = (sinesq_stag_d !=
nullptr);
91 const Real* dx = geom.CellSize();
92 const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
98 const auto& dm = S_stage_data[
IntVars::cons].DistributionMap();
100 MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
101 MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
102 MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
103 MultiFab Delta_rho ( ba , dm, 1, 1);
104 MultiFab Delta_rho_theta( ba , dm, 1, 1);
106 MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
107 MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
109 MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
110 MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
111 MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
112 MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
113 MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
117 const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
118 const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
127 #pragma omp parallel if (Gpu::notInLaunchRegion())
129 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
131 const Array4<Real> & cur_cons = S_data[
IntVars::cons].array(mfi);
132 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
133 const Array4<const Real>& stage_cons = S_stage_data[
IntVars::cons].const_array(mfi);
134 const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
136 const Array4<Real>& old_drho = Delta_rho.array(mfi);
137 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
138 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
139 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
140 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
142 const Array4<const Real>& prev_xmom = S_prev[
IntVars::xmom].const_array(mfi);
143 const Array4<const Real>& prev_ymom = S_prev[
IntVars::ymom].const_array(mfi);
144 const Array4<const Real>& prev_zmom = S_prev[
IntVars::zmom].const_array(mfi);
146 const Array4<const Real>& stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
147 const Array4<const Real>& stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
148 const Array4<const Real>& stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
150 Box bx = mfi.validbox();
151 Box gbx = mfi.tilebox(); gbx.grow(1);
154 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
160 Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
161 Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
162 Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
164 const auto& bx_lo = lbound(bx);
165 const auto& bx_hi = ubound(bx);
167 ParallelFor(gtbx, gtby, gtbz,
168 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
169 old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
170 if (k == bx_lo.z && k != domlo.z) {
171 old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
172 }
else if (k == bx_hi.z) {
173 old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
176 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
177 old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
178 if (k == bx_lo.z && k != domlo.z) {
179 old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
180 }
else if (k == bx_hi.z) {
181 old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
184 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
185 old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
188 const Array4<Real>& theta_extrap = extrap.array(mfi);
189 const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
191 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
195 theta_extrap(i,j,k) = old_drho_theta(i,j,k);
197 theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
198 ( old_drho_theta(i,j,k) - lagged_arr(i,j,k) );
203 theta_extrap(i,j,k) *= (1.0 + RvOverRd*
qv);
208 #pragma omp parallel if (Gpu::notInLaunchRegion())
210 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
213 Box gbx = mfi.tilebox(); gbx.grow(1);
214 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
215 const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
216 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
217 lagged_arr(i,j,k) = old_drho_theta(i,j,k);
226 #pragma omp parallel if (Gpu::notInLaunchRegion())
228 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
230 Box bx = mfi.validbox();
231 Box tbx = mfi.nodaltilebox(0);
232 Box tby = mfi.nodaltilebox(1);
234 const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
235 const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
237 const Array4<const Real> & stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
238 const Array4<const Real> & stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
239 const Array4<const Real> & qt_arr =
qt.const_array(mfi);
241 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
242 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
244 const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[
IntVars::xmom].const_array(mfi);
245 const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[
IntVars::ymom].const_array(mfi);
247 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
248 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
250 const Array4<Real>& cur_xmom = S_data[
IntVars::xmom].array(mfi);
251 const Array4<Real>& cur_ymom = S_data[
IntVars::ymom].array(mfi);
254 const Array4<Real>& avg_xmom_arr = avg_xmom.array(mfi);
255 const Array4<Real>& avg_ymom_arr = avg_ymom.array(mfi);
257 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
259 const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
261 const Array4<Real>& theta_extrap = extrap.array(mfi);
264 const Array4<const Real>& mf_ux = mapfac[
MapFacType::u_x]->const_array(mfi);
265 const Array4<const Real>& mf_vy = mapfac[
MapFacType::v_y]->const_array(mfi);
277 BL_PROFILE(
"substep_xymom_T");
279 const auto& bx_lo = lbound(bx);
280 const auto& bx_hi = ubound(bx);
282 ParallelFor(tbx, tby,
283 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
288 Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
289 Real gp_zeta_on_iface = (k == 0) ?
290 0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
291 -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
292 0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
293 -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
294 Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
297 Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) : 0.0;
299 Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
302 new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
303 + dtau * slow_rhs_rho_u(i,j,k)
304 + dtau * xmom_src_arr(i,j,k);
305 if (k == bx_lo.z && k != domlo.z) {
306 new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
307 }
else if (k == bx_hi.z) {
308 new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
311 avg_xmom_arr(i,j,k) += facinv*new_drho_u(i,j,k);
313 cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
315 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
320 Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
321 Real gp_zeta_on_jface = (k == 0) ?
322 0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
323 -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
324 0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
325 -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
326 Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
329 Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) : 0.0;
331 Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
334 new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
335 + dtau * slow_rhs_rho_v(i,j,k)
336 + dtau * ymom_src_arr(i,j,k);
338 if (k == bx_lo.z && k != domlo.z) {
339 new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
340 }
else if (k == bx_hi.z) {
341 new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
344 avg_ymom_arr(i,j,k) += facinv*new_drho_v(i,j,k);
346 cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
354 #pragma omp parallel if (Gpu::notInLaunchRegion())
357 std::array<FArrayBox,AMREX_SPACEDIM> flux;
360 Box bx = mfi.tilebox();
361 Box tbz = surroundingNodes(bx,2);
363 Box vbx = mfi.validbox();
364 const auto& vbx_hi = ubound(vbx);
366 const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
367 const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
369 const Array4<const Real> & stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
370 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
371 const Array4<const Real> & qt_arr =
qt.const_array(mfi);
373 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
374 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
375 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
376 const Array4<Real>& old_drho = Delta_rho.array(mfi);
377 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
379 const Array4<const Real>& slow_rhs_cons = S_slow_rhs[
IntVars::cons].const_array(mfi);
380 const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[
IntVars::zmom].const_array(mfi);
382 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
383 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
385 const Array4<Real>& cur_cons = S_data[
IntVars::cons].array(mfi);
386 const Array4<Real>& cur_zmom = S_data[
IntVars::zmom].array(mfi);
389 const Array4<Real>& avg_zmom_arr = avg_zmom.array(mfi);
391 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
392 const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
394 const Array4< Real>& omega_arr = Omega.array(mfi);
397 const Array4<const Real>& mf_mx = mapfac[
MapFacType::m_x]->const_array(mfi);
398 const Array4<const Real>& mf_my = mapfac[
MapFacType::m_y]->const_array(mfi);
399 const Array4<const Real>& mf_ux = mapfac[
MapFacType::u_x]->const_array(mfi);
400 const Array4<const Real>& mf_uy = mapfac[
MapFacType::u_y]->const_array(mfi);
401 const Array4<const Real>& mf_vx = mapfac[
MapFacType::v_x]->const_array(mfi);
402 const Array4<const Real>& mf_vy = mapfac[
MapFacType::v_y]->const_array(mfi);
410 FArrayBox temp_rhs_fab;
414 RHS_fab.resize (tbz,1,The_Async_Arena());
415 soln_fab.resize (tbz,1,The_Async_Arena());
416 temp_rhs_fab.resize(tbz,2,The_Async_Arena());
418 auto const& RHS_a = RHS_fab.array();
419 auto const& soln_a = soln_fab.array();
420 auto const& temp_rhs_arr = temp_rhs_fab.array();
422 auto const& coeffA_a = coeff_A_mf.array(mfi);
423 auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
424 auto const& coeffC_a = coeff_C_mf.array(mfi);
425 auto const& coeffP_a = coeff_P_mf.array(mfi);
426 auto const& coeffQ_a = coeff_Q_mf.array(mfi);
431 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
432 flux[dir].resize(surroundingNodes(bx,dir),2,The_Async_Arena());
433 flux[dir].setVal<RunOn::Device>(0.);
435 const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
436 flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
440 BL_PROFILE(
"fast_T_making_rho_rhs");
441 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
442 Real h_zeta_cc_xface_hi = 0.5 * dzi *
443 ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
444 -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
446 Real h_zeta_cc_xface_lo = 0.5 * dzi *
447 ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
448 -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
450 Real h_zeta_cc_yface_hi = 0.5 * dzi *
451 ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
452 -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
454 Real h_zeta_cc_yface_lo = 0.5 * dzi *
455 ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
456 -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
458 Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_uy(i ,j,0);
459 Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_uy(i+1,j,0);
460 Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_vx(i,j ,0);
461 Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_vx(i,j+1,0);
463 Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
466 temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
467 ( yflux_hi - yflux_lo ) * dyi * mfsq;
468 temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
469 xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
470 ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
471 yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
474 (flx_arr[0])(i,j,k,0) = xflux_lo;
475 (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
477 (flx_arr[1])(i,j,k,0) = yflux_lo;
478 (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
481 (flx_arr[0])(i+1,j,k,0) = xflux_hi;
482 (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
485 (flx_arr[1])(i,j+1,k,0) = yflux_hi;
486 (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
494 Box gbxo = mfi.nodaltilebox(2);
497 if (gbxo.smallEnd(2) == domlo.z) {
498 Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
499 gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
500 ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
501 omega_arr(i,j,k) = 0.;
504 if (gbxo.bigEnd(2) == domhi.z+1) {
505 Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
506 gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
507 ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
508 omega_arr(i,j,k) = old_drho_w(i,j,k);
511 ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
512 omega_arr(i,j,k) =
OmegaFromW(i,j,k,old_drho_w(i,j,k),
513 old_drho_u,old_drho_v,
514 mf_ux,mf_vy,z_nd,dxInv);
519 Box bx_shrunk_in_k = bx;
520 int klo = tbz.smallEnd(2);
521 int khi = tbz.bigEnd(2);
522 bx_shrunk_in_k.setSmall(2,klo+1);
523 bx_shrunk_in_k.setBig(2,khi-1);
528 Real halfg = std::abs(0.5 * grav_gpu[2]);
531 BL_PROFILE(
"fast_loop_on_shrunk_t");
533 ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
535 Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j,k-1)) : 0.0;
537 Real coeff_P = coeffP_a(i,j,k) / (1.0 + q);
538 Real coeff_Q = coeffQ_a(i,j,k) / (1.0 + q);
545 Real R0_tmp = -halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )
546 -halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1);
553 Real Omega_kp1 = omega_arr(i,j,k+1);
554 Real Omega_k = omega_arr(i,j,k );
555 Real Omega_km1 = omega_arr(i,j,k-1);
557 Real detJdiff = (detJ(i,j,k) - detJ(i,j,k-1)) / (detJ(i,j,k)*detJ(i,j,k-1));
560 R1_tmp += halfg * ( beta_1 * dzi * (Omega_kp1/detJ(i,j,k) + detJdiff*Omega_k - Omega_km1/detJ(i,j,k-1))
561 + temp_rhs_arr(i,j,k,
Rho_comp)/detJ(i,j,k) + temp_rhs_arr(i,j,k-1,
Rho_comp)/detJ(i,j,k-1) );
564 R1_tmp += -( coeff_P/detJ(i,j,k ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,
RhoTheta_comp) )
565 + coeff_Q/detJ(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,
RhoTheta_comp) ) );
568 RHS_a(i,j,k) = old_drho_w(i,j,k)
569 + dtau * (slow_rhs_rho_w(i,j,k) + zmom_src_arr(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
573 new_drho_u,new_drho_v,
574 mf_ux,mf_vy,z_nd,dxInv);
581 auto const lo = lbound(bx);
582 auto const hi = ubound(bx);
585 BL_PROFILE(
"substep_b2d_loop_t");
587 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
590 RHS_a(i,j,lo.z ) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
591 RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
594 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
596 for (
int k = lo.z+1; k <= hi.z+1; k++) {
597 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
600 cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
601 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
603 for (
int k = hi.z; k >= lo.z; k--) {
604 soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
608 for (
int j = lo.y; j <= hi.y; ++j) {
610 for (
int i = lo.x; i <= hi.x; ++i)
612 RHS_a(i,j,lo.z) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
613 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
617 for (
int i = lo.x; i <= hi.x; ++i)
619 RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
620 soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
624 for (
int k = lo.z+1; k <= hi.z; ++k) {
625 for (
int j = lo.y; j <= hi.y; ++j) {
627 for (
int i = lo.x; i <= hi.x; ++i) {
628 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
632 for (
int k = hi.z; k > lo.z; --k) {
633 for (
int j = lo.y; j <= hi.y; ++j) {
635 for (
int i = lo.x; i <= hi.x; ++i) {
636 soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
640 if (hi.z == domhi.z) {
641 for (
int j = lo.y; j <= hi.y; ++j) {
643 for (
int i = lo.x; i <= hi.x; ++i) {
644 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
651 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
653 cur_zmom(i,j,k) = stage_zmom(i,j,k);
656 if (lo.z == domlo.z) {
657 tbz.setSmall(2,domlo.z+1);
659 if (hi.z == domhi.z) {
660 tbz.setBig(2,domhi.z);
662 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
665 new_drho_u,new_drho_v,
666 mf_ux,mf_vy,z_nd,dxInv);
668 cur_zmom(i,j,k) += wpp;
670 if (l_rayleigh_impl_for_w) {
671 Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k];
672 cur_zmom(i,j,k) /= (1.0 + damping_coeff);
680 BL_PROFILE(
"fast_rho_final_update");
681 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
683 Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
684 Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
688 avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
690 (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
694 avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
696 (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
697 (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
701 Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
702 cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
704 Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
705 ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
706 zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
708 cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
711 (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
722 int strt_comp_reflux = 0;
724 int num_comp_reflux = 1;
725 if (level < finest_level) {
726 fr_as_crse->CrseAdd(mfi,
727 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
728 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
731 fr_as_fine->FineAdd(mfi,
732 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
733 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
739 Gpu::streamSynchronize();
constexpr amrex::Real R_v
Definition: ERF_Constants.H:11
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
@ v_x
Definition: ERF_DataStruct.H:23
@ u_y
Definition: ERF_DataStruct.H:24
@ v_y
Definition: ERF_DataStruct.H:24
@ m_y
Definition: ERF_DataStruct.H:24
@ u_x
Definition: ERF_DataStruct.H:23
@ m_x
Definition: ERF_DataStruct.H:23
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
amrex::Real Real
Definition: ERF_ShocInterface.H:19
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int &i, int &j, int &k, amrex::Real w, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:412
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:115
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:102
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:142
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:168
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:462
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ gpy
Definition: ERF_IndexDefines.H:151
@ gpx
Definition: ERF_IndexDefines.H:150
@ ymom
Definition: ERF_IndexDefines.H:160
@ cons
Definition: ERF_IndexDefines.H:158
@ zmom
Definition: ERF_IndexDefines.H:161
@ xmom
Definition: ERF_IndexDefines.H:159
@ qt
Definition: ERF_Kessler.H:27
@ qv
Definition: ERF_Kessler.H:28