74 BL_PROFILE_REGION(
"erf_fast_rhs_T()");
76 const Box& domain = geom.Domain();
77 auto const domlo = lbound(domain);
78 auto const domhi = ubound(domain);
80 Real beta_1 = 0.5 * (1.0 - beta_s);
81 Real beta_2 = 0.5 * (1.0 + beta_s);
88 const Real* dx = geom.CellSize();
89 const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
95 const auto& dm = S_stage_data[
IntVars::cons].DistributionMap();
97 MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
98 MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
99 MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
100 MultiFab Delta_rho ( ba , dm, 1, 1);
101 MultiFab Delta_rho_theta( ba , dm, 1, 1);
103 MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
104 MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
106 MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
107 MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
108 MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
109 MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
110 MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
114 const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
115 const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
124 #pragma omp parallel if (Gpu::notInLaunchRegion())
126 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
128 const Array4<Real> & cur_cons = S_data[
IntVars::cons].array(mfi);
129 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
130 const Array4<const Real>& stage_cons = S_stage_data[
IntVars::cons].const_array(mfi);
131 const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
133 const Array4<Real>& old_drho = Delta_rho.array(mfi);
134 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
135 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
136 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
137 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
139 const Array4<const Real>& prev_xmom = S_prev[
IntVars::xmom].const_array(mfi);
140 const Array4<const Real>& prev_ymom = S_prev[
IntVars::ymom].const_array(mfi);
141 const Array4<const Real>& prev_zmom = S_prev[
IntVars::zmom].const_array(mfi);
143 const Array4<const Real>& stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
144 const Array4<const Real>& stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
145 const Array4<const Real>& stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
147 Box bx = mfi.validbox();
148 Box gbx = mfi.tilebox(); gbx.grow(1);
151 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
157 Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
158 Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
159 Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
161 const auto& bx_lo = lbound(bx);
162 const auto& bx_hi = ubound(bx);
164 ParallelFor(gtbx, gtby, gtbz,
165 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
166 old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
167 if (k == bx_lo.z && k != domlo.z) {
168 old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
169 }
else if (k == bx_hi.z) {
170 old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
173 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
174 old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
175 if (k == bx_lo.z && k != domlo.z) {
176 old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
177 }
else if (k == bx_hi.z) {
178 old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
181 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
182 old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
185 const Array4<Real>& theta_extrap = extrap.array(mfi);
186 const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
188 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
192 theta_extrap(i,j,k) = old_drho_theta(i,j,k);
194 theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
195 ( old_drho_theta(i,j,k) - lagged_arr(i,j,k) );
200 theta_extrap(i,j,k) *= (1.0 + RvOverRd*
qv);
205 #pragma omp parallel if (Gpu::notInLaunchRegion())
207 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
210 Box gbx = mfi.tilebox(); gbx.grow(1);
211 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
212 const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
213 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
214 lagged_arr(i,j,k) = old_drho_theta(i,j,k);
223 #pragma omp parallel if (Gpu::notInLaunchRegion())
225 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
227 Box bx = mfi.validbox();
228 Box tbx = mfi.nodaltilebox(0);
229 Box tby = mfi.nodaltilebox(1);
231 const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
232 const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
234 const Array4<const Real> & stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
235 const Array4<const Real> & stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
236 const Array4<const Real> & qt_arr =
qt.const_array(mfi);
238 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
239 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
241 const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[
IntVars::xmom].const_array(mfi);
242 const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[
IntVars::ymom].const_array(mfi);
244 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
245 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
247 const Array4<Real>& cur_xmom = S_data[
IntVars::xmom].array(mfi);
248 const Array4<Real>& cur_ymom = S_data[
IntVars::ymom].array(mfi);
251 const Array4<Real>& avg_xmom_arr = avg_xmom.array(mfi);
252 const Array4<Real>& avg_ymom_arr = avg_ymom.array(mfi);
254 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
256 const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
258 const Array4<Real>& theta_extrap = extrap.array(mfi);
261 const Array4<const Real>& mf_ux = mapfac[
MapFacType::u_x]->const_array(mfi);
262 const Array4<const Real>& mf_vy = mapfac[
MapFacType::v_y]->const_array(mfi);
274 BL_PROFILE(
"fast_rhs_xymom_T");
276 const auto& bx_lo = lbound(bx);
277 const auto& bx_hi = ubound(bx);
279 ParallelFor(tbx, tby,
280 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
285 Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
286 Real gp_zeta_on_iface = (k == 0) ?
287 0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
288 -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
289 0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
290 -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
291 Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
294 Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) : 0.0;
296 Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
299 new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
300 + dtau * slow_rhs_rho_u(i,j,k)
301 + dtau * xmom_src_arr(i,j,k);
302 if (k == bx_lo.z && k != domlo.z) {
303 new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
304 }
else if (k == bx_hi.z) {
305 new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
308 avg_xmom_arr(i,j,k) += facinv*new_drho_u(i,j,k);
310 cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
312 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
317 Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
318 Real gp_zeta_on_jface = (k == 0) ?
319 0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
320 -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
321 0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
322 -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
323 Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
326 Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) : 0.0;
328 Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
331 new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
332 + dtau * slow_rhs_rho_v(i,j,k)
333 + dtau * ymom_src_arr(i,j,k);
335 if (k == bx_lo.z && k != domlo.z) {
336 new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
337 }
else if (k == bx_hi.z) {
338 new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
341 avg_ymom_arr(i,j,k) += facinv*new_drho_v(i,j,k);
343 cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
351 #pragma omp parallel if (Gpu::notInLaunchRegion())
354 std::array<FArrayBox,AMREX_SPACEDIM> flux;
357 Box bx = mfi.tilebox();
358 Box tbz = surroundingNodes(bx,2);
360 Box vbx = mfi.validbox();
361 const auto& vbx_hi = ubound(vbx);
363 const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
364 const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
366 const Array4<const Real> & stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
367 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
368 const Array4<const Real> & qt_arr =
qt.const_array(mfi);
370 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
371 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
372 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
373 const Array4<Real>& old_drho = Delta_rho.array(mfi);
374 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
376 const Array4<const Real>& slow_rhs_cons = S_slow_rhs[
IntVars::cons].const_array(mfi);
377 const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[
IntVars::zmom].const_array(mfi);
379 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
380 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
382 const Array4<Real>& cur_cons = S_data[
IntVars::cons].array(mfi);
383 const Array4<Real>& cur_zmom = S_data[
IntVars::zmom].array(mfi);
386 const Array4<Real>& avg_zmom_arr = avg_zmom.array(mfi);
388 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
389 const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
391 const Array4< Real>& omega_arr = Omega.array(mfi);
394 const Array4<const Real>& mf_mx = mapfac[
MapFacType::m_x]->const_array(mfi);
395 const Array4<const Real>& mf_my = mapfac[
MapFacType::m_y]->const_array(mfi);
396 const Array4<const Real>& mf_ux = mapfac[
MapFacType::u_x]->const_array(mfi);
397 const Array4<const Real>& mf_uy = mapfac[
MapFacType::u_y]->const_array(mfi);
398 const Array4<const Real>& mf_vx = mapfac[
MapFacType::v_x]->const_array(mfi);
399 const Array4<const Real>& mf_vy = mapfac[
MapFacType::v_y]->const_array(mfi);
407 FArrayBox temp_rhs_fab;
411 RHS_fab.resize (tbz,1,The_Async_Arena());
412 soln_fab.resize (tbz,1,The_Async_Arena());
413 temp_rhs_fab.resize(tbz,2,The_Async_Arena());
415 auto const& RHS_a = RHS_fab.array();
416 auto const& soln_a = soln_fab.array();
417 auto const& temp_rhs_arr = temp_rhs_fab.array();
419 auto const& coeffA_a = coeff_A_mf.array(mfi);
420 auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
421 auto const& coeffC_a = coeff_C_mf.array(mfi);
422 auto const& coeffP_a = coeff_P_mf.array(mfi);
423 auto const& coeffQ_a = coeff_Q_mf.array(mfi);
428 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
429 flux[dir].resize(surroundingNodes(bx,dir),2);
430 flux[dir].setVal<RunOn::Device>(0.);
432 const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
433 flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
437 BL_PROFILE(
"fast_T_making_rho_rhs");
438 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
439 Real h_zeta_cc_xface_hi = 0.5 * dzi *
440 ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
441 -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
443 Real h_zeta_cc_xface_lo = 0.5 * dzi *
444 ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
445 -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
447 Real h_zeta_cc_yface_hi = 0.5 * dzi *
448 ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
449 -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
451 Real h_zeta_cc_yface_lo = 0.5 * dzi *
452 ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
453 -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
455 Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_uy(i ,j,0);
456 Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_uy(i+1,j,0);
457 Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_vx(i,j ,0);
458 Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_vx(i,j+1,0);
460 Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
463 temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
464 ( yflux_hi - yflux_lo ) * dyi * mfsq;
465 temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
466 xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
467 ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
468 yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
471 (flx_arr[0])(i,j,k,0) = xflux_lo;
472 (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
474 (flx_arr[1])(i,j,k,0) = yflux_lo;
475 (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
478 (flx_arr[0])(i+1,j,k,0) = xflux_hi;
479 (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
482 (flx_arr[1])(i,j+1,k,0) = yflux_hi;
483 (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
491 Box gbxo = mfi.nodaltilebox(2);
494 if (gbxo.smallEnd(2) == domlo.z) {
495 Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
496 gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
497 ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
498 omega_arr(i,j,k) = 0.;
501 if (gbxo.bigEnd(2) == domhi.z+1) {
502 Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
503 gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
504 ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
505 omega_arr(i,j,k) = old_drho_w(i,j,k);
508 ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
509 omega_arr(i,j,k) =
OmegaFromW(i,j,k,old_drho_w(i,j,k),
510 old_drho_u,old_drho_v,
511 mf_ux,mf_vy,z_nd,dxInv);
516 Box bx_shrunk_in_k = bx;
517 int klo = tbz.smallEnd(2);
518 int khi = tbz.bigEnd(2);
519 bx_shrunk_in_k.setSmall(2,klo+1);
520 bx_shrunk_in_k.setBig(2,khi-1);
525 Real halfg = std::abs(0.5 * grav_gpu[2]);
528 BL_PROFILE(
"fast_loop_on_shrunk_t");
530 ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
532 Real detJ_on_kface = 0.5 * (detJ(i,j,k) + detJ(i,j,k-1));
534 Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j,k-1)) : 0.0;
536 Real coeff_P = coeffP_a(i,j,k) / (1.0 + q);
537 Real coeff_Q = coeffQ_a(i,j,k) / (1.0 + q);
544 Real R0_tmp = (-halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )) * detJ(i,j,k )
545 + (-halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1)) * detJ(i,j,k-1);
548 Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,
Rho_comp ) * detJ(i,j,k ) +
549 slow_rhs_cons(i,j,k-1,
Rho_comp ) * detJ(i,j,k-1) )
550 + ( coeff_P * slow_rhs_cons(i,j,k ,
RhoTheta_comp) * detJ(i,j,k ) +
551 coeff_Q * slow_rhs_cons(i,j,k-1,
RhoTheta_comp) * detJ(i,j,k-1) );
553 Real Omega_kp1 = omega_arr(i,j,k+1);
554 Real Omega_k = omega_arr(i,j,k );
555 Real Omega_km1 = omega_arr(i,j,k-1);
558 R1_tmp += ( halfg ) *
559 ( beta_1 * dzi * (Omega_kp1 - Omega_km1) + temp_rhs_arr(i,j,k,
Rho_comp) + temp_rhs_arr(i,j,k-1,
Rho_comp));
563 coeff_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,
RhoTheta_comp) ) +
564 coeff_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,
RhoTheta_comp) ) );
567 RHS_a(i,j,k) = detJ_on_kface * old_drho_w(i,j,k) + dtau * (
568 detJ_on_kface * (slow_rhs_rho_w(i,j,k)+zmom_src_arr(i,j,k)) + R0_tmp + dtau*beta_2*R1_tmp);
571 RHS_a(i,j,k) += detJ_on_kface *
OmegaFromW(i,j,k,0.,
572 new_drho_u,new_drho_v,
573 mf_ux,mf_vy,z_nd,dxInv);
580 auto const lo = lbound(bx);
581 auto const hi = ubound(bx);
584 BL_PROFILE(
"fast_rhs_b2d_loop_t");
586 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
589 RHS_a(i,j,lo.z ) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
590 RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
593 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
595 for (
int k = lo.z+1; k <= hi.z+1; k++) {
596 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
599 cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
600 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
602 for (
int k = hi.z; k >= lo.z; k--) {
603 soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
607 for (
int j = lo.y; j <= hi.y; ++j) {
609 for (
int i = lo.x; i <= hi.x; ++i)
611 RHS_a(i,j,lo.z) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
612 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
616 for (
int i = lo.x; i <= hi.x; ++i)
618 RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
619 soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
623 for (
int k = lo.z+1; k <= hi.z; ++k) {
624 for (
int j = lo.y; j <= hi.y; ++j) {
626 for (
int i = lo.x; i <= hi.x; ++i) {
627 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
631 for (
int k = hi.z; k > lo.z; --k) {
632 for (
int j = lo.y; j <= hi.y; ++j) {
634 for (
int i = lo.x; i <= hi.x; ++i) {
635 soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
639 if (hi.z == domhi.z) {
640 for (
int j = lo.y; j <= hi.y; ++j) {
642 for (
int i = lo.x; i <= hi.x; ++i) {
643 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
650 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
652 cur_zmom(i,j,k) = stage_zmom(i,j,k);
655 if (lo.z == domlo.z) {
656 tbz.setSmall(2,domlo.z+1);
658 if (hi.z == domhi.z) {
659 tbz.setBig(2,domhi.z);
661 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
664 new_drho_u,new_drho_v,
665 mf_ux,mf_vy,z_nd,dxInv);
666 cur_zmom(i,j,k) += wpp;
673 BL_PROFILE(
"fast_rho_final_update");
674 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
676 Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
677 Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
681 avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
683 (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
687 avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
689 (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
690 (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
694 Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
695 cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
697 Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
698 ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
699 zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
701 cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
704 (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
715 int strt_comp_reflux = 0;
717 int num_comp_reflux = 1;
718 if (level < finest_level) {
719 fr_as_crse->CrseAdd(mfi,
720 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
721 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
724 fr_as_fine->FineAdd(mfi,
725 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
726 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
732 Gpu::streamSynchronize();
constexpr amrex::Real R_v
Definition: ERF_Constants.H:11
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
@ v_x
Definition: ERF_DataStruct.H:22
@ u_y
Definition: ERF_DataStruct.H:23
@ v_y
Definition: ERF_DataStruct.H:23
@ m_y
Definition: ERF_DataStruct.H:23
@ u_x
Definition: ERF_DataStruct.H:22
@ m_x
Definition: ERF_DataStruct.H:22
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
amrex::Real Real
Definition: ERF_ShocInterface.H:19
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int &i, int &j, int &k, amrex::Real w, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:406
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:109
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:96
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:136
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:162
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:456
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ gpy
Definition: ERF_IndexDefines.H:151
@ gpx
Definition: ERF_IndexDefines.H:150
@ ymom
Definition: ERF_IndexDefines.H:160
@ cons
Definition: ERF_IndexDefines.H:158
@ zmom
Definition: ERF_IndexDefines.H:161
@ xmom
Definition: ERF_IndexDefines.H:159
@ qt
Definition: ERF_Kessler.H:27
@ qv
Definition: ERF_Kessler.H:28