63 BL_PROFILE_REGION(
"erf_fast_rhs_T()");
65 const Box& domain = geom.Domain();
66 auto const domlo = lbound(domain);
67 auto const domhi = ubound(domain);
69 Real beta_1 = 0.5 * (1.0 - beta_s);
70 Real beta_2 = 0.5 * (1.0 + beta_s);
75 const Real* dx = geom.CellSize();
76 const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
82 const auto& dm = S_stage_data[
IntVars::cons].DistributionMap();
84 MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
85 MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
86 MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
87 MultiFab Delta_rho ( ba , dm, 1, 1);
88 MultiFab Delta_rho_theta( ba , dm, 1, 1);
90 MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
91 MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
93 MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
94 MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
95 MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
96 MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
97 MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
101 const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
102 const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
111 #pragma omp parallel if (Gpu::notInLaunchRegion())
113 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
115 const Array4<Real> & cur_cons = S_data[
IntVars::cons].array(mfi);
116 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
117 const Array4<const Real>& stage_cons = S_stage_data[
IntVars::cons].const_array(mfi);
118 const Array4<Real>& lagged_delta_rt = S_scratch[
IntVars::cons].array(mfi);
120 const Array4<Real>& old_drho = Delta_rho.array(mfi);
121 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
122 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
123 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
124 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
126 const Array4<const Real>& prev_xmom = S_prev[
IntVars::xmom].const_array(mfi);
127 const Array4<const Real>& prev_ymom = S_prev[
IntVars::ymom].const_array(mfi);
128 const Array4<const Real>& prev_zmom = S_prev[
IntVars::zmom].const_array(mfi);
130 const Array4<const Real>& stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
131 const Array4<const Real>& stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
132 const Array4<const Real>& stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
134 Box bx = mfi.validbox();
135 Box gbx = mfi.tilebox(); gbx.grow(1);
138 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
144 Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
145 Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
146 Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
148 const auto& bx_lo = lbound(bx);
149 const auto& bx_hi = ubound(bx);
151 ParallelFor(gtbx, gtby, gtbz,
152 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
153 old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
154 if (k == bx_lo.z && k != domlo.z) {
155 old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
156 }
else if (k == bx_hi.z) {
157 old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
160 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
161 old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
162 if (k == bx_lo.z && k != domlo.z) {
163 old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
164 }
else if (k == bx_hi.z) {
165 old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
168 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
169 old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
172 const Array4<Real>& theta_extrap = extrap.array(mfi);
174 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
178 theta_extrap(i,j,k) = old_drho_theta(i,j,k);
180 theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
181 ( old_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,
RhoTheta_comp) );
187 #pragma omp parallel if (Gpu::notInLaunchRegion())
189 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
192 Box gbx = mfi.tilebox(); gbx.grow(1);
193 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
194 const Array4<Real>& lagged_delta_rt = S_scratch[
IntVars::cons].array(mfi);
195 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
196 lagged_delta_rt(i,j,k,
RhoTheta_comp) = old_drho_theta(i,j,k);
205 #pragma omp parallel if (Gpu::notInLaunchRegion())
207 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
209 Box bx = mfi.validbox();
210 Box tbx = mfi.nodaltilebox(0);
211 Box tby = mfi.nodaltilebox(1);
213 const Array4<const Real> & stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
214 const Array4<const Real> & stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
215 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
217 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
218 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
220 const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[
IntVars::xmom].const_array(mfi);
221 const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[
IntVars::ymom].const_array(mfi);
223 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
224 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
226 const Array4<Real>& cur_xmom = S_data[
IntVars::xmom].array(mfi);
227 const Array4<Real>& cur_ymom = S_data[
IntVars::ymom].array(mfi);
230 const Array4<Real>& avg_xmom = S_scratch[
IntVars::xmom].array(mfi);
231 const Array4<Real>& avg_ymom = S_scratch[
IntVars::ymom].array(mfi);
233 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
235 const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
237 const Array4<Real>& theta_extrap = extrap.array(mfi);
240 const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
241 const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
253 BL_PROFILE(
"fast_rhs_xymom_T");
255 const auto& bx_lo = lbound(bx);
256 const auto& bx_hi = ubound(bx);
258 ParallelFor(tbx, tby,
259 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
264 Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
265 Real gp_zeta_on_iface = (k == 0) ?
266 0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
267 -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
268 0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
269 -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
270 Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
273 if (l_use_moisture) {
279 Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
280 Real fast_rhs_rho_u = -
Gamma *
R_d * pi_c * gpx;
282 new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
283 + dtau * slow_rhs_rho_u(i,j,k);
284 if (k == bx_lo.z && k != domlo.z) {
285 new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
286 }
else if (k == bx_hi.z) {
287 new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
290 avg_xmom(i,j,k) += facinv*new_drho_u(i,j,k);
292 cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
294 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
299 Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
300 Real gp_zeta_on_jface = (k == 0) ?
301 0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
302 -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
303 0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
304 -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
305 Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
308 if (l_use_moisture) {
314 Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
315 Real fast_rhs_rho_v = -
Gamma *
R_d * pi_c * gpy;
317 new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
318 + dtau * slow_rhs_rho_v(i,j,k);
320 if (k == bx_lo.z && k != domlo.z) {
321 new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
322 }
else if (k == bx_hi.z) {
323 new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
326 avg_ymom(i,j,k) += facinv*new_drho_v(i,j,k);
328 cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
336 #pragma omp parallel if (Gpu::notInLaunchRegion())
339 std::array<FArrayBox,AMREX_SPACEDIM> flux;
342 Box bx = mfi.tilebox();
343 Box tbz = surroundingNodes(bx,2);
345 Box vbx = mfi.validbox();
346 const auto& vbx_hi = ubound(vbx);
348 const Array4<const Real> & stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
349 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
351 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
352 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
353 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
354 const Array4<Real>& old_drho = Delta_rho.array(mfi);
355 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
357 const Array4<const Real>& slow_rhs_cons = S_slow_rhs[
IntVars::cons].const_array(mfi);
358 const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[
IntVars::zmom].const_array(mfi);
360 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
361 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
363 const Array4<Real>& cur_cons = S_data[
IntVars::cons].array(mfi);
364 const Array4<Real>& cur_zmom = S_data[
IntVars::zmom].array(mfi);
367 const Array4<Real>& avg_zmom = S_scratch[
IntVars::zmom].array(mfi);
369 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
370 const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
372 const Array4< Real>& omega_arr = Omega.array(mfi);
375 const Array4<const Real>& mf_m = mapfac_m->const_array(mfi);
376 const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
377 const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
385 FArrayBox temp_rhs_fab;
389 RHS_fab.resize (tbz,1,The_Async_Arena());
390 soln_fab.resize (tbz,1,The_Async_Arena());
391 temp_rhs_fab.resize(tbz,2,The_Async_Arena());
393 auto const& RHS_a = RHS_fab.array();
394 auto const& soln_a = soln_fab.array();
395 auto const& temp_rhs_arr = temp_rhs_fab.array();
397 auto const& coeffA_a = coeff_A_mf.array(mfi);
398 auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
399 auto const& coeffC_a = coeff_C_mf.array(mfi);
400 auto const& coeffP_a = coeff_P_mf.array(mfi);
401 auto const& coeffQ_a = coeff_Q_mf.array(mfi);
406 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
407 flux[dir].resize(surroundingNodes(bx,dir),2);
408 flux[dir].setVal<RunOn::Device>(0.);
410 const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
411 flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
415 BL_PROFILE(
"fast_T_making_rho_rhs");
416 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
417 Real h_zeta_cc_xface_hi = 0.5 * dzi *
418 ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
419 -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
421 Real h_zeta_cc_xface_lo = 0.5 * dzi *
422 ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
423 -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
425 Real h_zeta_cc_yface_hi = 0.5 * dzi *
426 ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
427 -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
429 Real h_zeta_cc_yface_lo = 0.5 * dzi *
430 ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
431 -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
433 Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_u(i ,j,0);
434 Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_u(i+1,j,0);
435 Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_v(i,j ,0);
436 Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_v(i,j+1,0);
438 Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);
441 temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
442 ( yflux_hi - yflux_lo ) * dyi * mfsq;
443 temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
444 xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
445 ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
446 yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
448 (flx_arr[0])(i,j,k,0) = xflux_lo;
449 (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
451 (flx_arr[1])(i,j,k,0) = yflux_lo;
452 (flx_arr[1])(i,j,k,1) = (flx_arr[0])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
455 (flx_arr[0])(i+1,j,k,0) = xflux_hi;
456 (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
459 (flx_arr[1])(i,j+1,k,0) = yflux_hi;
460 (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
468 Box gbxo = mfi.nodaltilebox(2);
471 if (gbxo.smallEnd(2) == domlo.z) {
472 Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
473 gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
474 ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
475 omega_arr(i,j,k) = 0.;
478 if (gbxo.bigEnd(2) == domhi.z+1) {
479 Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
480 gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
481 ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
482 omega_arr(i,j,k) = old_drho_w(i,j,k);
485 ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
486 omega_arr(i,j,k) =
OmegaFromW(i,j,k,old_drho_w(i,j,k),old_drho_u,old_drho_v,z_nd,dxInv);
491 Box bx_shrunk_in_k = bx;
492 int klo = tbz.smallEnd(2);
493 int khi = tbz.bigEnd(2);
494 bx_shrunk_in_k.setSmall(2,klo+1);
495 bx_shrunk_in_k.setBig(2,khi-1);
500 Real halfg = std::abs(0.5 * grav_gpu[2]);
503 BL_PROFILE(
"fast_loop_on_shrunk_t");
505 ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
507 Real detJ_on_kface = 0.5 * (detJ(i,j,k) + detJ(i,j,k-1));
509 Real coeff_P = coeffP_a(i,j,k);
510 Real coeff_Q = coeffQ_a(i,j,k);
512 if (l_use_moisture) {
515 coeff_P /= (1.0 + q);
516 coeff_Q /= (1.0 + q);
524 Real R0_tmp = (-halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )) * detJ(i,j,k )
525 + (-halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1)) * detJ(i,j,k-1);
528 Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,
Rho_comp ) * detJ(i,j,k ) +
529 slow_rhs_cons(i,j,k-1,
Rho_comp ) * detJ(i,j,k-1) )
530 + ( coeff_P * slow_rhs_cons(i,j,k ,
RhoTheta_comp) * detJ(i,j,k ) +
531 coeff_Q * slow_rhs_cons(i,j,k-1,
RhoTheta_comp) * detJ(i,j,k-1) );
533 Real Omega_kp1 = omega_arr(i,j,k+1);
534 Real Omega_k = omega_arr(i,j,k );
535 Real Omega_km1 = omega_arr(i,j,k-1);
538 R1_tmp += ( halfg ) *
539 ( beta_1 * dzi * (Omega_kp1 - Omega_km1) + temp_rhs_arr(i,j,k,
Rho_comp) + temp_rhs_arr(i,j,k-1,
Rho_comp));
543 coeff_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,
RhoTheta_comp) ) +
544 coeff_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,
RhoTheta_comp) ) );
547 RHS_a(i,j,k) = detJ_on_kface * old_drho_w(i,j,k) + dtau * (
548 detJ_on_kface * slow_rhs_rho_w(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
551 RHS_a(i,j,k) += detJ_on_kface *
OmegaFromW(i,j,k,0.,new_drho_u,new_drho_v,z_nd,dxInv);
558 auto const lo = lbound(bx);
559 auto const hi = ubound(bx);
562 BL_PROFILE(
"fast_rhs_b2d_loop_t");
564 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
567 RHS_a(i,j,lo.z ) = dtau * slow_rhs_rho_w(i,j,lo.z);
568 RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
571 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
573 for (
int k = lo.z+1; k <= hi.z+1; k++) {
574 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
577 cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
578 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
580 for (
int k = hi.z; k >= lo.z; k--) {
581 soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
585 for (
int j = lo.y; j <= hi.y; ++j) {
587 for (
int i = lo.x; i <= hi.x; ++i)
589 RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
590 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
594 for (
int i = lo.x; i <= hi.x; ++i)
596 RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
597 soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
601 for (
int k = lo.z+1; k <= hi.z; ++k) {
602 for (
int j = lo.y; j <= hi.y; ++j) {
604 for (
int i = lo.x; i <= hi.x; ++i) {
605 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
609 for (
int k = hi.z; k > lo.z; --k) {
610 for (
int j = lo.y; j <= hi.y; ++j) {
612 for (
int i = lo.x; i <= hi.x; ++i) {
613 soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
617 if (hi.z == domhi.z) {
618 for (
int j = lo.y; j <= hi.y; ++j) {
620 for (
int i = lo.x; i <= hi.x; ++i) {
621 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
628 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
630 cur_zmom(i,j,k) = stage_zmom(i,j,k);
633 if (lo.z == domlo.z) {
634 tbz.setSmall(2,domlo.z+1);
636 if (hi.z == domhi.z) {
637 tbz.setBig(2,domhi.z);
639 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
641 Real wpp =
WFromOmega(i,j,k,soln_a(i,j,k),new_drho_u,new_drho_v,z_nd,dxInv);
642 cur_zmom(i,j,k) += wpp;
650 BL_PROFILE(
"fast_rho_final_update");
651 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
653 Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
654 Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
658 avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
659 (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
662 avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
663 (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
664 (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
667 Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
668 cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
670 Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
671 ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
672 zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
674 cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
676 (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
681 if (l_reflux && nrk == 2) {
682 int strt_comp_reflux = 0;
684 int num_comp_reflux = 1;
685 if (level < finest_level) {
686 fr_as_crse->CrseAdd(mfi,
687 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
688 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
691 fr_as_fine->FineAdd(mfi,
692 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
693 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
699 Gpu::streamSynchronize();
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define PrimQ2_comp
Definition: ERF_IndexDefines.H:54
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:101
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int i, int j, int k, amrex::Real omega, amrex::Real u, amrex::Real v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:407
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int i, int j, int k, amrex::Real w, const amrex::Array4< const amrex::Real > u_arr, const amrex::Array4< const amrex::Real > v_arr, const amrex::Array4< const amrex::Real > z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:374
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:87
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:130
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:158
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ ymom
Definition: ERF_IndexDefines.H:141
@ cons
Definition: ERF_IndexDefines.H:139
@ zmom
Definition: ERF_IndexDefines.H:142
@ xmom
Definition: ERF_IndexDefines.H:140