63 BL_PROFILE_REGION(
"erf_fast_rhs_N()");
65 Real beta_1 = 0.5 * (1.0 - beta_s);
66 Real beta_2 = 0.5 * (1.0 + beta_s);
71 const Real* dx = geom.CellSize();
72 const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
79 const auto& dm = S_stage_data[
IntVars::cons].DistributionMap();
81 MultiFab Delta_rho_theta( ba , dm, 1, 1);
82 MultiFab Delta_rho_w (convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
84 MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
85 MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
86 MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
87 MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
88 MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
92 const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
93 const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
106 AMREX_ALWAYS_ASSERT(nrk > 0 || step == 0);
113 #pragma omp parallel if (Gpu::notInLaunchRegion())
115 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
117 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
118 const Array4<const Real>& prev_zmom = S_prev[
IntVars::zmom].const_array(mfi);
120 const Array4<const Real>& stage_cons = S_stage_data[
IntVars::cons].const_array(mfi);
121 const Array4<const Real>& stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
123 const Array4<Real>& prev_drho_w = Delta_rho_w.array(mfi);
124 const Array4<Real>& prev_drho_theta = Delta_rho_theta.array(mfi);
125 const Array4<Real>& lagged_delta_rt = S_scratch[
IntVars::cons].array(mfi);
126 const Array4<Real>& theta_extrap = extrap.array(mfi);
128 Box gbx = mfi.growntilebox(1);
129 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
134 theta_extrap(i,j,k) = prev_drho_theta(i,j,k);
136 theta_extrap(i,j,k) = prev_drho_theta(i,j,k) + beta_d *
137 ( prev_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,
RhoTheta_comp) );
142 lagged_delta_rt(i,j,k,
RhoTheta_comp) = prev_drho_theta(i,j,k);
147 Box gtbz = mfi.nodaltilebox(2);
148 gtbz.grow(IntVect(1,1,0));
149 ParallelFor(gtbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
150 prev_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
159 #pragma omp parallel if (Gpu::notInLaunchRegion())
161 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
163 Box tbx = mfi.nodaltilebox(0);
164 Box tby = mfi.nodaltilebox(1);
166 const Array4<const Real> & stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
167 const Array4<const Real> & stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
168 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
170 const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[
IntVars::xmom].const_array(mfi);
171 const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[
IntVars::ymom].const_array(mfi);
173 const Array4<Real>& temp_cur_xmom_arr = temp_cur_xmom.array(mfi);
174 const Array4<Real>& temp_cur_ymom_arr = temp_cur_ymom.array(mfi);
176 const Array4<const Real>& prev_xmom = S_prev[
IntVars::xmom].const_array(mfi);
177 const Array4<const Real>& prev_ymom = S_prev[
IntVars::ymom].const_array(mfi);
180 const Array4< Real>& avg_xmom = S_scratch[
IntVars::xmom].array(mfi);
181 const Array4< Real>& avg_ymom = S_scratch[
IntVars::ymom].array(mfi);
183 const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
185 const Array4<Real>& theta_extrap = extrap.array(mfi);
188 const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
189 const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
194 if (nrk == 0 and step == 0) {
195 ParallelFor(tbx, tby,
196 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
198 Real new_drho_u = dtau * slow_rhs_rho_u(i,j,k);
199 avg_xmom(i,j,k) += facinv*new_drho_u;
200 temp_cur_xmom_arr(i,j,k) = stage_xmom(i,j,k) + new_drho_u;
202 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
204 Real new_drho_v = dtau * slow_rhs_rho_v(i,j,k);
205 avg_ymom(i,j,k) += facinv*new_drho_v;
206 temp_cur_ymom_arr(i,j,k) = stage_ymom(i,j,k) + new_drho_v;
209 ParallelFor(tbx, tby,
210 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
213 Real gpx = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k))*dxi;
216 if (l_use_moisture) {
222 Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i,j,k,0));
223 Real fast_rhs_rho_u = -
Gamma *
R_d * pi_c * gpx;
225 Real new_drho_u = prev_xmom(i,j,k) - stage_xmom(i,j,k)
226 + dtau * fast_rhs_rho_u + dtau * slow_rhs_rho_u(i,j,k);
228 avg_xmom(i,j,k) += facinv*new_drho_u;
230 temp_cur_xmom_arr(i,j,k) = stage_xmom(i,j,k) + new_drho_u;
232 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
235 Real gpy = (theta_extrap(i,j,k) - theta_extrap(i,j-1,k))*dyi;
238 if (l_use_moisture) {
244 Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j,k,0));
245 Real fast_rhs_rho_v = -
Gamma *
R_d * pi_c * gpy;
247 Real new_drho_v = prev_ymom(i,j,k) - stage_ymom(i,j,k)
248 + dtau * fast_rhs_rho_v + dtau * slow_rhs_rho_v(i,j,k);
250 avg_ymom(i,j,k) += facinv*new_drho_v;
252 temp_cur_ymom_arr(i,j,k) = stage_ymom(i,j,k) + new_drho_v;
258 #pragma omp parallel if (Gpu::notInLaunchRegion())
261 std::array<FArrayBox,AMREX_SPACEDIM> flux;
264 Box bx = mfi.tilebox();
265 Box tbz = surroundingNodes(bx,2);
267 Box vbx = mfi.validbox();
268 const auto& vbx_hi = ubound(vbx);
270 const Array4<const Real>& stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
271 const Array4<const Real>& stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
272 const Array4<const Real>& stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
273 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
275 const Array4<const Real>& prev_drho_theta = Delta_rho_theta.array(mfi);
277 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
278 const Array4<const Real>& stage_cons = S_stage_data[
IntVars::cons].const_array(mfi);
280 const Array4<const Real>& slow_rhs_cons = S_slow_rhs[
IntVars::cons].const_array(mfi);
281 const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[
IntVars::zmom].const_array(mfi);
283 const Array4<const Real>& prev_zmom = S_prev[
IntVars::zmom].const_array(mfi);
284 const Array4< Real>& cur_zmom = S_data[
IntVars::zmom].array(mfi);
286 const Array4<Real>& temp_cur_xmom_arr = temp_cur_xmom.array(mfi);
287 const Array4<Real>& temp_cur_ymom_arr = temp_cur_ymom.array(mfi);
291 const Array4< Real>& avg_zmom = S_scratch[
IntVars::zmom].array(mfi);
294 const Array4<const Real>& mf_m = mapfac_m->const_array(mfi);
295 const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
296 const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
299 RHS_fab.resize(tbz,1, The_Async_Arena());
302 soln_fab.resize(tbz,1, The_Async_Arena());
304 auto const& RHS_a = RHS_fab.array();
305 auto const& soln_a = soln_fab.array();
307 auto const& temp_rhs_arr = temp_rhs.array(mfi);
309 auto const& coeffA_a = coeff_A_mf.array(mfi);
310 auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
311 auto const& coeffC_a = coeff_C_mf.array(mfi);
312 auto const& coeffP_a = coeff_P_mf.array(mfi);
313 auto const& coeffQ_a = coeff_Q_mf.array(mfi);
318 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
319 flux[dir].resize(surroundingNodes(bx,dir),2);
320 flux[dir].setVal<RunOn::Device>(0.);
322 const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
323 flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
326 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
327 Real xflux_lo = (temp_cur_xmom_arr(i ,j,k) - stage_xmom(i ,j,k)) / mf_u(i ,j,0);
328 Real xflux_hi = (temp_cur_xmom_arr(i+1,j,k) - stage_xmom(i+1,j,k)) / mf_u(i+1,j,0);
329 Real yflux_lo = (temp_cur_ymom_arr(i,j ,k) - stage_ymom(i,j ,k)) / mf_v(i,j ,0);
330 Real yflux_hi = (temp_cur_ymom_arr(i,j+1,k) - stage_ymom(i,j+1,k)) / mf_v(i,j+1,0);
332 Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);
334 temp_rhs_arr(i,j,k,
Rho_comp ) = ( xflux_hi - xflux_lo ) * dxi * mfsq
335 + ( yflux_hi - yflux_lo ) * dyi * mfsq;
336 temp_rhs_arr(i,j,k,
RhoTheta_comp) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
337 xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq +
338 ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
339 yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
341 (flx_arr[0])(i,j,k,0) = xflux_lo;
342 (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
344 (flx_arr[1])(i,j,k,0) = yflux_lo;
345 (flx_arr[1])(i,j,k,1) = (flx_arr[0])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
348 (flx_arr[0])(i+1,j,k,0) = xflux_hi;
349 (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
352 (flx_arr[1])(i,j+1,k,0) = yflux_hi;
353 (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
357 Box bx_shrunk_in_k = bx;
358 int klo = tbz.smallEnd(2);
359 int khi = tbz.bigEnd(2);
360 bx_shrunk_in_k.setSmall(2,klo+1);
361 bx_shrunk_in_k.setBig(2,khi-1);
366 Real halfg = std::abs(0.5 * grav_gpu[2]);
372 ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
374 Real coeff_P = coeffP_a(i,j,k);
375 Real coeff_Q = coeffQ_a(i,j,k);
377 if (l_use_moisture) {
380 coeff_P /= (1.0 + q);
381 coeff_Q /= (1.0 + q);
388 Real Omega_kp1 = prev_zmom(i,j,k+1) - stage_zmom(i,j,k+1);
389 Real Omega_k = prev_zmom(i,j,k ) - stage_zmom(i,j,k );
390 Real Omega_km1 = prev_zmom(i,j,k-1) - stage_zmom(i,j,k-1);
394 Real old_drho_km1 = prev_cons(i,j,k-1,
Rho_comp) - stage_cons(i,j,k-1,
Rho_comp);
395 Real R0_tmp = coeff_P * prev_drho_theta(i,j,k) + coeff_Q * prev_drho_theta(i,j,k-1)
396 - halfg * ( old_drho_k + old_drho_km1 );
399 Real R1_tmp = halfg * (-slow_rhs_cons(i,j,k ,
Rho_comp)
401 +temp_rhs_arr(i,j,k,0) + temp_rhs_arr(i,j,k-1) )
406 R1_tmp += beta_1 * dzi * ( (Omega_kp1 - Omega_km1) * halfg
407 -(Omega_kp1*theta_t_hi - Omega_k *theta_t_mid) * coeff_P
408 -(Omega_k *theta_t_mid - Omega_km1*theta_t_lo ) * coeff_Q );
411 RHS_a(i,j,k) = Omega_k + dtau * (slow_rhs_rho_w(i,j,k) + R0_tmp + dtau * beta_2 * R1_tmp);
418 auto const lo = lbound(bx);
419 auto const hi = ubound(bx);
421 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
424 RHS_a (i,j,lo.z) = prev_zmom(i,j,lo.z) - stage_zmom(i,j,lo.z)
425 + dtau * slow_rhs_rho_w(i,j,lo.z);
428 RHS_a (i,j,hi.z+1) = prev_zmom(i,j,hi.z+1) - stage_zmom(i,j,hi.z+1)
429 + dtau * slow_rhs_rho_w(i,j,hi.z+1);
433 if (l_implicit_substepping) {
435 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
438 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
439 cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);
441 for (
int k = lo.z+1; k <= hi.z+1; k++) {
442 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
445 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
447 for (
int k = hi.z; k >= lo.z; k--) {
448 soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
449 cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
455 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
457 for (
int k = lo.z; k <= hi.z+1; k++) {
458 soln_a(i,j,k) = RHS_a(i,j,k);
459 cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
464 if (l_implicit_substepping) {
466 for (
int j = lo.y; j <= hi.y; ++j) {
468 for (
int i = lo.x; i <= hi.x; ++i) {
469 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
472 for (
int k = lo.z+1; k <= hi.z+1; ++k) {
473 for (
int j = lo.y; j <= hi.y; ++j) {
475 for (
int i = lo.x; i <= hi.x; ++i) {
476 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
480 for (
int j = lo.y; j <= hi.y; ++j) {
482 for (
int i = lo.x; i <= hi.x; ++i) {
483 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
486 for (
int k = hi.z; k >= lo.z; --k) {
487 for (
int j = lo.y; j <= hi.y; ++j) {
489 for (
int i = lo.x; i <= hi.x; ++i) {
490 soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
491 cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
497 for (
int k = lo.z; k <= hi.z+1; ++k) {
498 for (
int j = lo.y; j <= hi.y; ++j) {
500 for (
int i = lo.x; i <= hi.x; ++i) {
502 soln_a(i,j,k) = RHS_a(i,j,k);
504 cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
515 const Array4<Real>& prev_drho_w = Delta_rho_w.array(mfi);
516 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
518 Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * prev_drho_w(i,j,k );
519 Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * prev_drho_w(i,j,k+1);
521 avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
522 (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
523 (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
526 avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
527 (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
528 (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
531 temp_rhs_arr(i,j,k,
Rho_comp ) += dzi * ( zflux_hi - zflux_lo );
532 temp_rhs_arr(i,j,k,
RhoTheta_comp) += 0.5 * dzi * ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
533 - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) );
537 if (l_reflux && nrk == 2) {
538 int strt_comp_reflux = 0;
540 int num_comp_reflux = 1;
541 if (level < finest_level) {
542 fr_as_crse->CrseAdd(mfi,
543 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
544 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
547 fr_as_fine->FineAdd(mfi,
548 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
549 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
555 Gpu::streamSynchronize();
562 #pragma omp parallel if (Gpu::notInLaunchRegion())
564 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
566 const Box& bx = mfi.tilebox();
568 const Array4< Real>& cur_cons = S_data[
IntVars::cons].array(mfi);
569 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
570 auto const& temp_rhs_arr = temp_rhs.const_array(mfi);
571 auto const& slow_rhs_cons = S_slow_rhs[
IntVars::cons].const_array(mfi);
574 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
582 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
592 const Array4<Real>& cur_xmom = S_data[
IntVars::xmom].array(mfi);
593 const Array4<Real>& cur_ymom = S_data[
IntVars::ymom].array(mfi);
595 const Array4<Real const>& temp_cur_xmom_arr = temp_cur_xmom.const_array(mfi);
596 const Array4<Real const>& temp_cur_ymom_arr = temp_cur_ymom.const_array(mfi);
598 Box tbx = surroundingNodes(bx,0);
599 Box tby = surroundingNodes(bx,1);
601 ParallelFor(tbx, tby,
602 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
604 cur_xmom(i,j,k) = temp_cur_xmom_arr(i,j,k);
606 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
608 cur_ymom(i,j,k) = temp_cur_ymom_arr(i,j,k);
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define PrimQ2_comp
Definition: ERF_IndexDefines.H:54
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ ymom
Definition: ERF_IndexDefines.H:141
@ cons
Definition: ERF_IndexDefines.H:139
@ zmom
Definition: ERF_IndexDefines.H:142
@ xmom
Definition: ERF_IndexDefines.H:140