ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_FastRhs_N.cpp File Reference
Include dependency graph for ERF_FastRhs_N.cpp:

Functions

void erf_fast_rhs_N (int step, int nrk, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stage_data, const MultiFab &S_stage_prim, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, Vector< MultiFab > &S_scratch, const Geometry geom, const Real gravity, const Real dtau, const Real beta_s, const Real facinv, std::unique_ptr< MultiFab > &mapfac_m, std::unique_ptr< MultiFab > &mapfac_u, std::unique_ptr< MultiFab > &mapfac_v, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, bool l_implicit_substepping)
 

Function Documentation

◆ erf_fast_rhs_N()

void erf_fast_rhs_N ( int  step,
int  nrk,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stage_data,
const MultiFab &  S_stage_prim,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
Vector< MultiFab > &  S_scratch,
const Geometry  geom,
const Real  gravity,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
std::unique_ptr< MultiFab > &  mapfac_m,
std::unique_ptr< MultiFab > &  mapfac_u,
std::unique_ptr< MultiFab > &  mapfac_v,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
bool  l_implicit_substepping 
)

Function for computing the fast RHS with no terrain

Parameters
[in]stepwhich fast time step within each Runge-Kutta step
[in]nrkwhich Runge-Kutta step
[in]levellevel of resolution
[in]finest_levelfinest level of resolution
[in]S_slow_rhsslow RHS computed in erf_slow_rhs_pre
[in]S_previf step == 0, this is S_old, else the previous fast solution
[in]S_stage_datasolution at previous RK stage
[in]S_stage_primprimitive variables at previous RK stage
[in]pi_stageExner function at previous RK stage
[in]fast_coeffscoefficients for the tridiagonal solve used in the fast integrator
[out]S_datacurrent solution
[in]S_scratchscratch space
[in]geomcontainer for geometric information
[in]gravitymagnitude of gravity
[in]dtaufast time step
[in]beta_sCoefficient which determines how implicit vs explicit the solve is
[in]facinvinverse factor for time-averaging the momenta
[in]mapfac_mmap factor at cell centers
[in]mapfac_umap factor at x-faces
[in]mapfac_vmap factor at y-faces
[in,out]fr_as_crseYAFluxRegister at level l at level l / l+1 interface
[in,out]fr_as_fineYAFluxRegister at level l at level l-1 / l interface
[in]l_use_moisture
[in]l_refluxshould we add fluxes to the FluxRegisters?
[in]l_implicit_substepping
58 {
59  //
60  // NOTE: for step > 0, S_data and S_prev point to the same MultiFab data!!
61  //
62 
63  BL_PROFILE_REGION("erf_fast_rhs_N()");
64 
65  Real beta_1 = 0.5 * (1.0 - beta_s); // multiplies explicit terms
66  Real beta_2 = 0.5 * (1.0 + beta_s); // multiplies implicit terms
67 
68  // How much do we project forward the (rho theta) that is used in the horizontal momentum equations
69  Real beta_d = 0.1;
70 
71  const Real* dx = geom.CellSize();
72  const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
73 
74  Real dxi = dxInv[0];
75  Real dyi = dxInv[1];
76  Real dzi = dxInv[2];
77 
78  const auto& ba = S_stage_data[IntVars::cons].boxArray();
79  const auto& dm = S_stage_data[IntVars::cons].DistributionMap();
80 
81  MultiFab Delta_rho_theta( ba , dm, 1, 1);
82  MultiFab Delta_rho_w (convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
83 
84  MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
85  MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
86  MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
87  MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
88  MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
89 
90  // *************************************************************************
91  // Set gravity as a vector
92  const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
93  const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
94 
95  // This will hold theta extrapolated forward in time
96  MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1);
97 
98  // This will hold the update for (rho) and (rho theta)
99  MultiFab temp_rhs(S_stage_data[IntVars::zmom].boxArray(),S_stage_data[IntVars::zmom].DistributionMap(),2,0);
100 
101  // This will hold the new x- and y-momenta temporarily (so that we don't overwrite values we need when tiling)
102  MultiFab temp_cur_xmom(S_stage_data[IntVars::xmom].boxArray(),S_stage_data[IntVars::xmom].DistributionMap(),1,0);
103  MultiFab temp_cur_ymom(S_stage_data[IntVars::ymom].boxArray(),S_stage_data[IntVars::ymom].DistributionMap(),1,0);
104 
105  // We assume that in the first step (nrk == 0) we are only doing one substep.
106  AMREX_ALWAYS_ASSERT(nrk > 0 || step == 0);
107 
108  // *************************************************************************
109  // First set up some arrays we'll need
110  // *************************************************************************
111 
112 #ifdef _OPENMP
113 #pragma omp parallel if (Gpu::notInLaunchRegion())
114 #endif
115  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
116  {
117  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
118  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
119 
120  const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
121  const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
122 
123  const Array4<Real>& prev_drho_w = Delta_rho_w.array(mfi);
124  const Array4<Real>& prev_drho_theta = Delta_rho_theta.array(mfi);
125  const Array4<Real>& lagged_delta_rt = S_scratch[IntVars::cons].array(mfi);
126  const Array4<Real>& theta_extrap = extrap.array(mfi);
127 
128  Box gbx = mfi.growntilebox(1);
129  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
130  {
131  prev_drho_theta(i,j,k) = prev_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp);
132 
133  if (step == 0) {
134  theta_extrap(i,j,k) = prev_drho_theta(i,j,k);
135  } else {
136  theta_extrap(i,j,k) = prev_drho_theta(i,j,k) + beta_d *
137  ( prev_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,RhoTheta_comp) );
138  }
139 
140  // We define lagged_delta_rt for our next step as the current delta_rt
141  // (after using it above to extrapolate theta for this step)
142  lagged_delta_rt(i,j,k,RhoTheta_comp) = prev_drho_theta(i,j,k);
143  });
144 
145  // NOTE: We must do this here because for step > 0, prev_zmom and cur_zmom both point to the same data,
146  // so by the time we would use prev_zmom to define zflux, it would have already been over-written.
147  Box gtbz = mfi.nodaltilebox(2);
148  gtbz.grow(IntVect(1,1,0));
149  ParallelFor(gtbz, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
150  prev_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
151  });
152  } // mfi
153 
154  // *************************************************************************
155  // Define updates in the current RK stage
156  // *************************************************************************
157 
158 #ifdef _OPENMP
159 #pragma omp parallel if (Gpu::notInLaunchRegion())
160 #endif
161  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
162  {
163  Box tbx = mfi.nodaltilebox(0);
164  Box tby = mfi.nodaltilebox(1);
165 
166  const Array4<const Real> & stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
167  const Array4<const Real> & stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
168  const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
169 
170  const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
171  const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);
172 
173  const Array4<Real>& temp_cur_xmom_arr = temp_cur_xmom.array(mfi);
174  const Array4<Real>& temp_cur_ymom_arr = temp_cur_ymom.array(mfi);
175 
176  const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
177  const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
178 
179  // These store the advection momenta which we will use to update the slow variables
180  const Array4< Real>& avg_xmom = S_scratch[IntVars::xmom].array(mfi);
181  const Array4< Real>& avg_ymom = S_scratch[IntVars::ymom].array(mfi);
182 
183  const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
184 
185  const Array4<Real>& theta_extrap = extrap.array(mfi);
186 
187  // Map factors
188  const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
189  const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
190 
191  // *********************************************************************
192  // Define updates in the RHS of {x, y, z}-momentum equations
193  // *********************************************************************
194  if (nrk == 0 and step == 0) { // prev == stage
195  ParallelFor(tbx, tby,
196  [=] AMREX_GPU_DEVICE (int i, int j, int k)
197  {
198  Real new_drho_u = dtau * slow_rhs_rho_u(i,j,k);
199  avg_xmom(i,j,k) += facinv*new_drho_u;
200  temp_cur_xmom_arr(i,j,k) = stage_xmom(i,j,k) + new_drho_u;
201  },
202  [=] AMREX_GPU_DEVICE (int i, int j, int k)
203  {
204  Real new_drho_v = dtau * slow_rhs_rho_v(i,j,k);
205  avg_ymom(i,j,k) += facinv*new_drho_v;
206  temp_cur_ymom_arr(i,j,k) = stage_ymom(i,j,k) + new_drho_v;
207  });
208  } else {
209  ParallelFor(tbx, tby,
210  [=] AMREX_GPU_DEVICE (int i, int j, int k)
211  {
212  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
213  Real gpx = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k))*dxi;
214  gpx *= mf_u(i,j,0);
215 
216  if (l_use_moisture) {
217  Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i-1,j,k,PrimQ1_comp)
218  +prim(i,j,k,PrimQ2_comp) + prim(i-1,j,k,PrimQ2_comp) );
219  gpx /= (1.0 + q);
220  }
221 
222  Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i,j,k,0));
223  Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx;
224 
225  Real new_drho_u = prev_xmom(i,j,k) - stage_xmom(i,j,k)
226  + dtau * fast_rhs_rho_u + dtau * slow_rhs_rho_u(i,j,k);
227 
228  avg_xmom(i,j,k) += facinv*new_drho_u;
229 
230  temp_cur_xmom_arr(i,j,k) = stage_xmom(i,j,k) + new_drho_u;
231  },
232  [=] AMREX_GPU_DEVICE (int i, int j, int k)
233  {
234  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
235  Real gpy = (theta_extrap(i,j,k) - theta_extrap(i,j-1,k))*dyi;
236  gpy *= mf_v(i,j,0);
237 
238  if (l_use_moisture) {
239  Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i,j-1,k,PrimQ1_comp)
240  +prim(i,j,k,PrimQ2_comp) + prim(i,j-1,k,PrimQ2_comp) );
241  gpy /= (1.0 + q);
242  }
243 
244  Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j,k,0));
245  Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy;
246 
247  Real new_drho_v = prev_ymom(i,j,k) - stage_ymom(i,j,k)
248  + dtau * fast_rhs_rho_v + dtau * slow_rhs_rho_v(i,j,k);
249 
250  avg_ymom(i,j,k) += facinv*new_drho_v;
251 
252  temp_cur_ymom_arr(i,j,k) = stage_ymom(i,j,k) + new_drho_v;
253  });
254  } // nrk > 0 and/or step > 0
255  } //mfi
256 
257 #ifdef _OPENMP
258 #pragma omp parallel if (Gpu::notInLaunchRegion())
259 #endif
260  {
261  std::array<FArrayBox,AMREX_SPACEDIM> flux;
262  for ( MFIter mfi(S_stage_data[IntVars::cons],TileNoZ()); mfi.isValid(); ++mfi)
263  {
264  Box bx = mfi.tilebox();
265  Box tbz = surroundingNodes(bx,2);
266 
267  Box vbx = mfi.validbox();
268  const auto& vbx_hi = ubound(vbx);
269 
270  const Array4<const Real>& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
271  const Array4<const Real>& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
272  const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
273  const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
274 
275  const Array4<const Real>& prev_drho_theta = Delta_rho_theta.array(mfi);
276 
277  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
278  const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
279 
280  const Array4<const Real>& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
281  const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
282 
283  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
284  const Array4< Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
285 
286  const Array4<Real>& temp_cur_xmom_arr = temp_cur_xmom.array(mfi);
287  const Array4<Real>& temp_cur_ymom_arr = temp_cur_ymom.array(mfi);
288 
289 
290  // These store the advection momenta which we will use to update the slow variables
291  const Array4< Real>& avg_zmom = S_scratch[IntVars::zmom].array(mfi);
292 
293  // Map factors
294  const Array4<const Real>& mf_m = mapfac_m->const_array(mfi);
295  const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
296  const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
297 
298  FArrayBox RHS_fab;
299  RHS_fab.resize(tbz,1, The_Async_Arena());
300 
301  FArrayBox soln_fab;
302  soln_fab.resize(tbz,1, The_Async_Arena());
303 
304  auto const& RHS_a = RHS_fab.array();
305  auto const& soln_a = soln_fab.array();
306 
307  auto const& temp_rhs_arr = temp_rhs.array(mfi);
308 
309  auto const& coeffA_a = coeff_A_mf.array(mfi);
310  auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
311  auto const& coeffC_a = coeff_C_mf.array(mfi);
312  auto const& coeffP_a = coeff_P_mf.array(mfi);
313  auto const& coeffQ_a = coeff_Q_mf.array(mfi);
314 
315  // *************************************************************************
316  // Define flux arrays for use in advection
317  // *************************************************************************
318  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
319  flux[dir].resize(surroundingNodes(bx,dir),2);
320  flux[dir].setVal<RunOn::Device>(0.);
321  }
322  const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
323  flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
324 
325  // *********************************************************************
326  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
327  Real xflux_lo = (temp_cur_xmom_arr(i ,j,k) - stage_xmom(i ,j,k)) / mf_u(i ,j,0);
328  Real xflux_hi = (temp_cur_xmom_arr(i+1,j,k) - stage_xmom(i+1,j,k)) / mf_u(i+1,j,0);
329  Real yflux_lo = (temp_cur_ymom_arr(i,j ,k) - stage_ymom(i,j ,k)) / mf_v(i,j ,0);
330  Real yflux_hi = (temp_cur_ymom_arr(i,j+1,k) - stage_ymom(i,j+1,k)) / mf_v(i,j+1,0);
331 
332  Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);
333 
334  temp_rhs_arr(i,j,k,Rho_comp ) = ( xflux_hi - xflux_lo ) * dxi * mfsq
335  + ( yflux_hi - yflux_lo ) * dyi * mfsq;
336  temp_rhs_arr(i,j,k,RhoTheta_comp) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
337  xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq +
338  ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
339  yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
340 
341  (flx_arr[0])(i,j,k,0) = xflux_lo;
342  (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
343 
344  (flx_arr[1])(i,j,k,0) = yflux_lo;
345  (flx_arr[1])(i,j,k,1) = (flx_arr[0])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
346 
347  if (i == vbx_hi.x) {
348  (flx_arr[0])(i+1,j,k,0) = xflux_hi;
349  (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
350  }
351  if (j == vbx_hi.y) {
352  (flx_arr[1])(i,j+1,k,0) = yflux_hi;
353  (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
354  }
355  });
356 
357  Box bx_shrunk_in_k = bx;
358  int klo = tbz.smallEnd(2);
359  int khi = tbz.bigEnd(2);
360  bx_shrunk_in_k.setSmall(2,klo+1);
361  bx_shrunk_in_k.setBig(2,khi-1);
362 
363  // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
364  // We set grav_gpu[2] to be the vector component which is negative
365  // We define halfg to match the notes (which is why we take the absolute value)
366  Real halfg = std::abs(0.5 * grav_gpu[2]);
367 
368  // *********************************************************************
369  // fast_loop_on_shrunk
370  // *********************************************************************
371  //Note we don't act on the bottom or top boundaries of the domain
372  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
373  {
374  Real coeff_P = coeffP_a(i,j,k);
375  Real coeff_Q = coeffQ_a(i,j,k);
376 
377  if (l_use_moisture) {
378  Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp)
379  +prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) );
380  coeff_P /= (1.0 + q);
381  coeff_Q /= (1.0 + q);
382  }
383 
384  Real theta_t_lo = 0.5 * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
385  Real theta_t_mid = 0.5 * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) );
386  Real theta_t_hi = 0.5 * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
387 
388  Real Omega_kp1 = prev_zmom(i,j,k+1) - stage_zmom(i,j,k+1);
389  Real Omega_k = prev_zmom(i,j,k ) - stage_zmom(i,j,k );
390  Real Omega_km1 = prev_zmom(i,j,k-1) - stage_zmom(i,j,k-1);
391 
392  // line 2 last two terms (order dtau)
393  Real old_drho_k = prev_cons(i,j,k ,Rho_comp) - stage_cons(i,j,k ,Rho_comp);
394  Real old_drho_km1 = prev_cons(i,j,k-1,Rho_comp) - stage_cons(i,j,k-1,Rho_comp);
395  Real R0_tmp = coeff_P * prev_drho_theta(i,j,k) + coeff_Q * prev_drho_theta(i,j,k-1)
396  - halfg * ( old_drho_k + old_drho_km1 );
397 
398  // lines 3-5 residuals (order dtau^2) 1.0 <-> beta_2
399  Real R1_tmp = halfg * (-slow_rhs_cons(i,j,k ,Rho_comp)
400  -slow_rhs_cons(i,j,k-1,Rho_comp)
401  +temp_rhs_arr(i,j,k,0) + temp_rhs_arr(i,j,k-1) )
402  + ( coeff_P * (slow_rhs_cons(i,j,k ,RhoTheta_comp) - temp_rhs_arr(i,j,k ,RhoTheta_comp)) +
403  coeff_Q * (slow_rhs_cons(i,j,k-1,RhoTheta_comp) - temp_rhs_arr(i,j,k-1,RhoTheta_comp)) );
404 
405  // lines 6&7 consolidated (reuse Omega & metrics) (order dtau^2)
406  R1_tmp += beta_1 * dzi * ( (Omega_kp1 - Omega_km1) * halfg
407  -(Omega_kp1*theta_t_hi - Omega_k *theta_t_mid) * coeff_P
408  -(Omega_k *theta_t_mid - Omega_km1*theta_t_lo ) * coeff_Q );
409 
410  // line 1
411  RHS_a(i,j,k) = Omega_k + dtau * (slow_rhs_rho_w(i,j,k) + R0_tmp + dtau * beta_2 * R1_tmp);
412 
413  }); // bx_shrunk_in_k
414 
415  Box b2d = tbz; // Copy constructor
416  b2d.setRange(2,0);
417 
418  auto const lo = lbound(bx);
419  auto const hi = ubound(bx);
420 
421  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
422  {
423  // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
424  RHS_a (i,j,lo.z) = prev_zmom(i,j,lo.z) - stage_zmom(i,j,lo.z)
425  + dtau * slow_rhs_rho_w(i,j,lo.z);
426 
427  // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
428  RHS_a (i,j,hi.z+1) = prev_zmom(i,j,hi.z+1) - stage_zmom(i,j,hi.z+1)
429  + dtau * slow_rhs_rho_w(i,j,hi.z+1);
430  }); // b2d
431 
432 #ifdef AMREX_USE_GPU
433  if (l_implicit_substepping) {
434 
435  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
436  {
437  // w = specified Dirichlet value at k = lo.z
438  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
439  cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);
440 
441  for (int k = lo.z+1; k <= hi.z+1; k++) {
442  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
443  }
444 
445  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
446 
447  for (int k = hi.z; k >= lo.z; k--) {
448  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
449  cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
450  }
451  }); // b2d
452 
453  } else { // explicit substepping (beta_1 = 1; beta_2 = 0)
454 
455  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
456  {
457  for (int k = lo.z; k <= hi.z+1; k++) {
458  soln_a(i,j,k) = RHS_a(i,j,k);
459  cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
460  }
461  }); // b2d
462  } // end of explicit substepping
463 #else
464  if (l_implicit_substepping) {
465 
466  for (int j = lo.y; j <= hi.y; ++j) {
467  AMREX_PRAGMA_SIMD
468  for (int i = lo.x; i <= hi.x; ++i) {
469  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
470  }
471  }
472  for (int k = lo.z+1; k <= hi.z+1; ++k) {
473  for (int j = lo.y; j <= hi.y; ++j) {
474  AMREX_PRAGMA_SIMD
475  for (int i = lo.x; i <= hi.x; ++i) {
476  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
477  }
478  }
479  }
480  for (int j = lo.y; j <= hi.y; ++j) {
481  AMREX_PRAGMA_SIMD
482  for (int i = lo.x; i <= hi.x; ++i) {
483  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
484  }
485  }
486  for (int k = hi.z; k >= lo.z; --k) {
487  for (int j = lo.y; j <= hi.y; ++j) {
488  AMREX_PRAGMA_SIMD
489  for (int i = lo.x; i <= hi.x; ++i) {
490  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
491  cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
492  }
493  }
494  }
495  } else { // explicit substepping (beta_1 = 1; beta_2 = 0)
496 
497  for (int k = lo.z; k <= hi.z+1; ++k) {
498  for (int j = lo.y; j <= hi.y; ++j) {
499  AMREX_PRAGMA_SIMD
500  for (int i = lo.x; i <= hi.x; ++i) {
501 
502  soln_a(i,j,k) = RHS_a(i,j,k);
503 
504  cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
505  }
506  }
507  }
508 
509  } // end of explicit substepping
510 #endif
511 
512  // **************************************************************************
513  // Define updates in the RHS of rho and (rho theta)
514  // **************************************************************************
515  const Array4<Real>& prev_drho_w = Delta_rho_w.array(mfi);
516  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
517  {
518  Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * prev_drho_w(i,j,k );
519  Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * prev_drho_w(i,j,k+1);
520 
521  avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
522  (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
523  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
524 
525  if (k == vbx_hi.z) {
526  avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
527  (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
528  (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
529  }
530 
531  temp_rhs_arr(i,j,k,Rho_comp ) += dzi * ( zflux_hi - zflux_lo );
532  temp_rhs_arr(i,j,k,RhoTheta_comp) += 0.5 * dzi * ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
533  - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) );
534  });
535 
536  // We only add to the flux registers in the final RK step
537  if (l_reflux && nrk == 2) {
538  int strt_comp_reflux = 0;
539  // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries
540  int num_comp_reflux = 1;
541  if (level < finest_level) {
542  fr_as_crse->CrseAdd(mfi,
543  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
544  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
545  }
546  if (level > 0) {
547  fr_as_fine->FineAdd(mfi,
548  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
549  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
550  }
551 
552  // This is necessary here so we don't go on to the next FArrayBox without
553  // having finished copying the fluxes into the FluxRegisters (since the fluxes
554  // are stored in temporary FArrayBox's)
555  Gpu::streamSynchronize();
556 
557  } // two-way coupling
558  } // mfi
559  } // OMP
560 
561 #ifdef _OPENMP
562 #pragma omp parallel if (Gpu::notInLaunchRegion())
563 #endif
564  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
565  {
566  const Box& bx = mfi.tilebox();
567 
568  const Array4< Real>& cur_cons = S_data[IntVars::cons].array(mfi);
569  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
570  auto const& temp_rhs_arr = temp_rhs.const_array(mfi);
571  auto const& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
572 
573  if (step == 0) {
574  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
575  {
576  cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp) +
577  dtau * (slow_rhs_cons(i,j,k,Rho_comp) - temp_rhs_arr(i,j,k,Rho_comp));
578  cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp) +
579  dtau * (slow_rhs_cons(i,j,k,RhoTheta_comp) - temp_rhs_arr(i,j,k,RhoTheta_comp));
580  });
581  } else {
582  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
583  {
584  //
585  // We didn't need to set cur_cons = prev_cons above because they point to the same data for step > 0
586  //
587  cur_cons(i,j,k,Rho_comp) += dtau * (slow_rhs_cons(i,j,k,Rho_comp) - temp_rhs_arr(i,j,k,Rho_comp));
588  cur_cons(i,j,k,RhoTheta_comp) += dtau * (slow_rhs_cons(i,j,k,RhoTheta_comp) - temp_rhs_arr(i,j,k,RhoTheta_comp));
589  });
590  } // step = 0
591 
592  const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
593  const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
594 
595  const Array4<Real const>& temp_cur_xmom_arr = temp_cur_xmom.const_array(mfi);
596  const Array4<Real const>& temp_cur_ymom_arr = temp_cur_ymom.const_array(mfi);
597 
598  Box tbx = surroundingNodes(bx,0);
599  Box tby = surroundingNodes(bx,1);
600 
601  ParallelFor(tbx, tby,
602  [=] AMREX_GPU_DEVICE (int i, int j, int k)
603  {
604  cur_xmom(i,j,k) = temp_cur_xmom_arr(i,j,k);
605  },
606  [=] AMREX_GPU_DEVICE (int i, int j, int k)
607  {
608  cur_ymom(i,j,k) = temp_cur_ymom_arr(i,j,k);
609  });
610 
611  } // mfi
612 }
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define PrimQ2_comp
Definition: ERF_IndexDefines.H:54
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ ymom
Definition: ERF_IndexDefines.H:141
@ cons
Definition: ERF_IndexDefines.H:139
@ zmom
Definition: ERF_IndexDefines.H:142
@ xmom
Definition: ERF_IndexDefines.H:140
Here is the call graph for this function: