ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_FastRhs_N.cpp File Reference

Functions

void erf_fast_rhs_N (int step, int nrk, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stage_data, const MultiFab &S_stage_prim, const MultiFab &qt, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, Vector< MultiFab > &S_scratch, const MultiFab &cc_src, const MultiFab &xmom_src, const MultiFab &ymom_src, const MultiFab &zmom_src, const Geometry geom, const Real gravity, const Real dtau, const Real beta_s, const Real facinv, Vector< std::unique_ptr< MultiFab >> &mapfac, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, bool l_implicit_substepping)
 

Function Documentation

◆ erf_fast_rhs_N()

void erf_fast_rhs_N ( int  step,
int  nrk,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stage_data,
const MultiFab &  S_stage_prim,
const MultiFab &  qt,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
Vector< MultiFab > &  S_scratch,
const MultiFab &  cc_src,
const MultiFab &  xmom_src,
const MultiFab &  ymom_src,
const MultiFab &  zmom_src,
const Geometry  geom,
const Real  gravity,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
Vector< std::unique_ptr< MultiFab >> &  mapfac,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
bool  l_implicit_substepping 
)

Function for computing the fast RHS with no terrain

Parameters
[in]      step                    which fast time step within each Runge-Kutta step
[in]      nrk                     which Runge-Kutta step
[in]      level                   level of resolution
[in]      finest_level            finest level of resolution
[in]      S_slow_rhs              slow RHS computed in erf_slow_rhs_pre
[in]      S_prev                  if step == 0 this is S_old; otherwise the previous fast solution
[in]      S_stage_data            solution at previous RK stage
[in]      S_stage_prim            primitive variables at previous RK stage
[in]      qt                      moisture variable used to weight the pressure-gradient terms when l_use_moisture is true
[in]      pi_stage                Exner function at previous RK stage
[in]      fast_coeffs             coefficients for the tridiagonal solve used in the fast integrator
[out]     S_data                  current solution
[in]      S_scratch               scratch space
[in]      cc_src                  source terms for cell-centered conserved variables
[in]      xmom_src                source terms for x-momentum
[in]      ymom_src                source terms for y-momentum
[in]      zmom_src                source terms for z-momentum
[in]      geom                    container for geometric information
[in]      gravity                 magnitude of gravity
[in]      dtau                    fast time step
[in]      beta_s                  coefficient which determines how implicit vs explicit the vertical solve is (see the weighting sketch below)
[in]      facinv                  inverse factor for time-averaging the momenta
[in]      mapfac                  vector of map factors
[in,out]  fr_as_crse              YAFluxRegister at the level l / l+1 interface, used when this level (l) is the coarse side
[in,out]  fr_as_fine              YAFluxRegister at the level l-1 / l interface, used when this level (l) is the fine side
[in]      l_use_moisture          whether to include the moisture correction in the pressure-gradient terms
[in]      l_reflux                should we add fluxes to the FluxRegisters?
[in]      l_implicit_substepping  whether the vertical update uses the implicit tridiagonal solve rather than the explicit update
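
Before the listing, it is worth making the substep weighting explicit. The fast integrator off-centers the vertical (acoustic) update with the parameter beta_s; in the notation of the comments below (a sketch of the weighting only, with symbols taken from the code rather than from any one reference),

\[
\beta_1 = \tfrac{1}{2}\,(1 - \beta_s), \qquad
\beta_2 = \tfrac{1}{2}\,(1 + \beta_s), \qquad
\beta_1 + \beta_2 = 1 ,
\]

so beta_s = -1 gives a fully explicit vertical update (beta_1 = 1, beta_2 = 0) and beta_s = 1 a fully implicit one. The (rho theta) perturbation used in the horizontal pressure gradient is likewise projected forward in fast time with the weight beta_d:

\[
\widetilde{\Delta(\rho\theta)}^{\,\tau}
= \Delta(\rho\theta)^{\tau}
+ \beta_d \left( \Delta(\rho\theta)^{\tau} - \Delta(\rho\theta)^{\tau - \delta\tau} \right),
\]

which is what the code stores in extrap, with lagged_delta_rt holding the previous substep's perturbation.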
{
    //
    // NOTE: for step > 0, S_data and S_prev point to the same MultiFab data!!
    //

    BL_PROFILE_REGION("erf_fast_rhs_N()");

    Real beta_1 = 0.5 * (1.0 - beta_s);  // multiplies explicit terms
    Real beta_2 = 0.5 * (1.0 + beta_s);  // multiplies implicit terms

    // How much do we project forward the (rho theta) that is used in the horizontal momentum equations
    Real beta_d = 0.1;

    const Real* dx = geom.CellSize();
    const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();

    Real dxi = dxInv[0];
    Real dyi = dxInv[1];
    Real dzi = dxInv[2];

    const auto& ba = S_stage_data[IntVars::cons].boxArray();
    const auto& dm = S_stage_data[IntVars::cons].DistributionMap();

    MultiFab Delta_rho_theta(        ba                 , dm, 1, 1);
    MultiFab Delta_rho_w    (convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));

    MultiFab coeff_A_mf    (fast_coeffs, make_alias, 0, 1);
    MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
    MultiFab coeff_C_mf    (fast_coeffs, make_alias, 2, 1);
    MultiFab coeff_P_mf    (fast_coeffs, make_alias, 3, 1);
    MultiFab coeff_Q_mf    (fast_coeffs, make_alias, 4, 1);

    // *************************************************************************
    // Set gravity as a vector
    const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
    const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};

    // This will hold theta extrapolated forward in time
    MultiFab extrap(S_data[IntVars::cons].boxArray(), S_data[IntVars::cons].DistributionMap(), 1, 1);

    // This will hold the update for (rho) and (rho theta)
    MultiFab temp_rhs(S_stage_data[IntVars::zmom].boxArray(), S_stage_data[IntVars::zmom].DistributionMap(), 2, 0);

    // This will hold the new x- and y-momenta temporarily (so that we don't overwrite values we need when tiling)
    MultiFab temp_cur_xmom(S_stage_data[IntVars::xmom].boxArray(), S_stage_data[IntVars::xmom].DistributionMap(), 1, 0);
    MultiFab temp_cur_ymom(S_stage_data[IntVars::ymom].boxArray(), S_stage_data[IntVars::ymom].DistributionMap(), 1, 0);

    // We assume that in the first RK stage (nrk == 0) we are only doing one substep.
    AMREX_ALWAYS_ASSERT(nrk > 0 || step == 0);

    // *************************************************************************
    // First set up some arrays we'll need
    // *************************************************************************

#ifdef _OPENMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
    for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
    {
        const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
        const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);

        const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
        const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);

        const Array4<Real>& prev_drho_w     = Delta_rho_w.array(mfi);
        const Array4<Real>& prev_drho_theta = Delta_rho_theta.array(mfi);
        const Array4<Real>& lagged_delta_rt = S_scratch[IntVars::cons].array(mfi);
        const Array4<Real>& theta_extrap    = extrap.array(mfi);

        Box gbx = mfi.growntilebox(1);
        ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
        {
            prev_drho_theta(i,j,k) = prev_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp);

            if (step == 0) {
                theta_extrap(i,j,k) = prev_drho_theta(i,j,k);
            } else {
                theta_extrap(i,j,k) = prev_drho_theta(i,j,k) + beta_d *
                    ( prev_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,RhoTheta_comp) );
            }

            // We define lagged_delta_rt for our next step as the current delta_rt
            // (after using it above to extrapolate theta for this step)
            lagged_delta_rt(i,j,k,RhoTheta_comp) = prev_drho_theta(i,j,k);
        });

        // NOTE: We must do this here because for step > 0, prev_zmom and cur_zmom both point to the same data,
        // so by the time we would use prev_zmom to define zflux, it would have already been over-written.
        Box gtbz = mfi.nodaltilebox(2);
        gtbz.grow(IntVect(1,1,0));
        ParallelFor(gtbz, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
            prev_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
        });
    } // mfi

    // *************************************************************************
    // Define updates in the current RK stage
    // *************************************************************************

#ifdef _OPENMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
    for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
    {
        Box tbx = mfi.nodaltilebox(0);
        Box tby = mfi.nodaltilebox(1);

        const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
        const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);

        const Array4<const Real>& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
        const Array4<const Real>& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
        const Array4<const Real>& qt_arr     = qt.const_array(mfi);

        const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
        const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);

        const Array4<Real>& temp_cur_xmom_arr = temp_cur_xmom.array(mfi);
        const Array4<Real>& temp_cur_ymom_arr = temp_cur_ymom.array(mfi);

        const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
        const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);

        // These store the advection momenta which we will use to update the slow variables
        const Array4<Real>& avg_xmom = S_scratch[IntVars::xmom].array(mfi);
        const Array4<Real>& avg_ymom = S_scratch[IntVars::ymom].array(mfi);

        const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);

        const Array4<Real>& theta_extrap = extrap.array(mfi);

        // Map factors
        const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
        const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);

        // *********************************************************************
        // Define updates in the RHS of {x, y, z}-momentum equations
        // *********************************************************************
        if (nrk == 0 and step == 0) { // prev == stage
            ParallelFor(tbx, tby,
            [=] AMREX_GPU_DEVICE (int i, int j, int k)
            {
                Real new_drho_u = dtau * slow_rhs_rho_u(i,j,k) + dtau * xmom_src_arr(i,j,k);
                avg_xmom(i,j,k) += facinv*new_drho_u;
                temp_cur_xmom_arr(i,j,k) = stage_xmom(i,j,k) + new_drho_u;
            },
            [=] AMREX_GPU_DEVICE (int i, int j, int k)
            {
                Real new_drho_v = dtau * slow_rhs_rho_v(i,j,k) + dtau * ymom_src_arr(i,j,k);
                avg_ymom(i,j,k) += facinv*new_drho_v;
                temp_cur_ymom_arr(i,j,k) = stage_ymom(i,j,k) + new_drho_v;
            });
        } else {
            ParallelFor(tbx, tby,
            [=] AMREX_GPU_DEVICE (int i, int j, int k)
            {
                // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
                Real gpx = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k))*dxi;
                gpx *= mf_ux(i,j,0);

                Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) : 0.0;

                Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i,j,k,0));
                Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx / (1.0 + q);

                Real new_drho_u = prev_xmom(i,j,k) - stage_xmom(i,j,k)
                                + dtau * fast_rhs_rho_u + dtau * slow_rhs_rho_u(i,j,k)
                                + dtau * xmom_src_arr(i,j,k);

                avg_xmom(i,j,k) += facinv*new_drho_u;

                temp_cur_xmom_arr(i,j,k) = stage_xmom(i,j,k) + new_drho_u;
            },
            [=] AMREX_GPU_DEVICE (int i, int j, int k)
            {
                // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
                Real gpy = (theta_extrap(i,j,k) - theta_extrap(i,j-1,k))*dyi;
                gpy *= mf_vy(i,j,0);

                Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) : 0.0;

                Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j,k,0));
                Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy / (1.0 + q);

                Real new_drho_v = prev_ymom(i,j,k) - stage_ymom(i,j,k)
                                + dtau * fast_rhs_rho_v + dtau * slow_rhs_rho_v(i,j,k)
                                + dtau * ymom_src_arr(i,j,k);

                avg_ymom(i,j,k) += facinv*new_drho_v;

                temp_cur_ymom_arr(i,j,k) = stage_ymom(i,j,k) + new_drho_v;
            });
        } // nrk > 0 and/or step > 0
    } // mfi

#ifdef _OPENMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
    {
    std::array<FArrayBox,AMREX_SPACEDIM> flux;
    for ( MFIter mfi(S_stage_data[IntVars::cons],TileNoZ()); mfi.isValid(); ++mfi)
    {
        Box bx  = mfi.tilebox();
        Box tbz = surroundingNodes(bx,2);

        Box vbx = mfi.validbox();
        const auto& vbx_hi = ubound(vbx);

        const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);

        const Array4<const Real>& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
        const Array4<const Real>& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
        const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
        const Array4<const Real>& prim       = S_stage_prim.const_array(mfi);
        const Array4<const Real>& qt_arr     = qt.const_array(mfi);

        const Array4<const Real>& prev_drho_theta = Delta_rho_theta.array(mfi);

        const Array4<const Real>& prev_cons  = S_prev[IntVars::cons].const_array(mfi);
        const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);

        const Array4<const Real>& slow_rhs_cons  = S_slow_rhs[IntVars::cons].const_array(mfi);
        const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);

        const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
        const Array4<      Real>& cur_zmom  = S_data[IntVars::zmom].array(mfi);

        const Array4<Real>& temp_cur_xmom_arr = temp_cur_xmom.array(mfi);
        const Array4<Real>& temp_cur_ymom_arr = temp_cur_ymom.array(mfi);

        // These store the advection momenta which we will use to update the slow variables
        const Array4<Real>& avg_zmom = S_scratch[IntVars::zmom].array(mfi);

        // Map factors
        const Array4<const Real>& mf_mx = mapfac[MapFacType::m_x]->const_array(mfi);
        const Array4<const Real>& mf_my = mapfac[MapFacType::m_y]->const_array(mfi);
        const Array4<const Real>& mf_uy = mapfac[MapFacType::u_y]->const_array(mfi);
        const Array4<const Real>& mf_vx = mapfac[MapFacType::v_x]->const_array(mfi);

        FArrayBox RHS_fab;
        RHS_fab.resize(tbz,1, The_Async_Arena());

        FArrayBox soln_fab;
        soln_fab.resize(tbz,1, The_Async_Arena());

        auto const& RHS_a  = RHS_fab.array();
        auto const& soln_a = soln_fab.array();

        auto const& temp_rhs_arr = temp_rhs.array(mfi);

        auto const& coeffA_a     = coeff_A_mf.array(mfi);
        auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
        auto const& coeffC_a     = coeff_C_mf.array(mfi);
        auto const& coeffP_a     = coeff_P_mf.array(mfi);
        auto const& coeffQ_a     = coeff_Q_mf.array(mfi);

        // *************************************************************************
        // Define flux arrays for use in advection
        // *************************************************************************
        for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
            flux[dir].resize(surroundingNodes(bx,dir),2);
            flux[dir].setVal<RunOn::Device>(0.);
        }
        const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
            flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};

        // *********************************************************************
        ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
            Real xflux_lo = (temp_cur_xmom_arr(i  ,j,k) - stage_xmom(i  ,j,k)) / mf_uy(i  ,j,0);
            Real xflux_hi = (temp_cur_xmom_arr(i+1,j,k) - stage_xmom(i+1,j,k)) / mf_uy(i+1,j,0);
            Real yflux_lo = (temp_cur_ymom_arr(i,j  ,k) - stage_ymom(i,j  ,k)) / mf_vx(i,j  ,0);
            Real yflux_hi = (temp_cur_ymom_arr(i,j+1,k) - stage_ymom(i,j+1,k)) / mf_vx(i,j+1,0);

            Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);

            temp_rhs_arr(i,j,k,Rho_comp     ) = ( xflux_hi - xflux_lo ) * dxi * mfsq
                                              + ( yflux_hi - yflux_lo ) * dyi * mfsq;
            temp_rhs_arr(i,j,k,RhoTheta_comp) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
                                                   xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq +
                                                 ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
                                                   yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;

            if (l_reflux) {
                (flx_arr[0])(i,j,k,0) = xflux_lo;
                (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i  ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));

                (flx_arr[1])(i,j,k,0) = yflux_lo;
                (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j  ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));

                if (i == vbx_hi.x) {
                    (flx_arr[0])(i+1,j,k,0) = xflux_hi;
                    (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
                }
                if (j == vbx_hi.y) {
                    (flx_arr[1])(i,j+1,k,0) = yflux_hi;
                    (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
                }
            }
        });

        Box bx_shrunk_in_k = bx;
        int klo = tbz.smallEnd(2);
        int khi = tbz.bigEnd(2);
        bx_shrunk_in_k.setSmall(2,klo+1);
        bx_shrunk_in_k.setBig(2,khi-1);

        // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
        // We set grav_gpu[2] to be the vector component which is negative
        // We define halfg to match the notes (which is why we take the absolute value)
        Real halfg = std::abs(0.5 * grav_gpu[2]);

        // *********************************************************************
        // fast_loop_on_shrunk
        // *********************************************************************
        // Note we don't act on the bottom or top boundaries of the domain
        ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
        {
            Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j,k-1)) : 0.0;

            Real coeff_P = coeffP_a(i,j,k) / (1.0 + q);
            Real coeff_Q = coeffQ_a(i,j,k) / (1.0 + q);

            Real theta_t_lo  = 0.5 * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
            Real theta_t_mid = 0.5 * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k  ,PrimTheta_comp) );
            Real theta_t_hi  = 0.5 * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );

            Real Omega_kp1 = prev_zmom(i,j,k+1) - stage_zmom(i,j,k+1);
            Real Omega_k   = prev_zmom(i,j,k  ) - stage_zmom(i,j,k  );
            Real Omega_km1 = prev_zmom(i,j,k-1) - stage_zmom(i,j,k-1);

            // line 2 last two terms (order dtau)
            Real old_drho_k   = prev_cons(i,j,k  ,Rho_comp) - stage_cons(i,j,k  ,Rho_comp);
            Real old_drho_km1 = prev_cons(i,j,k-1,Rho_comp) - stage_cons(i,j,k-1,Rho_comp);
            Real R0_tmp = coeff_P * prev_drho_theta(i,j,k) + coeff_Q * prev_drho_theta(i,j,k-1)
                        - halfg * ( old_drho_k + old_drho_km1 );

            // lines 3-5 residuals (order dtau^2) 1.0 <-> beta_2
            Real R1_tmp = halfg * (-slow_rhs_cons(i,j,k  ,Rho_comp)
                                   -slow_rhs_cons(i,j,k-1,Rho_comp)
                                   +temp_rhs_arr(i,j,k  ,Rho_comp) + temp_rhs_arr(i,j,k-1,Rho_comp) )
                        + ( coeff_P * (slow_rhs_cons(i,j,k  ,RhoTheta_comp) - temp_rhs_arr(i,j,k  ,RhoTheta_comp)) +
                            coeff_Q * (slow_rhs_cons(i,j,k-1,RhoTheta_comp) - temp_rhs_arr(i,j,k-1,RhoTheta_comp)) );

            // lines 6&7 consolidated (reuse Omega & metrics) (order dtau^2)
            R1_tmp += beta_1 * dzi * ( (Omega_kp1 - Omega_km1) * halfg
                                      -(Omega_kp1*theta_t_hi  - Omega_k  *theta_t_mid) * coeff_P
                                      -(Omega_k  *theta_t_mid - Omega_km1*theta_t_lo ) * coeff_Q );

            // line 1
            RHS_a(i,j,k) = Omega_k + dtau * (slow_rhs_rho_w(i,j,k) + R0_tmp + dtau * beta_2 * R1_tmp + zmom_src_arr(i,j,k));

        }); // bx_shrunk_in_k

        Box b2d = tbz; // Copy constructor
        b2d.setRange(2,0);

        auto const lo = lbound(bx);
        auto const hi = ubound(bx);

        ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
        {
            // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
            RHS_a(i,j,lo.z) = prev_zmom(i,j,lo.z) - stage_zmom(i,j,lo.z)
                            + dtau * slow_rhs_rho_w(i,j,lo.z)
                            + dtau * zmom_src_arr(i,j,lo.z);

            // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
            RHS_a(i,j,hi.z+1) = prev_zmom(i,j,hi.z+1) - stage_zmom(i,j,hi.z+1)
                              + dtau * slow_rhs_rho_w(i,j,hi.z+1)
                              + dtau * zmom_src_arr(i,j,hi.z+1);
        }); // b2d
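        // -------------------------------------------------------------------------
        // For each (i,j) column, the implicit branch below solves a tridiagonal
        // system
        //     A_k w_{k-1} + B_k w_k + C_k w_{k+1} = RHS_k,   k = lo.z, ..., hi.z+1
        // via the Thomas algorithm: a forward sweep that eliminates the
        // sub-diagonal, followed by back substitution. The coefficients arrive
        // pre-factored in fast_coeffs; inv_coeffB_a appears to hold the reciprocal
        // of the already-eliminated diagonal, so the sweep costs one multiply and
        // one multiply-add per row. The explicit branch (beta_1 = 1, beta_2 = 0)
        // simply copies RHS into the solution.
        // -------------------------------------------------------------------------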
#ifdef AMREX_USE_GPU
        if (l_implicit_substepping) {

            ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
            {
                // w = specified Dirichlet value at k = lo.z
                soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
                cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);

                for (int k = lo.z+1; k <= hi.z+1; k++) {
                    soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
                }

                cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);

                for (int k = hi.z; k >= lo.z; k--) {
                    soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
                    cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
                }
            }); // b2d

        } else { // explicit substepping (beta_1 = 1; beta_2 = 0)

            ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
            {
                for (int k = lo.z; k <= hi.z+1; k++) {
                    soln_a(i,j,k) = RHS_a(i,j,k);
                    cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
                }
            }); // b2d
        } // end of explicit substepping
#else
        if (l_implicit_substepping) {

            for (int j = lo.y; j <= hi.y; ++j) {
                AMREX_PRAGMA_SIMD
                for (int i = lo.x; i <= hi.x; ++i) {
                    soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
                }
            }
            for (int k = lo.z+1; k <= hi.z+1; ++k) {
                for (int j = lo.y; j <= hi.y; ++j) {
                    AMREX_PRAGMA_SIMD
                    for (int i = lo.x; i <= hi.x; ++i) {
                        soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
                    }
                }
            }
            for (int j = lo.y; j <= hi.y; ++j) {
                AMREX_PRAGMA_SIMD
                for (int i = lo.x; i <= hi.x; ++i) {
                    cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
                }
            }
            for (int k = hi.z; k >= lo.z; --k) {
                for (int j = lo.y; j <= hi.y; ++j) {
                    AMREX_PRAGMA_SIMD
                    for (int i = lo.x; i <= hi.x; ++i) {
                        soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
                        cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
                    }
                }
            }
        } else { // explicit substepping (beta_1 = 1; beta_2 = 0)

            for (int k = lo.z; k <= hi.z+1; ++k) {
                for (int j = lo.y; j <= hi.y; ++j) {
                    AMREX_PRAGMA_SIMD
                    for (int i = lo.x; i <= hi.x; ++i) {
                        soln_a(i,j,k) = RHS_a(i,j,k);
                        cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
                    }
                }
            }
        } // end of explicit substepping
#endif

        // **************************************************************************
        // Define updates in the RHS of rho and (rho theta)
        // **************************************************************************
        const Array4<Real>& prev_drho_w = Delta_rho_w.array(mfi);
        ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
        {
            Real zflux_lo = beta_2 * soln_a(i,j,k  ) + beta_1 * prev_drho_w(i,j,k  );
            Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * prev_drho_w(i,j,k+1);

            avg_zmom(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
            if (l_reflux) {
                (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
                (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
            }

            if (k == vbx_hi.z) {
                avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
                if (l_reflux) {
                    (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
                    (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
                }
            }

            temp_rhs_arr(i,j,k,Rho_comp     ) += dzi * ( zflux_hi - zflux_lo );
            temp_rhs_arr(i,j,k,RhoTheta_comp) += 0.5 * dzi * ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
                                                             - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) );
        });

        // We only add to the flux registers in the final RK step
        if (l_reflux) {
            int strt_comp_reflux = 0;
            // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries
            int num_comp_reflux = 1;
            if (level < finest_level) {
                fr_as_crse->CrseAdd(mfi,
                                    {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
                                    dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
            }
            if (level > 0) {
                fr_as_fine->FineAdd(mfi,
                                    {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
                                    dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
            }

            // This is necessary here so we don't go on to the next FArrayBox without
            // having finished copying the fluxes into the FluxRegisters (since the fluxes
            // are stored in temporary FArrayBox's)
            Gpu::streamSynchronize();

        } // two-way coupling
    } // mfi
    } // OMP
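
    // -------------------------------------------------------------------------
    // At this point S_scratch holds the running, facinv-weighted time averages
    // of the fast momenta (avg_xmom, avg_ymom, avg_zmom); as noted where they
    // are declared, these averaged advection momenta are used later to update
    // the slow (scalar) variables.
    // -------------------------------------------------------------------------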

#ifdef _OPENMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
    for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.tilebox();

        const Array4<      Real>& cur_cons  = S_data[IntVars::cons].array(mfi);
        const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
        auto const& temp_rhs_arr  = temp_rhs.const_array(mfi);
        auto const& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
        const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);

        if (step == 0) {
            ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
            {
                cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp) +
                    dtau * (slow_rhs_cons(i,j,k,Rho_comp) - temp_rhs_arr(i,j,k,Rho_comp));
                cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp) +
                    dtau * (slow_rhs_cons(i,j,k,RhoTheta_comp) - temp_rhs_arr(i,j,k,RhoTheta_comp));

                // add in source terms for cell-centered conserved variables
                cur_cons(i,j,k,Rho_comp)      += dtau * cc_src_arr(i,j,k,Rho_comp);
                cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
            });
        } else {
            ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
            {
                //
                // We didn't need to set cur_cons = prev_cons above because they point to the same data for step > 0
                //
                cur_cons(i,j,k,Rho_comp)      += dtau * (slow_rhs_cons(i,j,k,Rho_comp)      - temp_rhs_arr(i,j,k,Rho_comp));
                cur_cons(i,j,k,RhoTheta_comp) += dtau * (slow_rhs_cons(i,j,k,RhoTheta_comp) - temp_rhs_arr(i,j,k,RhoTheta_comp));

                // add in source terms for cell-centered conserved variables
                cur_cons(i,j,k,Rho_comp)      += dtau * cc_src_arr(i,j,k,Rho_comp);
                cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
            });
        } // step = 0

        const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
        const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);

        const Array4<Real const>& temp_cur_xmom_arr = temp_cur_xmom.const_array(mfi);
        const Array4<Real const>& temp_cur_ymom_arr = temp_cur_ymom.const_array(mfi);

        Box tbx = surroundingNodes(bx,0);
        Box tby = surroundingNodes(bx,1);

        ParallelFor(tbx, tby,
        [=] AMREX_GPU_DEVICE (int i, int j, int k)
        {
            cur_xmom(i,j,k) = temp_cur_xmom_arr(i,j,k);
        },
        [=] AMREX_GPU_DEVICE (int i, int j, int k)
        {
            cur_ymom(i,j,k) = temp_cur_ymom_arr(i,j,k);
        });

    } // mfi
}
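
A note on the fast horizontal pressure-gradient term used above (a derivation sketch from standard Exner-function thermodynamics; nothing here is ERF-specific beyond the symbol names). With \(p = p_0\,(R_d\,\rho\theta/p_0)^{\gamma}\) and \(\pi = (p/p_0)^{R_d/c_p}\), one has \(\pi = (R_d\,\rho\theta/p_0)^{R_d/c_v}\), and therefore

\[
c_p\,\rho\theta\,\frac{\partial \pi'}{\partial x}
\;\approx\; c_p\,\rho\theta\,\frac{d\pi}{d(\rho\theta)}\bigg|_{\text{stage}}
\frac{\partial\,\Delta(\rho\theta)}{\partial x}
\;=\; \gamma\,R_d\,\bar{\pi}\,\frac{\partial\,\Delta(\rho\theta)}{\partial x} .
\]

This is the expression -Gamma * R_d * pi_c * gpx in the x-momentum update (and its y analogue): \(\Delta(\rho\theta)\) is replaced by its beta_d-extrapolated value theta_extrap, the face-centered average pi_c stands in for \(\bar{\pi}\), the map factor is folded into gpx, and the whole term is divided by \((1 + q)\) when moisture is active.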
Referenced definitions:
- R_d: constexpr amrex::Real (ERF_Constants.H:10)
- Gamma: constexpr amrex::Real (ERF_Constants.H:19)
- MapFacType enumerators u_x, m_x, v_x (ERF_DataStruct.H:22) and u_y, v_y, m_y (ERF_DataStruct.H:23)
- Rho_comp: #define (ERF_IndexDefines.H:36)
- RhoTheta_comp: #define (ERF_IndexDefines.H:37)
- PrimTheta_comp: #define (ERF_IndexDefines.H:50)
- TileNoZ(): AMREX_FORCE_INLINE amrex::IntVect (ERF_TileNoZ.H:11)
- gpx, gpy: enumerators (ERF_IndexDefines.H:150, 151)
- IntVars enumerators cons (ERF_IndexDefines.H:158), xmom (159), ymom (160), zmom (161)
- qt: enumerator (ERF_Kessler.H:27)