ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_Substep_NS.cpp File Reference
Include dependency graph for ERF_Substep_NS.cpp:

Functions

void erf_substep_NS (int step, int nrk, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stage_data, const MultiFab &S_stage_prim, const MultiFab &qt, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, MultiFab &lagged_delta_rt, MultiFab &avg_xmom, MultiFab &avg_ymom, MultiFab &avg_zmom, const MultiFab &cc_src, const MultiFab &xmom_src, const MultiFab &ymom_src, const MultiFab &zmom_src, const Geometry geom, const Real gravity, amrex::Gpu::DeviceVector< amrex::Real > &stretched_dz_d, const Real dtau, const Real beta_s, const Real facinv, Vector< std::unique_ptr< MultiFab >> &mapfac, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, const amrex::Real *sinesq_stag_d, const Real l_damp_coef)
 

Function Documentation

◆ erf_substep_NS()

void erf_substep_NS ( int  step,
int  nrk,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stage_data,
const MultiFab &  S_stage_prim,
const MultiFab &  qt,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
MultiFab &  lagged_delta_rt,
MultiFab &  avg_xmom,
MultiFab &  avg_ymom,
MultiFab &  avg_zmom,
const MultiFab &  cc_src,
const MultiFab &  xmom_src,
const MultiFab &  ymom_src,
const MultiFab &  zmom_src,
const Geometry  geom,
const Real  gravity,
amrex::Gpu::DeviceVector< amrex::Real > &  stretched_dz_d,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
Vector< std::unique_ptr< MultiFab >> &  mapfac,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
const amrex::Real *  sinesq_stag_d,
const Real  l_damp_coef 
)

Function for computing the fast RHS with no terrain and variable vertical spacing

Parameters
[in] step — which fast time step within each Runge-Kutta step
[in] nrk — which Runge-Kutta step
[in] level — level of resolution
[in] finest_level — finest level of resolution
[in] S_slow_rhs — slow RHS computed in erf_slow_rhs_pre
[in] S_prev — if step == 0, this is S_old, else the previous fast solution
[in] S_stage_data — solution at previous RK stage
[in] S_stage_prim — primitive variables at previous RK stage
[in] pi_stage — Exner function at previous RK stage
[in] fast_coeffs — coefficients for the tridiagonal solve used in the fast integrator
[out] S_data — current solution
[in,out] lagged_delta_rt
[in,out] avg_xmom — time-averaged x-momentum to be used for updating slow variables
[in,out] avg_ymom — time-averaged y-momentum to be used for updating slow variables
[in,out] avg_zmom — time-averaged z-momentum to be used for updating slow variables
[in] cc_src — source terms for conserved variables
[in] xmom_src — source terms for x-momentum
[in] ymom_src — source terms for y-momentum
[in] zmom_src — source terms for z-momentum
[in] geom — container for geometric information
[in] gravity — magnitude of gravity
[in] stretched_dz_d
[in] dtau — fast time step
[in] beta_s — coefficient which determines how implicit vs explicit the solve is
[in] facinv — inverse factor for time-averaging the momenta
[in] mapfac — vector of map factors
[in,out] fr_as_crse — YAFluxRegister at level l at level l / l+1 interface
[in,out] fr_as_fine — YAFluxRegister at level l at level l-1 / l interface
[in] l_use_moisture
[in] l_reflux — should we add fluxes to the FluxRegisters?
72 {
73  //
74  // NOTE: for step > 0, S_data and S_prev point to the same MultiFab data!!
75  //
76 
  // NOTE(review): the profile-region label says "erf_substep_S" but this function
  // is erf_substep_NS -- confirm whether the label is intentional or a copy-paste.
77  BL_PROFILE_REGION("erf_substep_S()");
78 
  // Off-centering of the acoustic substep: beta_s blends the explicit (beta_1)
  // and implicit (beta_2) contributions; beta_1 + beta_2 == one by construction.
79  Real beta_1 = myhalf * (one - beta_s); // multiplies explicit terms
80  Real beta_2 = myhalf * (one + beta_s); // multiplies implicit terms
81 
82  // How much do we project forward the (rho theta) that is used in the horizontal momentum equations
83  Real beta_d = Real(0.1);
84 
85  Real RvOverRd = R_v / R_d;
86 
  // Implicit Rayleigh damping of w is enabled only when the caller supplied a
  // (k-staggered) damping profile; nullptr disables it.
87  bool l_rayleigh_impl_for_w = (sinesq_stag_d != nullptr);
88 
89  const Real* dx = geom.CellSize();
90  const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
91 
92  Real dxi = dxInv[0];
93  Real dyi = dxInv[1];
94 
  // Device pointer into the (variable) vertical grid spacing, indexed by k.
95  auto dz_ptr = stretched_dz_d.data();
96 
97  const auto& ba = S_stage_data[IntVars::cons].boxArray();
98  const auto& dm = S_stage_data[IntVars::cons].DistributionMap();
99 
  // Perturbational quantities relative to the stage data: delta(rho theta) on
  // cell centers, delta(rho w) on z-faces.
100  MultiFab Delta_rho_theta( ba , dm, 1, 1);
101  MultiFab Delta_rho_w (convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
102 
  // Components 0-4 of fast_coeffs are aliased (no copy) as the tridiagonal
  // coefficients A, 1/B, C and the source-coupling coefficients P, Q.
103  MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
104  MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
105  MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
106  MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
107  MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
108 
109  // *************************************************************************
110  // Set gravity as a vector
111  const Array<Real,AMREX_SPACEDIM> grav{zero, zero, -gravity};
112  const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
113 
114  // This will hold theta extrapolated forward in time
115  MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1);
116 
117  // This will hold the update for (rho) and (rho theta)
118  MultiFab temp_rhs(S_stage_data[IntVars::zmom].boxArray(),S_stage_data[IntVars::zmom].DistributionMap(),2,0);
119 
120  // This will hold the new x- and y-momenta temporarily (so that we don't overwrite values we need when tiling)
121  MultiFab temp_cur_xmom(S_stage_data[IntVars::xmom].boxArray(),S_stage_data[IntVars::xmom].DistributionMap(),1,0);
122  MultiFab temp_cur_ymom(S_stage_data[IntVars::ymom].boxArray(),S_stage_data[IntVars::ymom].DistributionMap(),1,0);
123 
124  // We assume that in the first step (nrk == 0) we are only doing one substep.
125  AMREX_ALWAYS_ASSERT(nrk > 0 || step == 0);
126 
127  // *************************************************************************
128  // First set up some arrays we'll need
129  // *************************************************************************
130 
  // Pass 1: build delta(rho theta), its forward-in-time extrapolation
  // (theta_extrap, with moisture correction), and delta(rho w).
131 #ifdef _OPENMP
132 #pragma omp parallel if (Gpu::notInLaunchRegion())
133 #endif
134  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
135  {
136  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
137  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
138 
139  const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
140  const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
141 
142  const Array4<Real>& prev_drho_w = Delta_rho_w.array(mfi);
143  const Array4<Real>& prev_drho_theta = Delta_rho_theta.array(mfi);
144  const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
145  const Array4<Real>& theta_extrap = extrap.array(mfi);
146  const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
147 
148  Box gbx = mfi.growntilebox(1);
149  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
150  {
151  prev_drho_theta(i,j,k) = prev_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp);
152 
  // On the first fast step there is no history, so no forward projection;
  // otherwise project forward by beta_d times the change since last substep.
153  if (step == 0) {
154  theta_extrap(i,j,k) = prev_drho_theta(i,j,k);
155  } else {
156  theta_extrap(i,j,k) = prev_drho_theta(i,j,k) + beta_d *
157  ( prev_drho_theta(i,j,k) - lagged_arr(i,j,k) );
158  }
159 
160  // NOTE: qv is not changing over the fast steps so we use the stage data
161  Real qv = (l_use_moisture) ? prim(i,j,k,PrimQ1_comp) : zero;
162  theta_extrap(i,j,k) *= (one + RvOverRd*qv);
163 
164  // We define lagged_delta_rt for our next step as the current delta_rt
165  // (after using it above to extrapolate theta for this step)
166  lagged_arr(i,j,k) = prev_drho_theta(i,j,k);
167  });
168 
169  // NOTE: We must do this here because for step > 0, prev_zmom and cur_zmom both point to the same data,
170  // so by the time we would use prev_zmom to define zflux, it would have already been over-written.
171  Box gtbz = mfi.nodaltilebox(2);
172  gtbz.grow(IntVect(1,1,0));
173  ParallelFor(gtbz, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
174  prev_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
175  });
176  } // mfi
177 
178  // *************************************************************************
179  // Define updates in the current RK stage
180  // *************************************************************************
181 
  // Pass 2: explicit update of the horizontal momenta. New values go into
  // temp_cur_xmom/ymom (not S_data) so tiling cannot read half-updated data.
182 #ifdef _OPENMP
183 #pragma omp parallel if (Gpu::notInLaunchRegion())
184 #endif
185  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
186  {
187  Box tbx = mfi.nodaltilebox(0);
188  Box tby = mfi.nodaltilebox(1);
189 
190  const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
191  const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
192 
193  const Array4<const Real> & stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
194  const Array4<const Real> & stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
195  const Array4<const Real> & qt_arr = qt.const_array(mfi);
196 
197  const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
198  const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);
199 
200  const Array4<Real>& temp_cur_xmom_arr = temp_cur_xmom.array(mfi);
201  const Array4<Real>& temp_cur_ymom_arr = temp_cur_ymom.array(mfi);
202 
203  const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
204  const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
205 
206  // These store the advection momenta which we will use to update the slow variables
207  const Array4< Real>& avg_xmom_arr = avg_xmom.array(mfi);
208  const Array4< Real>& avg_ymom_arr = avg_ymom.array(mfi);
209 
210  const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
211 
212  const Array4<Real>& theta_extrap = extrap.array(mfi);
213 
214  // Map factors
215  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
216  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
217 
218  // *********************************************************************
219  // Define updates in the RHS of {x, y, z}-momentum equations
220  // *********************************************************************
  // In the very first substep of the first RK stage, prev == stage, so the
  // fast pressure-gradient contribution vanishes and only slow RHS + sources act.
221  if (nrk == 0 and step == 0) { // prev == stage
222  ParallelFor(tbx, tby,
223  [=] AMREX_GPU_DEVICE (int i, int j, int k)
224  {
225  Real new_drho_u = dtau * slow_rhs_rho_u(i,j,k) + dtau * xmom_src_arr(i,j,k);; // NOTE(review): stray second ';' -- harmless, remove
226  avg_xmom_arr(i,j,k) += facinv*new_drho_u;
227  temp_cur_xmom_arr(i,j,k) = stage_xmom(i,j,k) + new_drho_u;
228  },
229  [=] AMREX_GPU_DEVICE (int i, int j, int k)
230  {
231  Real new_drho_v = dtau * slow_rhs_rho_v(i,j,k) + dtau * ymom_src_arr(i,j,k);
232  avg_ymom_arr(i,j,k) += facinv*new_drho_v;
233  temp_cur_ymom_arr(i,j,k) = stage_ymom(i,j,k) + new_drho_v;
234  });
235  } else {
236  ParallelFor(tbx, tby,
237  [=] AMREX_GPU_DEVICE (int i, int j, int k)
238  {
239  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
240  Real gpx = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k))*dxi;
241  gpx *= mf_ux(i,j,0);
242 
243  Real q = (l_use_moisture) ? myhalf * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) : zero;
244 
245  Real pi_c = myhalf * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i,j,k,0));
246  Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx / (one + q);
247 
248  Real new_drho_u = prev_xmom(i,j,k) - stage_xmom(i,j,k)
249  + dtau * fast_rhs_rho_u + dtau * slow_rhs_rho_u(i,j,k)
250  + dtau * xmom_src_arr(i,j,k);
251 
252  avg_xmom_arr(i,j,k) += facinv*new_drho_u;
253 
254  temp_cur_xmom_arr(i,j,k) = stage_xmom(i,j,k) + new_drho_u;
255  },
256  [=] AMREX_GPU_DEVICE (int i, int j, int k)
257  {
258  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
259  Real gpy = (theta_extrap(i,j,k) - theta_extrap(i,j-1,k))*dyi;
260  gpy *= mf_vy(i,j,0);
261 
262  Real q = (l_use_moisture) ? myhalf * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) : zero;
263 
264  Real pi_c = myhalf * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j,k,0));
265  Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy / (one + q);
266 
267  Real new_drho_v = prev_ymom(i,j,k) - stage_ymom(i,j,k)
268  + dtau * fast_rhs_rho_v + dtau * slow_rhs_rho_v(i,j,k)
269  + dtau * ymom_src_arr(i,j,k);
270 
271  avg_ymom_arr(i,j,k) += facinv*new_drho_v;
272 
273  temp_cur_ymom_arr(i,j,k) = stage_ymom(i,j,k) + new_drho_v;
274  });
275  } // nrk > 0 and/or step > 0
276  } //mfi
277 
  // Pass 3: implicit vertical solve for w (tridiagonal in k), followed by the
  // rho / (rho theta) flux-divergence updates and optional refluxing.
  // TileNoZ is used because the solve couples the full k-column.
278 #ifdef _OPENMP
279 #pragma omp parallel if (Gpu::notInLaunchRegion())
280 #endif
281  {
282  std::array<FArrayBox,AMREX_SPACEDIM> flux;
283  for ( MFIter mfi(S_stage_data[IntVars::cons],TileNoZ()); mfi.isValid(); ++mfi)
284  {
285  Box bx = mfi.tilebox();
286  Box tbz = surroundingNodes(bx,2);
287 
288  Box vbx = mfi.validbox();
289  const auto& vbx_hi = ubound(vbx);
290 
291  const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
292 
293  const Array4<const Real>& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
294  const Array4<const Real>& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
295  const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
296  const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
297 
298  const Array4<const Real>& prev_drho_theta = Delta_rho_theta.array(mfi);
299 
300  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
301  const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
302 
303  const Array4<const Real>& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
304  const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
305 
306  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
307  const Array4< Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
308 
309  const Array4<Real>& temp_cur_xmom_arr = temp_cur_xmom.array(mfi);
310  const Array4<Real>& temp_cur_ymom_arr = temp_cur_ymom.array(mfi);
311 
312  // These store the advection momenta which we will use to update the slow variables
313  const Array4< Real>& avg_zmom_arr = avg_zmom.array(mfi);
314 
315  // Map factors
316  const Array4<const Real>& mf_mx = mapfac[MapFacType::m_x]->const_array(mfi);
317  const Array4<const Real>& mf_my = mapfac[MapFacType::m_y]->const_array(mfi);
318  const Array4<const Real>& mf_uy = mapfac[MapFacType::u_y]->const_array(mfi);
319  const Array4<const Real>& mf_vx = mapfac[MapFacType::v_x]->const_array(mfi);
320 
  // Per-tile scratch for the tridiagonal RHS and solution, allocated on the
  // async arena so lifetime spans the device kernels launched below.
321  FArrayBox RHS_fab;
322  RHS_fab.resize(tbz,1, The_Async_Arena());
323 
324  FArrayBox soln_fab;
325  soln_fab.resize(tbz,1, The_Async_Arena());
326 
327  auto const& RHS_a = RHS_fab.array();
328  auto const& soln_a = soln_fab.array();
329 
330  auto const& temp_rhs_arr = temp_rhs.array(mfi);
331 
332  auto const& coeffA_a = coeff_A_mf.array(mfi);
333  auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
334  auto const& coeffC_a = coeff_C_mf.array(mfi);
335  auto const& coeffP_a = coeff_P_mf.array(mfi);
336  auto const& coeffQ_a = coeff_Q_mf.array(mfi);
337 
338  // *************************************************************************
339  // Define flux arrays for use in advection
340  // *************************************************************************
341  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
342  flux[dir].resize(surroundingNodes(bx,dir),2,The_Async_Arena());
343  flux[dir].setVal<RunOn::Device>(0);
344  }
345  const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
346  flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
347 
348  // *********************************************************************
  // Horizontal flux divergence of the momentum increments -> temp_rhs for
  // rho (comp 0) and rho*theta (comp 1); also stash fluxes for refluxing.
349  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
350  Real xflux_lo = (temp_cur_xmom_arr(i ,j,k) - stage_xmom(i ,j,k)) / mf_uy(i ,j,0);
351  Real xflux_hi = (temp_cur_xmom_arr(i+1,j,k) - stage_xmom(i+1,j,k)) / mf_uy(i+1,j,0);
352  Real yflux_lo = (temp_cur_ymom_arr(i,j ,k) - stage_ymom(i,j ,k)) / mf_vx(i,j ,0);
353  Real yflux_hi = (temp_cur_ymom_arr(i,j+1,k) - stage_ymom(i,j+1,k)) / mf_vx(i,j+1,0);
354 
355  Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
356 
357  temp_rhs_arr(i,j,k,Rho_comp ) = ( xflux_hi - xflux_lo ) * dxi * mfsq
358  + ( yflux_hi - yflux_lo ) * dyi * mfsq;
359  temp_rhs_arr(i,j,k,RhoTheta_comp) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
360  xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq +
361  ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
362  yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * myhalf;
363 
  // Only the low-side flux is stored at each cell; the high-side flux is
  // written only on the last valid cell so every face is covered exactly once.
364  if (l_reflux) {
365  (flx_arr[0])(i,j,k,0) = xflux_lo;
366  (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * myhalf * (prim(i,j,k,0) + prim(i-1,j,k,0));
367 
368  (flx_arr[1])(i,j,k,0) = yflux_lo;
369  (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * myhalf * (prim(i,j,k,0) + prim(i,j-1,k,0));
370 
371  if (i == vbx_hi.x) {
372  (flx_arr[0])(i+1,j,k,0) = xflux_hi;
373  (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * myhalf * (prim(i,j,k,0) + prim(i+1,j,k,0));
374  }
375  if (j == vbx_hi.y) {
376  (flx_arr[1])(i,j+1,k,0) = yflux_hi;
377  (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * myhalf * (prim(i,j,k,0) + prim(i,j+1,k,0));
378  }
379  }
380  });
381 
382  Box bx_shrunk_in_k = bx;
383  int klo = tbz.smallEnd(2);
384  int khi = tbz.bigEnd(2);
385  bx_shrunk_in_k.setSmall(2,klo+1);
386  bx_shrunk_in_k.setBig(2,khi-1);
387 
388  // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
389  // We set grav_gpu[2] to be the vector component which is negative
390  // We define halfg to match the notes (which is why we take the absolute value)
391  Real halfg = std::abs(myhalf * grav_gpu[2]);
392 
393  // *********************************************************************
394  // fast_loop_on_shrunk
395  // *********************************************************************
396  //Note we don't act on the bottom or top boundaries of the domain
  // Assemble the tridiagonal right-hand side for the implicit w solve on
  // interior z-faces; boundary faces are handled separately below.
397  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
398  {
399  Real coeff_P = coeffP_a(i,j,k);
400  Real coeff_Q = coeffQ_a(i,j,k);
401 
402  Real theta_t_lo = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
403  Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) );
404  Real theta_t_hi = myhalf * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
405 
406  Real Omega_kp1 = prev_zmom(i,j,k+1) - stage_zmom(i,j,k+1);
407  Real Omega_k = prev_zmom(i,j,k ) - stage_zmom(i,j,k );
408  Real Omega_km1 = prev_zmom(i,j,k-1) - stage_zmom(i,j,k-1);
409 
410  // line 2 last two terms (order dtau)
411  Real old_drho_k = prev_cons(i,j,k ,Rho_comp) - stage_cons(i,j,k ,Rho_comp);
412  Real old_drho_km1 = prev_cons(i,j,k-1,Rho_comp) - stage_cons(i,j,k-1,Rho_comp);
413  Real R0_tmp = coeff_P * prev_drho_theta(i,j,k) + coeff_Q * prev_drho_theta(i,j,k-1)
414  - halfg * ( old_drho_k + old_drho_km1 );
415 
416  // lines 3-5 residuals (order dtau^2) one <-> beta_2
417  Real R1_tmp = halfg * (-slow_rhs_cons(i,j,k ,Rho_comp)
418  -slow_rhs_cons(i,j,k-1,Rho_comp)
419  +temp_rhs_arr(i,j,k,0) + temp_rhs_arr(i,j,k-1) ) // NOTE(review): second call omits the component index (reads comp 0 implicitly) -- make explicit for consistency
420  + ( coeff_P * (slow_rhs_cons(i,j,k ,RhoTheta_comp) - temp_rhs_arr(i,j,k ,RhoTheta_comp)) +
421  coeff_Q * (slow_rhs_cons(i,j,k-1,RhoTheta_comp) - temp_rhs_arr(i,j,k-1,RhoTheta_comp)) );
422 
423  // lines 6&7 consolidated (reuse Omega & metrics) (order dtau^2)
424  Real dz_inv = one / dz_ptr[k];
425  R1_tmp += beta_1 * dz_inv * ( (Omega_kp1 - Omega_km1) * halfg
426  -(Omega_kp1*theta_t_hi - Omega_k *theta_t_mid) * coeff_P
427  -(Omega_k *theta_t_mid - Omega_km1*theta_t_lo ) * coeff_Q );
428 
429  // line 1
430  RHS_a(i,j,k) = Omega_k + dtau * (slow_rhs_rho_w(i,j,k) + R0_tmp + dtau * beta_2 * R1_tmp + zmom_src_arr(i,j,k));
431 
432  }); // bx_shrunk_in_k
433 
434  Box b2d = tbz; // Copy constructor
435  b2d.setRange(2,0);
436 
437  auto const lo = lbound(bx);
438  auto const hi = ubound(bx);
439 
440  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
441  {
442  // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
443  RHS_a (i,j,lo.z) = prev_zmom(i,j,lo.z) - stage_zmom(i,j,lo.z)
444  + dtau * slow_rhs_rho_w(i,j,lo.z)
445  + dtau * zmom_src_arr(i,j,lo.z);
446 
447  // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
448  RHS_a (i,j,hi.z+1) = prev_zmom(i,j,hi.z+1) - stage_zmom(i,j,hi.z+1)
449  + dtau * slow_rhs_rho_w(i,j,hi.z+1)
450  + dtau * zmom_src_arr(i,j,hi.z+1);
451  }); // b2d
452 
  // Tridiagonal solve (forward elimination then back substitution) per
  // (i,j) column. On GPU each thread owns a column; on CPU the loops are
  // ordered j/i-innermost so the k-sweeps vectorize across i.
453 #ifdef AMREX_USE_GPU
454  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
455  {
456  // w = specified Dirichlet value at k = lo.z
457  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
458  cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);
459 
460  for (int k = lo.z+1; k <= hi.z+1; k++) {
461  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
462  }
463 
464  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
465 
466  for (int k = hi.z; k >= lo.z; k--) {
467  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
468  cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
469  }
470  }); // b2d
471 #else
472  for (int j = lo.y; j <= hi.y; ++j) {
473  AMREX_PRAGMA_SIMD
474  for (int i = lo.x; i <= hi.x; ++i) {
475  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
476  }
477  }
478  for (int k = lo.z+1; k <= hi.z+1; ++k) {
479  for (int j = lo.y; j <= hi.y; ++j) {
480  AMREX_PRAGMA_SIMD
481  for (int i = lo.x; i <= hi.x; ++i) {
482  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
483  }
484  }
485  }
486  for (int j = lo.y; j <= hi.y; ++j) {
487  AMREX_PRAGMA_SIMD
488  for (int i = lo.x; i <= hi.x; ++i) {
489  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
490  }
491  }
492  for (int k = hi.z; k >= lo.z; --k) {
493  for (int j = lo.y; j <= hi.y; ++j) {
494  AMREX_PRAGMA_SIMD
495  for (int i = lo.x; i <= hi.x; ++i) {
496  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
497  cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
498  }
499  }
500  }
501 #endif
  // Optional implicit Rayleigh damping of the new w on interior faces;
  // note this divides cur_zmom but not soln_a used for the fluxes below --
  // presumably intentional, matching the explicit damping treatment; verify.
502  if (l_rayleigh_impl_for_w) {
503  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
504  {
505  Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k];
506  cur_zmom(i,j,k) /= (one + damping_coeff);
507  });
508  }
509 
510  // **************************************************************************
511  // Define updates in the RHS of rho and (rho theta)
512  // **************************************************************************
  // Vertical flux is the beta-weighted blend of the implicit solution and the
  // explicit (previous-substep) delta(rho w); add its divergence to temp_rhs.
513  const Array4<Real>& prev_drho_w = Delta_rho_w.array(mfi);
514  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
515  {
516  Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * prev_drho_w(i,j,k );
517  Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * prev_drho_w(i,j,k+1);
518 
519  avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
520  if (l_reflux) {
521  (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
522  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1));
523  }
524 
525  if (k == vbx_hi.z) {
526  avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
527  if (l_reflux) {
528  (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
529  (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * myhalf * (prim(i,j,k) + prim(i,j,k+1));
530  }
531  }
532 
533  Real dz_inv = one / dz_ptr[k];
534  temp_rhs_arr(i,j,k,Rho_comp ) += dz_inv * ( zflux_hi - zflux_lo );
535  temp_rhs_arr(i,j,k,RhoTheta_comp) += myhalf * dz_inv * ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
536  - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) );
537  });
538 
539  // We only add to the flux registers in the final RK step
540  if (l_reflux) {
541  int strt_comp_reflux = 0;
542  // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries
543  int num_comp_reflux = 1;
544  if (level < finest_level) {
545  fr_as_crse->CrseAdd(mfi,
546  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
547  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
548  }
549  if (level > 0) {
550  fr_as_fine->FineAdd(mfi,
551  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
552  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
553  }
554 
555  // This is necessary here so we don't go on to the next FArrayBox without
556  // having finished copying the fluxes into the FluxRegisters (since the fluxes
557  // are stored in temporary FArrayBox's)
558  Gpu::streamSynchronize();
559 
560  } // two-way coupling
561  } // mfi
562  } // OMP
563 
  // Pass 4: apply the accumulated fast RHS to rho and (rho theta), then commit
  // the buffered horizontal momenta from temp_cur_xmom/ymom into S_data.
564 #ifdef _OPENMP
565 #pragma omp parallel if (Gpu::notInLaunchRegion())
566 #endif
567  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
568  {
569  const Box& bx = mfi.tilebox();
570 
571  const Array4< Real>& cur_cons = S_data[IntVars::cons].array(mfi);
572  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
573  auto const& temp_rhs_arr = temp_rhs.const_array(mfi);
574  auto const& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
575  const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
576 
577  if (step == 0) {
578  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
579  {
580  cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp) +
581  dtau * (slow_rhs_cons(i,j,k,Rho_comp) - temp_rhs_arr(i,j,k,Rho_comp));
582  cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp) +
583  dtau * (slow_rhs_cons(i,j,k,RhoTheta_comp) - temp_rhs_arr(i,j,k,RhoTheta_comp));
584 
585  // add in source terms for cell-centered conserved variables
586  cur_cons(i,j,k,Rho_comp) += dtau * cc_src_arr(i,j,k,Rho_comp);
587  cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
588  });
589  } else {
590  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
591  {
592  //
593  // We didn't need to set cur_cons = prev_cons above because they point to the same data for step > 0
594  //
595  cur_cons(i,j,k,Rho_comp) += dtau * (slow_rhs_cons(i,j,k,Rho_comp) - temp_rhs_arr(i,j,k,Rho_comp));
596  cur_cons(i,j,k,RhoTheta_comp) += dtau * (slow_rhs_cons(i,j,k,RhoTheta_comp) - temp_rhs_arr(i,j,k,RhoTheta_comp));
597 
598  // add in source terms for cell-centered conserved variables
599  cur_cons(i,j,k,Rho_comp) += dtau * cc_src_arr(i,j,k,Rho_comp);
600  cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
601  });
602  } // step = 0
603 
604  const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
605  const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
606 
607  const Array4<Real const>& temp_cur_xmom_arr = temp_cur_xmom.const_array(mfi);
608  const Array4<Real const>& temp_cur_ymom_arr = temp_cur_ymom.const_array(mfi);
609 
610  Box tbx = surroundingNodes(bx,0);
611  Box tby = surroundingNodes(bx,1);
612 
613  ParallelFor(tbx, tby,
614  [=] AMREX_GPU_DEVICE (int i, int j, int k)
615  {
616  cur_xmom(i,j,k) = temp_cur_xmom_arr(i,j,k);
617  },
618  [=] AMREX_GPU_DEVICE (int i, int j, int k)
619  {
620  cur_ymom(i,j,k) = temp_cur_ymom_arr(i,j,k);
621  });
622 
623  } // mfi
624 }
constexpr amrex::Real R_v
Definition: ERF_Constants.H:21
constexpr amrex::Real one
Definition: ERF_Constants.H:7
constexpr amrex::Real zero
Definition: ERF_Constants.H:6
constexpr amrex::Real myhalf
Definition: ERF_Constants.H:11
constexpr amrex::Real R_d
Definition: ERF_Constants.H:20
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:29
@ v_x
Definition: ERF_DataStruct.H:24
@ u_y
Definition: ERF_DataStruct.H:25
@ v_y
Definition: ERF_DataStruct.H:25
@ m_y
Definition: ERF_DataStruct.H:25
@ u_x
Definition: ERF_DataStruct.H:24
@ m_x
Definition: ERF_DataStruct.H:24
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
amrex::GpuArray< Real, AMREX_SPACEDIM > dxInv
Definition: ERF_InitCustomPertVels_ParticleTests.H:17
const Real dx
Definition: ERF_InitCustomPert_ABL.H:23
const int khi
Definition: ERF_InitCustomPert_Bubble.H:21
AMREX_ALWAYS_ASSERT(bx.length()[2]==khi+1)
ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { const auto prob_lo=geomdata.ProbLo();const auto dx=geomdata.CellSize();const Real x=(prob_lo[0]+(i+myhalf) *dx[0])/mf_m(i, j, 0);const Real z=z_cc(i, j, k);Real L=std::sqrt(std::pow((x - x_c)/x_r, 2)+std::pow((z - z_c)/z_r, 2));if(L<=one) { Real dT=T_pert *(std::cos(PI *L)+one)/two;Real Tbar_hse=p_hse(i, j, k)/(R_d *r_hse(i, j, k));Real theta_perturbed=(Tbar_hse+dT) *std::pow(p_0/p_hse(i, j, k), rdOcp);Real theta_0=(Tbar_hse) *std::pow(p_0/p_hse(i, j, k), rdOcp);if(const_rho) { state_pert(i, j, k, RhoTheta_comp)=r_hse(i, j, k) *(theta_perturbed - theta_0);} else { state_pert(i, j, k, Rho_comp)=getRhoThetagivenP(p_hse(i, j, k))/theta_perturbed - r_hse(i, j, k);} } })
amrex::Real Real
Definition: ERF_ShocInterface.H:19
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ gpy
Definition: ERF_IndexDefines.H:169
@ gpx
Definition: ERF_IndexDefines.H:168
@ ymom
Definition: ERF_IndexDefines.H:178
@ cons
Definition: ERF_IndexDefines.H:176
@ zmom
Definition: ERF_IndexDefines.H:179
@ xmom
Definition: ERF_IndexDefines.H:177
@ qt
Definition: ERF_Kessler.H:27
@ qv
Definition: ERF_Kessler.H:28
Here is the call graph for this function: