ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_Substep_T.cpp File Reference
Include dependency graph for ERF_Substep_T.cpp:

Functions

void erf_substep_T (int step, int, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stage_data, const MultiFab &S_stage_prim, const MultiFab &qt, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, MultiFab &lagged_delta_rt, MultiFab &avg_xmom, MultiFab &avg_ymom, MultiFab &avg_zmom, const MultiFab &cc_src, const MultiFab &xmom_src, const MultiFab &ymom_src, const MultiFab &zmom_src, const Geometry geom, const Real gravity, std::unique_ptr< MultiFab > &z_phys_nd, std::unique_ptr< MultiFab > &detJ_cc, const Real dtau, const Real beta_s, const Real facinv, Vector< std::unique_ptr< MultiFab >> &mapfac, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, bool l_real_bc, const Real *sinesq_stag_d, const Real l_damp_coef)
 

Function Documentation

◆ erf_substep_T()

void erf_substep_T ( int  step,
int  ,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stage_data,
const MultiFab &  S_stage_prim,
const MultiFab &  qt,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
MultiFab &  lagged_delta_rt,
MultiFab &  avg_xmom,
MultiFab &  avg_ymom,
MultiFab &  avg_zmom,
const MultiFab &  cc_src,
const MultiFab &  xmom_src,
const MultiFab &  ymom_src,
const MultiFab &  zmom_src,
const Geometry  geom,
const Real  gravity,
std::unique_ptr< MultiFab > &  z_phys_nd,
std::unique_ptr< MultiFab > &  detJ_cc,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
Vector< std::unique_ptr< MultiFab >> &  mapfac,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
bool  l_real_bc,
const Real sinesq_stag_d,
const Real  l_damp_coef 
)

Function for computing the fast RHS with fixed-in-time terrain

Parameters
[in]stepwhich fast time step within each Runge-Kutta step
[in]nrkwhich Runge-Kutta step
[in]levellevel of resolution
[in]finest_levelfinest level of resolution
[in]S_slow_rhsslow RHS computed in erf_slow_rhs_pre
[in]S_prevprevious solution
[in]S_stage_datasolution at previous RK stage
[in]S_stage_primprimitive variables at previous RK stage
[in]pi_stageExner function at previous RK stage
[in]fast_coeffscoefficients for the tridiagonal solve used in the fast integrator
[out]S_datacurrent solution
[in,out]lagged_delta_rt
[in,out]avg_xmomtime-averaged x-momentum to be used for updating slow variables
[in,out]avg_ymomtime-averaged y-momentum to be used for updating slow variables
[in,out]avg_zmomtime-averaged z-momentum to be used for updating slow variables
[in]cc_srcsource terms for conserved variables
[in]xmom_srcsource terms for x-momentum
[in]ymom_srcsource terms for y-momentum
[in]zmom_srcsource terms for z-momentum
[in]geomcontainer for geometric information
[in]gravitymagnitude of gravity
[in]z_phys_ndheight coordinate at nodes
[in]detJ_ccJacobian of the metric transformation
[in]dtaufast time step
[in]beta_sCoefficient which determines how implicit vs explicit the solve is
[in]facinvinverse factor for time-averaging the momenta
[in]mapfacvector of map factors
[in,out]fr_as_crseYAFluxRegister at level l at level l / l+1 interface
[in,out]fr_as_fineYAFluxRegister at level l at level l-1 / l interface
[in]l_use_moisture
[in]l_refluxshould we add fluxes to the FluxRegisters?
[in]l_damp_coef
75 {
76  BL_PROFILE_REGION("erf_substep_T()");
77 
78  const Box& domain = geom.Domain();
79  auto const domlo = lbound(domain);
80  auto const domhi = ubound(domain);
81 
82  int ilo = domlo.x;
83  int ihi = domhi.x + 1;
84  int jlo = domlo.y;
85  int jhi = domhi.y + 1;
86 
87  Real beta_1 = myhalf * (one - beta_s); // multiplies explicit terms
88  Real beta_2 = myhalf * (one + beta_s); // multiplies implicit terms
89 
90  // How much do we project forward the (rho theta) that is used in the horizontal momentum equations
91  Real beta_d = Real(0.1);
92 
93  Real RvOverRd = R_v / R_d;
94 
95  bool l_rayleigh_impl_for_w = (sinesq_stag_d != nullptr);
96 
97  const Real* dx = geom.CellSize();
98  const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
99 
100  Real dxi = dxInv[0];
101  Real dyi = dxInv[1];
102  Real dzi = dxInv[2];
103  const auto& ba = S_stage_data[IntVars::cons].boxArray();
104  const auto& dm = S_stage_data[IntVars::cons].DistributionMap();
105 
106  MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
107  MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
108  MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
109  MultiFab Delta_rho ( ba , dm, 1, 1);
110  MultiFab Delta_rho_theta( ba , dm, 1, 1);
111 
112  MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
113  MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
114 
115  MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
116  MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
117  MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
118  MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
119  MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
120 
121  // *************************************************************************
122  // Set gravity as a vector
123  const Array<Real,AMREX_SPACEDIM> grav{zero, zero, -gravity};
124  const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
125 
126  MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1);
127 
128  // *************************************************************************
129  // First set up some arrays we'll need
130  // *************************************************************************
131 
132 #ifdef _OPENMP
133 #pragma omp parallel if (Gpu::notInLaunchRegion())
134 #endif
135  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
136  {
137  const Array4<Real> & cur_cons = S_data[IntVars::cons].array(mfi);
138  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
139  const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
140  const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
141 
142  const Array4<Real>& old_drho = Delta_rho.array(mfi);
143  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
144  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
145  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
146  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
147 
148  const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
149  const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
150  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
151 
152  const Array4<const Real>& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
153  const Array4<const Real>& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
154  const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
155 
156  Box bx = mfi.validbox();
157  Box gbx = mfi.tilebox(); gbx.grow(1);
158 
159  if (step == 0) {
160  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
161  cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp);
162  cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
163  });
164  } // step = 0
165 
166  Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
167  Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
168  Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
169 
170  const auto& bx_lo = lbound(bx);
171  const auto& bx_hi = ubound(bx);
172 
173  ParallelFor(gtbx, gtby, gtbz,
174  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
175  old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
176  if (k == bx_lo.z && k != domlo.z) {
177  old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
178  } else if (k == bx_hi.z) {
179  old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
180  }
181  },
182  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
183  old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
184  if (k == bx_lo.z && k != domlo.z) {
185  old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
186  } else if (k == bx_hi.z) {
187  old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
188  }
189  },
190  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
191  old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
192  });
193 
194  const Array4<Real>& theta_extrap = extrap.array(mfi);
195  const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
196 
197  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
198  old_drho(i,j,k) = cur_cons(i,j,k,Rho_comp) - stage_cons(i,j,k,Rho_comp);
199  old_drho_theta(i,j,k) = cur_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp);
200  if (step == 0) {
201  theta_extrap(i,j,k) = old_drho_theta(i,j,k);
202  } else {
203  theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
204  ( old_drho_theta(i,j,k) - lagged_arr(i,j,k) );
205  }
206 
207  // NOTE: qv is not changing over the fast steps so we use the stage data
208  Real qv = (l_use_moisture) ? prim(i,j,k,PrimQ1_comp) : zero;
209  theta_extrap(i,j,k) *= (one + RvOverRd*qv);
210  });
211  } // mfi
212 
213 #ifdef _OPENMP
214 #pragma omp parallel if (Gpu::notInLaunchRegion())
215 #endif
216  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
217  {
218  // We define lagged_delta_rt for our next step as the current delta_rt
219  Box gbx = mfi.tilebox(); gbx.grow(1);
220  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
221  const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
222  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
223  lagged_arr(i,j,k) = old_drho_theta(i,j,k);
224  });
225  } // mfi
226 
227  // *************************************************************************
228  // Define updates in the current RK stage
229  // *************************************************************************
230 
231 #ifdef _OPENMP
232 #pragma omp parallel if (Gpu::notInLaunchRegion())
233 #endif
234  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
235  {
236  Box bx = mfi.validbox();
237  Box tbx = mfi.nodaltilebox(0);
238  Box tby = mfi.nodaltilebox(1);
239 
240  const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
241  const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
242 
243  const Array4<const Real> & stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
244  const Array4<const Real> & stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
245  const Array4<const Real> & qt_arr = qt.const_array(mfi);
246 
247  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
248  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
249 
250  const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
251  const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);
252 
253  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
254  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
255 
256  const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
257  const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
258 
259  // These store the advection momenta which we will use to update the slow variables
260  const Array4<Real>& avg_xmom_arr = avg_xmom.array(mfi);
261  const Array4<Real>& avg_ymom_arr = avg_ymom.array(mfi);
262 
263  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
264 
265  const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
266 
267  const Array4<Real>& theta_extrap = extrap.array(mfi);
268 
269  // Map factors
270  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
271  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
272 
273  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
274  // Note that we do the Copy and Subtract including one ghost cell
275  // so that we don't have to fill ghost cells of the new MultiFabs
276  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
277  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
278 
279  // *********************************************************************
280  // Define updates in the RHS of {x, y, z}-momentum equations
281  // *********************************************************************
282  {
283  BL_PROFILE("substep_xymom_T");
284 
285  const auto& bx_lo = lbound(bx);
286  const auto& bx_hi = ubound(bx);
287 
288  ParallelFor(tbx, tby,
289  [=] AMREX_GPU_DEVICE (int i, int j, int k)
290  {
291  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
292  Real met_h_xi = Compute_h_xi_AtIface (i, j, k, dxInv, z_nd);
293  Real met_h_zeta = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd);
294  Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
295  Real gp_zeta_on_iface = (k == 0) ?
296  myhalf * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
297  - theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
298  fourth * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
299  - theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
300  Real gpx = (l_real_bc && (level==0) && (i==ilo || i==ihi)) ? Real(0.) :
301  gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
302 
303  gpx *= mf_ux(i,j,0);
304 
305  Real q = (l_use_moisture) ? myhalf * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) : zero;
306 
307  Real pi_c = myhalf * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
308  Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx / (one + q);
309 
310  new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
311  + dtau * slow_rhs_rho_u(i,j,k)
312  + dtau * xmom_src_arr(i,j,k);
313  if (k == bx_lo.z && k != domlo.z) {
314  new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
315  } else if (k == bx_hi.z) {
316  new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
317  }
318 
319  avg_xmom_arr(i,j,k) += facinv*new_drho_u(i,j,k);
320 
321  cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
322  },
323  [=] AMREX_GPU_DEVICE (int i, int j, int k)
324  {
325  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
326  Real met_h_eta = Compute_h_eta_AtJface(i, j, k, dxInv, z_nd);
327  Real met_h_zeta = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd);
328  Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
329  Real gp_zeta_on_jface = (k == 0) ?
330  myhalf * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
331  - theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
332  fourth * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
333  - theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
334  Real gpy = (l_real_bc && (level==0) && (j==jlo || j==jhi)) ? Real(0.) :
335  gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
336 
337  gpy *= mf_vy(i,j,0);
338 
339  Real q = (l_use_moisture) ? myhalf * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) : zero;
340 
341  Real pi_c = myhalf * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
342  Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy / (one + q);
343 
344  new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
345  + dtau * slow_rhs_rho_v(i,j,k)
346  + dtau * ymom_src_arr(i,j,k);
347 
348  if (k == bx_lo.z && k != domlo.z) {
349  new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
350  } else if (k == bx_hi.z) {
351  new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
352  }
353 
354  avg_ymom_arr(i,j,k) += facinv*new_drho_v(i,j,k);
355 
356  cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
357  });
358  } // end profile
359  }
360 
361  MultiFab Omega(S_data[IntVars::zmom].boxArray(), dm, 1, 1);
362 
363 #ifdef _OPENMP
364 #pragma omp parallel if (Gpu::notInLaunchRegion())
365 #endif
366  {
367  std::array<FArrayBox,AMREX_SPACEDIM> flux;
368  for ( MFIter mfi(S_stage_data[IntVars::cons],TileNoZ()); mfi.isValid(); ++mfi)
369  {
370  Box bx = mfi.tilebox();
371  Box tbz = surroundingNodes(bx,2);
372 
373  Box vbx = mfi.validbox();
374  const auto& vbx_hi = ubound(vbx);
375 
376  const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
377  const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
378 
379  const Array4<const Real> & stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
380  const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
381 
382  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
383  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
384  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
385  const Array4<Real>& old_drho = Delta_rho.array(mfi);
386  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
387 
388  const Array4<const Real>& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
389  const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
390 
391  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
392  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
393 
394  const Array4<Real>& cur_cons = S_data[IntVars::cons].array(mfi);
395  const Array4<Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
396 
397  // These store the advection momenta which we will use to update the slow variables
398  const Array4<Real>& avg_zmom_arr = avg_zmom.array(mfi);
399 
400  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
401  const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
402 
403  const Array4< Real>& omega_arr = Omega.array(mfi);
404 
405  // Map factors
406  const Array4<const Real>& mf_mx = mapfac[MapFacType::m_x]->const_array(mfi);
407  const Array4<const Real>& mf_my = mapfac[MapFacType::m_y]->const_array(mfi);
408  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
409  const Array4<const Real>& mf_uy = mapfac[MapFacType::u_y]->const_array(mfi);
410  const Array4<const Real>& mf_vx = mapfac[MapFacType::v_x]->const_array(mfi);
411  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
412 
413  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
414  // Note that we do the Copy and Subtract including one ghost cell
415  // so that we don't have to fill ghost cells of the new MultiFabs
416  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
417  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
418 
419  FArrayBox temp_rhs_fab;
420  FArrayBox RHS_fab;
421  FArrayBox soln_fab;
422 
423  RHS_fab.resize (tbz,1,The_Async_Arena());
424  soln_fab.resize (tbz,1,The_Async_Arena());
425  temp_rhs_fab.resize(tbz,2,The_Async_Arena());
426 
427  auto const& RHS_a = RHS_fab.array();
428  auto const& soln_a = soln_fab.array();
429  auto const& temp_rhs_arr = temp_rhs_fab.array();
430 
431  auto const& coeffA_a = coeff_A_mf.array(mfi);
432  auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
433  auto const& coeffC_a = coeff_C_mf.array(mfi);
434  auto const& coeffP_a = coeff_P_mf.array(mfi);
435  auto const& coeffQ_a = coeff_Q_mf.array(mfi);
436 
437  // *************************************************************************
438  // Define flux arrays for use in advection
439  // *************************************************************************
440  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
441  flux[dir].resize(surroundingNodes(bx,dir),2,The_Async_Arena());
442  flux[dir].setVal<RunOn::Device>(0);
443  }
444  const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
445  flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
446 
447  // *********************************************************************
448  {
449  BL_PROFILE("fast_T_making_rho_rhs");
450  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
451  Real h_zeta_cc_xface_hi = myhalf * dzi *
452  ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
453  -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
454 
455  Real h_zeta_cc_xface_lo = myhalf * dzi *
456  ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
457  -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
458 
459  Real h_zeta_cc_yface_hi = myhalf * dzi *
460  ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
461  -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
462 
463  Real h_zeta_cc_yface_lo = myhalf * dzi *
464  ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
465  -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
466 
467  Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_uy(i ,j,0);
468  Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_uy(i+1,j,0);
469  Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_vx(i,j ,0);
470  Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_vx(i,j+1,0);
471 
472  Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
473 
474  // NOTE: we are saving the (1/J) weighting for later when we add this to rho and theta
475  temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
476  ( yflux_hi - yflux_lo ) * dyi * mfsq;
477  temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
478  xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
479  ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
480  yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * myhalf;
481 
482  if (l_reflux) {
483  (flx_arr[0])(i,j,k,0) = xflux_lo;
484  (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * myhalf * (prim(i,j,k,0) + prim(i-1,j,k,0));
485 
486  (flx_arr[1])(i,j,k,0) = yflux_lo;
487  (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * myhalf * (prim(i,j,k,0) + prim(i,j-1,k,0));
488 
489  if (i == vbx_hi.x) {
490  (flx_arr[0])(i+1,j,k,0) = xflux_hi;
491  (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * myhalf * (prim(i,j,k,0) + prim(i+1,j,k,0));
492  }
493  if (j == vbx_hi.y) {
494  (flx_arr[1])(i,j+1,k,0) = yflux_hi;
495  (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * myhalf * (prim(i,j,k,0) + prim(i,j+1,k,0));
496  }
497  }
498  });
499  } // end profile
500 
501  // *********************************************************************
502  {
503  Box gbxo = mfi.nodaltilebox(2);
504  Box gbxo_mid = gbxo;
505 
506  if (gbxo.smallEnd(2) == domlo.z) {
507  Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
508  gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
509  ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
510  omega_arr(i,j,k) = zero;
511  });
512  }
513  if (gbxo.bigEnd(2) == domhi.z+1) {
514  Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
515  gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
516  ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
517  omega_arr(i,j,k) = old_drho_w(i,j,k);
518  });
519  }
520  ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
521  omega_arr(i,j,k) = OmegaFromW(i,j,k,old_drho_w(i,j,k),
522  old_drho_u,old_drho_v,
523  mf_ux,mf_vy,z_nd,dxInv);
524  });
525  } // end profile
526  // *********************************************************************
527 
528  Box bx_shrunk_in_k = bx;
529  int klo = tbz.smallEnd(2);
530  int khi = tbz.bigEnd(2);
531  bx_shrunk_in_k.setSmall(2,klo+1);
532  bx_shrunk_in_k.setBig(2,khi-1);
533 
534  // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
535  // We set grav_gpu[2] to be the vector component which is negative
536  // We define halfg to match the notes (which is why we take the absolute value)
537  Real halfg = std::abs(myhalf * grav_gpu[2]);
538 
539  {
540  BL_PROFILE("fast_loop_on_shrunk_t");
541  //Note we don't act on the bottom or top boundaries of the domain
542  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
543  {
544  Real coeff_P = coeffP_a(i,j,k);
545  Real coeff_Q = coeffQ_a(i,j,k);
546 
547  Real theta_t_lo = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
548  Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) );
549  Real theta_t_hi = myhalf * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
550 
551  // line 2 last two terms (order dtau)
552  Real R0_tmp = -halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )
553  -halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1);
554 
555  // line 3 residuals (order dtau^2) one <-> beta_2
556  Real R1_tmp = -halfg * ( slow_rhs_cons(i,j,k ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp) );
557 
558  R1_tmp += coeff_P * slow_rhs_cons(i,j,k ,RhoTheta_comp)
559  + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp);
560 
561  Real Omega_kp1 = omega_arr(i,j,k+1);
562  Real Omega_k = omega_arr(i,j,k );
563  Real Omega_km1 = omega_arr(i,j,k-1);
564 
565  Real detJdiff = (detJ(i,j,k) - detJ(i,j,k-1)) / (detJ(i,j,k)*detJ(i,j,k-1));
566 
567  // consolidate lines 4&5 (order dtau^2)
568  R1_tmp += halfg * ( beta_1 * dzi * (Omega_kp1/detJ(i,j,k) + detJdiff*Omega_k - Omega_km1/detJ(i,j,k-1))
569  + temp_rhs_arr(i,j,k,Rho_comp)/detJ(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ(i,j,k-1) );
570 
571  // consolidate lines 6&7 (order dtau^2)
572  R1_tmp += -( coeff_P/detJ(i,j,k ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,RhoTheta_comp) )
573  + coeff_Q/detJ(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
574 
575  // line 1
576  RHS_a(i,j,k) = old_drho_w(i,j,k) + dtau * (slow_rhs_rho_w(i,j,k) + zmom_src_arr(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
577 
578  // We cannot use omega_arr here since that was built with old_rho_u and old_rho_v ...
579  RHS_a(i,j,k) += OmegaFromW(i,j,k,zero,
580  new_drho_u,new_drho_v,
581  mf_ux,mf_vy,z_nd,dxInv);
582  });
583  } // end profile
584 
585  Box b2d = tbz; // Copy constructor
586  b2d.setRange(2,0);
587 
588  auto const lo = lbound(bx);
589  auto const hi = ubound(bx);
590 
591  {
592  BL_PROFILE("substep_b2d_loop_t");
593 #ifdef AMREX_USE_GPU
594  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
595  {
596  // w_klo, w_khi given by specified Dirichlet values
597  RHS_a(i,j,lo.z ) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
598  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
599 
600  // w = specified Dirichlet value at k = lo.z
601  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
602 
603  // Transform the RHS from r_i -> rho_i
604  for (int k = lo.z+1; k <= hi.z+1; k++) {
605  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
606  }
607 
608  cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
609  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
610 
611  // Back sweep to obtain the solution
612  for (int k = hi.z; k >= lo.z; k--) {
613  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
614  }
615  });
616 #else
617  for (int j = lo.y; j <= hi.y; ++j) {
618  AMREX_PRAGMA_SIMD
619  for (int i = lo.x; i <= hi.x; ++i)
620  {
621  RHS_a(i,j,lo.z) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
622  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
623  }
624 
625  AMREX_PRAGMA_SIMD
626  for (int i = lo.x; i <= hi.x; ++i)
627  {
628  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
629  soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
630  }
631  }
632 
633  for (int k = lo.z+1; k <= hi.z; ++k) {
634  for (int j = lo.y; j <= hi.y; ++j) {
635  AMREX_PRAGMA_SIMD
636  for (int i = lo.x; i <= hi.x; ++i) {
637  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
638  }
639  }
640  }
641  for (int k = hi.z; k > lo.z; --k) {
642  for (int j = lo.y; j <= hi.y; ++j) {
643  AMREX_PRAGMA_SIMD
644  for (int i = lo.x; i <= hi.x; ++i) {
645  soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
646  }
647  }
648  }
649  if (hi.z == domhi.z) {
650  for (int j = lo.y; j <= hi.y; ++j) {
651  AMREX_PRAGMA_SIMD
652  for (int i = lo.x; i <= hi.x; ++i) {
653  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
654  }
655  }
656  }
657 #endif
658  } // end profile
659 
660  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
661  {
662  cur_zmom(i,j,k) = stage_zmom(i,j,k);
663  });
664 
665  if (lo.z == domlo.z) {
666  tbz.setSmall(2,domlo.z+1);
667  }
668  if (hi.z == domhi.z) {
669  tbz.setBig(2,domhi.z);
670  }
671  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
672  {
673  Real wpp = WFromOmega(i,j,k,soln_a(i,j,k),
674  new_drho_u,new_drho_v,
675  mf_ux,mf_vy,z_nd,dxInv);
676 
677  cur_zmom(i,j,k) += wpp;
678 
679  if (l_rayleigh_impl_for_w) {
680  Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k];
681  cur_zmom(i,j,k) /= (one + damping_coeff);
682  }
683  });
684 
685  // **************************************************************************
686  // Define updates in the RHS of rho and (rho theta)
687  // **************************************************************************
688  {
689  BL_PROFILE("fast_rho_final_update");
690  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
691  {
692  Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
693  Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
694 
695  // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
696  // so we don't update avg_zmom at k=vbx_hi.z+1
697  avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
698  if (l_reflux) {
699  (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
700  }
701 
702  if (k == vbx_hi.z) {
703  avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
704  if (l_reflux) {
705  (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
706  (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * myhalf * (prim(i,j,k) + prim(i,j,k+1));
707  }
708  }
709 
710  Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
711 
712  cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
713 
714  Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + myhalf *
715  ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
716  zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
717 
718  cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
719 
720  if (l_reflux) {
721  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1));
722  }
723 
724  // add in source terms for cell-centered conserved variables
725  cur_cons(i,j,k,Rho_comp) += dtau * cc_src_arr(i,j,k,Rho_comp);
726  cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
727  });
728  } // end profile
729 
730  // We only add to the flux registers in the final RK step
731  if (l_reflux) {
732  int strt_comp_reflux = 0;
733  // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries
734  int num_comp_reflux = 1;
735  if (level < finest_level) {
736  fr_as_crse->CrseAdd(mfi,
737  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
738  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
739  }
740  if (level > 0) {
741  fr_as_fine->FineAdd(mfi,
742  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
743  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
744  }
745 
746  // This is necessary here so we don't go on to the next FArrayBox without
747  // having finished copying the fluxes into the FluxRegisters (since the fluxes
748  // are stored in temporary FArrayBox's)
749  Gpu::streamSynchronize();
750 
751  } // two-way coupling
752  } // mfi
753  } // OMP
754 }
constexpr amrex::Real R_v
Definition: ERF_Constants.H:30
constexpr amrex::Real one
Definition: ERF_Constants.H:7
constexpr amrex::Real fourth
Definition: ERF_Constants.H:12
constexpr amrex::Real zero
Definition: ERF_Constants.H:6
constexpr amrex::Real myhalf
Definition: ERF_Constants.H:11
constexpr amrex::Real R_d
Definition: ERF_Constants.H:29
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:38
@ v_x
Definition: ERF_DataStruct.H:24
@ u_y
Definition: ERF_DataStruct.H:25
@ v_y
Definition: ERF_DataStruct.H:25
@ m_y
Definition: ERF_DataStruct.H:25
@ u_x
Definition: ERF_DataStruct.H:24
@ m_x
Definition: ERF_DataStruct.H:24
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:58
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:55
amrex::GpuArray< Real, AMREX_SPACEDIM > dxInv
Definition: ERF_InitCustomPertVels_ParticleTests.H:17
const Real dx
Definition: ERF_InitCustomPert_ABL.H:23
const int khi
Definition: ERF_InitCustomPert_Bubble.H:21
ParallelFor(grown_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) { qrcuten_arr(i, j, k)=Real(0);qscuten_arr(i, j, k)=Real(0);qicuten_arr(i, j, k)=Real(0);})
amrex::Real Real
Definition: ERF_ShocInterface.H:19
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int &i, int &j, int &k, amrex::Real w, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:414
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:117
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:104
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:144
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:170
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:464
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ gpy
Definition: ERF_IndexDefines.H:185
@ gpx
Definition: ERF_IndexDefines.H:184
@ ymom
Definition: ERF_IndexDefines.H:194
@ cons
Definition: ERF_IndexDefines.H:192
@ zmom
Definition: ERF_IndexDefines.H:195
@ xmom
Definition: ERF_IndexDefines.H:193
@ qt
Definition: ERF_Kessler.H:28
@ qv
Definition: ERF_Kessler.H:29
@ q
Definition: ERF_WSM6.H:169
Here is the call graph for this function: