ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_Substep_T.cpp File Reference

Functions

void erf_substep_T (int step, int, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stage_data, const MultiFab &S_stage_prim, const MultiFab &qt, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, MultiFab &lagged_delta_rt, MultiFab &avg_xmom, MultiFab &avg_ymom, MultiFab &avg_zmom, const MultiFab &cc_src, const MultiFab &xmom_src, const MultiFab &ymom_src, const MultiFab &zmom_src, const Geometry geom, const Real gravity, std::unique_ptr< MultiFab > &z_phys_nd, std::unique_ptr< MultiFab > &detJ_cc, const Real dtau, const Real beta_s, const Real facinv, Vector< std::unique_ptr< MultiFab >> &mapfac, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, const Real *sinesq_stag_d, const Real l_damp_coef)
 

Function Documentation

◆ erf_substep_T()

void erf_substep_T ( int  step,
int  ,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stage_data,
const MultiFab &  S_stage_prim,
const MultiFab &  qt,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
MultiFab &  lagged_delta_rt,
MultiFab &  avg_xmom,
MultiFab &  avg_ymom,
MultiFab &  avg_zmom,
const MultiFab &  cc_src,
const MultiFab &  xmom_src,
const MultiFab &  ymom_src,
const MultiFab &  zmom_src,
const Geometry  geom,
const Real  gravity,
std::unique_ptr< MultiFab > &  z_phys_nd,
std::unique_ptr< MultiFab > &  detJ_cc,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
Vector< std::unique_ptr< MultiFab >> &  mapfac,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
const Real *  sinesq_stag_d,
const Real  l_damp_coef 
)

Function for computing the fast RHS with fixed-in-time terrain

Parameters
[in]      step             which fast time step within each Runge-Kutta step
[in]      nrk              which Runge-Kutta step
[in]      level            level of resolution
[in]      finest_level     finest level of resolution
[in]      S_slow_rhs       slow RHS computed in erf_slow_rhs_pre
[in]      S_prev           previous solution
[in]      S_stage_data     solution at previous RK stage
[in]      S_stage_prim     primitive variables at previous RK stage
[in]      qt               moisture variable used in the (1 + q) factors in the momentum updates
[in]      pi_stage         Exner function at previous RK stage
[in]      fast_coeffs      coefficients for the tridiagonal solve used in the fast integrator
[out]     S_data           current solution
[in,out]  lagged_delta_rt  (rho theta) perturbation from the previous fast step, used to extrapolate (rho theta) forward in time
[in,out]  avg_xmom         time-averaged x-momentum to be used for updating slow variables
[in,out]  avg_ymom         time-averaged y-momentum to be used for updating slow variables
[in,out]  avg_zmom         time-averaged z-momentum to be used for updating slow variables
[in]      cc_src           source terms for conserved variables
[in]      xmom_src         source terms for x-momentum
[in]      ymom_src         source terms for y-momentum
[in]      zmom_src         source terms for z-momentum
[in]      geom             container for geometric information
[in]      gravity          magnitude of gravity
[in]      z_phys_nd        height coordinate at nodes
[in]      detJ_cc          Jacobian of the metric transformation
[in]      dtau             fast time step
[in]      beta_s           coefficient that determines how implicit vs. explicit the vertical solve is
[in]      facinv           inverse factor for time-averaging the momenta
[in]      mapfac           vector of map factors
[in,out]  fr_as_crse       YAFluxRegister at level l, at the l / l+1 interface
[in,out]  fr_as_fine       YAFluxRegister at level l, at the l-1 / l interface
[in]      l_use_moisture   are we using moisture?
[in]      l_reflux         should we add fluxes to the FluxRegisters?
[in]      sinesq_stag_d    vertical damping profile on the staggered (w) grid; a non-null pointer enables implicit Rayleigh damping of w
[in]      l_damp_coef      coefficient for the implicit Rayleigh damping of w
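
A sketch of how the weights defined at the top of the function body are used (see the listing below): beta_s off-centers the vertically implicit terms,

    \beta_1 = \tfrac{1}{2}(1 - \beta_s) \;(\text{explicit weight}), \qquad
    \beta_2 = \tfrac{1}{2}(1 + \beta_s) \;(\text{implicit weight}), \qquad \beta_1 + \beta_2 = 1,

so beta_s = 1 gives a fully implicit vertical solve, while beta_d = 0.1 projects the (rho theta) perturbation forward in time for the horizontal pressure-gradient terms,

    \Delta(\rho\theta)_{\text{extrap}} = \Delta(\rho\theta) + \beta_d \left[ \Delta(\rho\theta) - \Delta(\rho\theta)_{\text{lagged}} \right],

further multiplied by (1 + (R_v/R_d)\,q_v) when moisture is used.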
74 {
75  BL_PROFILE_REGION("erf_substep_T()");
76 
77  const Box& domain = geom.Domain();
78  auto const domlo = lbound(domain);
79  auto const domhi = ubound(domain);
80 
81  Real beta_1 = 0.5 * (1.0 - beta_s); // multiplies explicit terms
82  Real beta_2 = 0.5 * (1.0 + beta_s); // multiplies implicit terms
83 
84  // How much do we project forward the (rho theta) that is used in the horizontal momentum equations
85  Real beta_d = 0.1;
86 
87  Real RvOverRd = R_v / R_d;
88 
89  bool l_rayleigh_impl_for_w = (sinesq_stag_d != nullptr);
90 
91  const Real* dx = geom.CellSize();
92  const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
93 
94  Real dxi = dxInv[0];
95  Real dyi = dxInv[1];
96  Real dzi = dxInv[2];
97  const auto& ba = S_stage_data[IntVars::cons].boxArray();
98  const auto& dm = S_stage_data[IntVars::cons].DistributionMap();
99 
100  MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
101  MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
102  MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
103  MultiFab Delta_rho ( ba , dm, 1, 1);
104  MultiFab Delta_rho_theta( ba , dm, 1, 1);
105 
106  MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
107  MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
108 
109  MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
110  MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
111  MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
112  MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
113  MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
114 
115  // *************************************************************************
116  // Set gravity as a vector
117  const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
118  const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
119 
120  MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1);
121 
122  // *************************************************************************
123  // First set up some arrays we'll need
124  // *************************************************************************
125 
126 #ifdef _OPENMP
127 #pragma omp parallel if (Gpu::notInLaunchRegion())
128 #endif
129  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
130  {
131  const Array4<Real> & cur_cons = S_data[IntVars::cons].array(mfi);
132  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
133  const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
134  const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
135 
136  const Array4<Real>& old_drho = Delta_rho.array(mfi);
137  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
138  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
139  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
140  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
141 
142  const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
143  const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
144  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
145 
146  const Array4<const Real>& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
147  const Array4<const Real>& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
148  const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
149 
150  Box bx = mfi.validbox();
151  Box gbx = mfi.tilebox(); gbx.grow(1);
152 
153  if (step == 0) {
154  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
155  cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp);
156  cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
157  });
158  } // step = 0
159 
160  Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
161  Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
162  Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
163 
164  const auto& bx_lo = lbound(bx);
165  const auto& bx_hi = ubound(bx);
166 
167  ParallelFor(gtbx, gtby, gtbz,
168  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
169  old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
170  if (k == bx_lo.z && k != domlo.z) {
171  old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
172  } else if (k == bx_hi.z) {
173  old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
174  }
175  },
176  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
177  old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
178  if (k == bx_lo.z && k != domlo.z) {
179  old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
180  } else if (k == bx_hi.z) {
181  old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
182  }
183  },
184  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
185  old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
186  });
187 
188  const Array4<Real>& theta_extrap = extrap.array(mfi);
189  const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
190 
191  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
192  old_drho(i,j,k) = cur_cons(i,j,k,Rho_comp) - stage_cons(i,j,k,Rho_comp);
193  old_drho_theta(i,j,k) = cur_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp);
194  if (step == 0) {
195  theta_extrap(i,j,k) = old_drho_theta(i,j,k);
196  } else {
197  theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
198  ( old_drho_theta(i,j,k) - lagged_arr(i,j,k) );
199  }
200 
201  // NOTE: qv is not changing over the fast steps so we use the stage data
202  Real qv = (l_use_moisture) ? prim(i,j,k,PrimQ1_comp) : 0.0;
203  theta_extrap(i,j,k) *= (1.0 + RvOverRd*qv);
204  });
205  } // mfi
206 
207 #ifdef _OPENMP
208 #pragma omp parallel if (Gpu::notInLaunchRegion())
209 #endif
210  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
211  {
212  // We define lagged_delta_rt for our next step as the current delta_rt
213  Box gbx = mfi.tilebox(); gbx.grow(1);
214  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
215  const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
216  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
217  lagged_arr(i,j,k) = old_drho_theta(i,j,k);
218  });
219  } // mfi
220 
221  // *************************************************************************
222  // Define updates in the current RK stage
223  // *************************************************************************
224 
225 #ifdef _OPENMP
226 #pragma omp parallel if (Gpu::notInLaunchRegion())
227 #endif
228  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
229  {
230  Box bx = mfi.validbox();
231  Box tbx = mfi.nodaltilebox(0);
232  Box tby = mfi.nodaltilebox(1);
233 
234  const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
235  const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
236 
237  const Array4<const Real> & stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
238  const Array4<const Real> & stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
239  const Array4<const Real> & qt_arr = qt.const_array(mfi);
240 
241  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
242  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
243 
244  const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
245  const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);
246 
247  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
248  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
249 
250  const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
251  const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
252 
253  // These store the advection momenta which we will use to update the slow variables
254  const Array4<Real>& avg_xmom_arr = avg_xmom.array(mfi);
255  const Array4<Real>& avg_ymom_arr = avg_ymom.array(mfi);
256 
257  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
258 
259  const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
260 
261  const Array4<Real>& theta_extrap = extrap.array(mfi);
262 
263  // Map factors
264  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
265  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
266 
267  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
268  // Note that we do the Copy and Subtract including one ghost cell
269  // so that we don't have to fill ghost cells of the new MultiFabs
270  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
271  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
272 
273  // *********************************************************************
274  // Define updates in the RHS of {x, y, z}-momentum equations
275  // *********************************************************************
276  {
277  BL_PROFILE("substep_xymom_T");
278 
279  const auto& bx_lo = lbound(bx);
280  const auto& bx_hi = ubound(bx);
281 
282  ParallelFor(tbx, tby,
283  [=] AMREX_GPU_DEVICE (int i, int j, int k)
284  {
285  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
286  Real met_h_xi = Compute_h_xi_AtIface (i, j, k, dxInv, z_nd);
287  Real met_h_zeta = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd);
288  Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
289  Real gp_zeta_on_iface = (k == 0) ?
290  0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
291  -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
292  0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
293  -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
294  Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
295  gpx *= mf_ux(i,j,0);
296 
297  Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) : 0.0;
298 
299  Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
300  Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx / (1.0 + q);
301 
302  new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
303  + dtau * slow_rhs_rho_u(i,j,k)
304  + dtau * xmom_src_arr(i,j,k);
305  if (k == bx_lo.z && k != domlo.z) {
306  new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
307  } else if (k == bx_hi.z) {
308  new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
309  }
310 
311  avg_xmom_arr(i,j,k) += facinv*new_drho_u(i,j,k);
312 
313  cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
314  },
315  [=] AMREX_GPU_DEVICE (int i, int j, int k)
316  {
317  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
318  Real met_h_eta = Compute_h_eta_AtJface(i, j, k, dxInv, z_nd);
319  Real met_h_zeta = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd);
320  Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
321  Real gp_zeta_on_jface = (k == 0) ?
322  0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
323  -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
324  0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
325  -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
326  Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
327  gpy *= mf_vy(i,j,0);
328 
329  Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) : 0.0;
330 
331  Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
332  Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy / (1.0 + q);
333 
334  new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
335  + dtau * slow_rhs_rho_v(i,j,k)
336  + dtau * ymom_src_arr(i,j,k);
337 
338  if (k == bx_lo.z && k != domlo.z) {
339  new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
340  } else if (k == bx_hi.z) {
341  new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
342  }
343 
344  avg_ymom_arr(i,j,k) += facinv*new_drho_v(i,j,k);
345 
346  cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
347  });
348  } // end profile
349  }
350 
351  MultiFab Omega(S_data[IntVars::zmom].boxArray(), dm, 1, 1);
352 
353 #ifdef _OPENMP
354 #pragma omp parallel if (Gpu::notInLaunchRegion())
355 #endif
356  {
357  std::array<FArrayBox,AMREX_SPACEDIM> flux;
358  for ( MFIter mfi(S_stage_data[IntVars::cons],TileNoZ()); mfi.isValid(); ++mfi)
359  {
360  Box bx = mfi.tilebox();
361  Box tbz = surroundingNodes(bx,2);
362 
363  Box vbx = mfi.validbox();
364  const auto& vbx_hi = ubound(vbx);
365 
366  const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
367  const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
368 
369  const Array4<const Real> & stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
370  const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
371  const Array4<const Real> & qt_arr = qt.const_array(mfi);
372 
373  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
374  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
375  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
376  const Array4<Real>& old_drho = Delta_rho.array(mfi);
377  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
378 
379  const Array4<const Real>& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
380  const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
381 
382  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
383  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
384 
385  const Array4<Real>& cur_cons = S_data[IntVars::cons].array(mfi);
386  const Array4<Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
387 
388  // These store the advection momenta which we will use to update the slow variables
389  const Array4<Real>& avg_zmom_arr = avg_zmom.array(mfi);
390 
391  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
392  const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
393 
394  const Array4< Real>& omega_arr = Omega.array(mfi);
395 
396  // Map factors
397  const Array4<const Real>& mf_mx = mapfac[MapFacType::m_x]->const_array(mfi);
398  const Array4<const Real>& mf_my = mapfac[MapFacType::m_y]->const_array(mfi);
399  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
400  const Array4<const Real>& mf_uy = mapfac[MapFacType::u_y]->const_array(mfi);
401  const Array4<const Real>& mf_vx = mapfac[MapFacType::v_x]->const_array(mfi);
402  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
403 
404  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
405  // Note that we do the Copy and Subtract including one ghost cell
406  // so that we don't have to fill ghost cells of the new MultiFabs
407  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
408  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
409 
410  FArrayBox temp_rhs_fab;
411  FArrayBox RHS_fab;
412  FArrayBox soln_fab;
413 
414  RHS_fab.resize (tbz,1,The_Async_Arena());
415  soln_fab.resize (tbz,1,The_Async_Arena());
416  temp_rhs_fab.resize(tbz,2,The_Async_Arena());
417 
418  auto const& RHS_a = RHS_fab.array();
419  auto const& soln_a = soln_fab.array();
420  auto const& temp_rhs_arr = temp_rhs_fab.array();
421 
422  auto const& coeffA_a = coeff_A_mf.array(mfi);
423  auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
424  auto const& coeffC_a = coeff_C_mf.array(mfi);
425  auto const& coeffP_a = coeff_P_mf.array(mfi);
426  auto const& coeffQ_a = coeff_Q_mf.array(mfi);
427 
428  // *************************************************************************
429  // Define flux arrays for use in advection
430  // *************************************************************************
431  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
432  flux[dir].resize(surroundingNodes(bx,dir),2,The_Async_Arena());
433  flux[dir].setVal<RunOn::Device>(0.);
434  }
435  const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
436  flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
437 
438  // *********************************************************************
439  {
440  BL_PROFILE("fast_T_making_rho_rhs");
441  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
442  Real h_zeta_cc_xface_hi = 0.5 * dzi *
443  ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
444  -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
445 
446  Real h_zeta_cc_xface_lo = 0.5 * dzi *
447  ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
448  -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
449 
450  Real h_zeta_cc_yface_hi = 0.5 * dzi *
451  ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
452  -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
453 
454  Real h_zeta_cc_yface_lo = 0.5 * dzi *
455  ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
456  -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
457 
458  Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_uy(i ,j,0);
459  Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_uy(i+1,j,0);
460  Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_vx(i,j ,0);
461  Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_vx(i,j+1,0);
462 
463  Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
464 
465  // NOTE: we are saving the (1/J) weighting for later when we add this to rho and theta
466  temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
467  ( yflux_hi - yflux_lo ) * dyi * mfsq;
468  temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
469  xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
470  ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
471  yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
472 
473  if (l_reflux) {
474  (flx_arr[0])(i,j,k,0) = xflux_lo;
475  (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
476 
477  (flx_arr[1])(i,j,k,0) = yflux_lo;
478  (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
479 
480  if (i == vbx_hi.x) {
481  (flx_arr[0])(i+1,j,k,0) = xflux_hi;
482  (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
483  }
484  if (j == vbx_hi.y) {
485  (flx_arr[1])(i,j+1,k,0) = yflux_hi;
486  (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
487  }
488  }
489  });
490  } // end profile
491 
492  // *********************************************************************
493  {
494  Box gbxo = mfi.nodaltilebox(2);
495  Box gbxo_mid = gbxo;
496 
497  if (gbxo.smallEnd(2) == domlo.z) {
498  Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
499  gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
500  ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
501  omega_arr(i,j,k) = 0.;
502  });
503  }
504  if (gbxo.bigEnd(2) == domhi.z+1) {
505  Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
506  gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
507  ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
508  omega_arr(i,j,k) = old_drho_w(i,j,k);
509  });
510  }
511  ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
512  omega_arr(i,j,k) = OmegaFromW(i,j,k,old_drho_w(i,j,k),
513  old_drho_u,old_drho_v,
514  mf_ux,mf_vy,z_nd,dxInv);
515  });
516  } // end profile
517  // *********************************************************************
518 
519  Box bx_shrunk_in_k = bx;
520  int klo = tbz.smallEnd(2);
521  int khi = tbz.bigEnd(2);
522  bx_shrunk_in_k.setSmall(2,klo+1);
523  bx_shrunk_in_k.setBig(2,khi-1);
524 
525  // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
526  // We set grav_gpu[2] to be the vector component which is negative
527  // We define halfg to match the notes (which is why we take the absolute value)
528  Real halfg = std::abs(0.5 * grav_gpu[2]);
529 
530  {
531  BL_PROFILE("fast_loop_on_shrunk_t");
532  //Note we don't act on the bottom or top boundaries of the domain
533  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
534  {
535  Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j,k-1)) : 0.0;
536 
537  Real coeff_P = coeffP_a(i,j,k) / (1.0 + q);
538  Real coeff_Q = coeffQ_a(i,j,k) / (1.0 + q);
539 
540  Real theta_t_lo = 0.5 * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
541  Real theta_t_mid = 0.5 * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) );
542  Real theta_t_hi = 0.5 * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
543 
544  // line 2 last two terms (order dtau)
545  Real R0_tmp = -halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )
546  -halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1);
547 
548  // line 3 residuals (order dtau^2) 1.0 <-> beta_2
549  Real R1_tmp = -halfg * ( slow_rhs_cons(i,j,k ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp) )
550  + coeff_P * slow_rhs_cons(i,j,k ,RhoTheta_comp)
551  + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp);
552 
553  Real Omega_kp1 = omega_arr(i,j,k+1);
554  Real Omega_k = omega_arr(i,j,k );
555  Real Omega_km1 = omega_arr(i,j,k-1);
556 
557  Real detJdiff = (detJ(i,j,k) - detJ(i,j,k-1)) / (detJ(i,j,k)*detJ(i,j,k-1));
558 
559  // consolidate lines 4&5 (order dtau^2)
560  R1_tmp += halfg * ( beta_1 * dzi * (Omega_kp1/detJ(i,j,k) + detJdiff*Omega_k - Omega_km1/detJ(i,j,k-1))
561  + temp_rhs_arr(i,j,k,Rho_comp)/detJ(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ(i,j,k-1) );
562 
563  // consolidate lines 6&7 (order dtau^2)
564  R1_tmp += -( coeff_P/detJ(i,j,k ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,RhoTheta_comp) )
565  + coeff_Q/detJ(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
566 
567  // line 1
568  RHS_a(i,j,k) = old_drho_w(i,j,k)
569  + dtau * (slow_rhs_rho_w(i,j,k) + zmom_src_arr(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
570 
571  // We cannot use omega_arr here since that was built with old_rho_u and old_rho_v ...
572  RHS_a(i,j,k) += OmegaFromW(i,j,k,0.,
573  new_drho_u,new_drho_v,
574  mf_ux,mf_vy,z_nd,dxInv);
575  });
576  } // end profile
577 
578  Box b2d = tbz; // Copy constructor
579  b2d.setRange(2,0);
580 
581  auto const lo = lbound(bx);
582  auto const hi = ubound(bx);
583 
584  {
585  BL_PROFILE("substep_b2d_loop_t");
586 #ifdef AMREX_USE_GPU
587  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
588  {
589  // w_klo, w_khi given by specified Dirichlet values
590  RHS_a(i,j,lo.z ) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
591  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
592 
593  // w = specified Dirichlet value at k = lo.z
594  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
595 
596  for (int k = lo.z+1; k <= hi.z+1; k++) {
597  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
598  }
599 
600  cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
601  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
602 
603  for (int k = hi.z; k >= lo.z; k--) {
604  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
605  }
606  });
607 #else
608  for (int j = lo.y; j <= hi.y; ++j) {
609  AMREX_PRAGMA_SIMD
610  for (int i = lo.x; i <= hi.x; ++i)
611  {
612  RHS_a(i,j,lo.z) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
613  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
614  }
615 
616  AMREX_PRAGMA_SIMD
617  for (int i = lo.x; i <= hi.x; ++i)
618  {
619  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
620  soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
621  }
622  }
623 
624  for (int k = lo.z+1; k <= hi.z; ++k) {
625  for (int j = lo.y; j <= hi.y; ++j) {
626  AMREX_PRAGMA_SIMD
627  for (int i = lo.x; i <= hi.x; ++i) {
628  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
629  }
630  }
631  }
632  for (int k = hi.z; k > lo.z; --k) {
633  for (int j = lo.y; j <= hi.y; ++j) {
634  AMREX_PRAGMA_SIMD
635  for (int i = lo.x; i <= hi.x; ++i) {
636  soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
637  }
638  }
639  }
640  if (hi.z == domhi.z) {
641  for (int j = lo.y; j <= hi.y; ++j) {
642  AMREX_PRAGMA_SIMD
643  for (int i = lo.x; i <= hi.x; ++i) {
644  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
645  }
646  }
647  }
648 #endif
649  } // end profile
650 
651  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
652  {
653  cur_zmom(i,j,k) = stage_zmom(i,j,k);
654  });
655 
656  if (lo.z == domlo.z) {
657  tbz.setSmall(2,domlo.z+1);
658  }
659  if (hi.z == domhi.z) {
660  tbz.setBig(2,domhi.z);
661  }
662  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
663  {
664  Real wpp = WFromOmega(i,j,k,soln_a(i,j,k),
665  new_drho_u,new_drho_v,
666  mf_ux,mf_vy,z_nd,dxInv);
667 
668  cur_zmom(i,j,k) += wpp;
669 
670  if (l_rayleigh_impl_for_w) {
671  Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k];
672  cur_zmom(i,j,k) /= (1.0 + damping_coeff);
673  }
674  });
675 
676  // **************************************************************************
677  // Define updates in the RHS of rho and (rho theta)
678  // **************************************************************************
679  {
680  BL_PROFILE("fast_rho_final_update");
681  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
682  {
683  Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
684  Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
685 
686  // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
687  // so we don't update avg_zmom at k=vbx_hi.z+1
688  avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
689  if (l_reflux) {
690  (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
691  }
692 
693  if (k == vbx_hi.z) {
694  avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
695  if (l_reflux) {
696  (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
697  (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
698  }
699  }
700 
701  Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
702  cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
703 
704  Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
705  ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
706  zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
707 
708  cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
709 
710  if (l_reflux) {
711  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
712  }
713 
714  // add in source terms for cell-centered conserved variables
715  cur_cons(i,j,k,Rho_comp) += dtau * cc_src_arr(i,j,k,Rho_comp);
716  cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
717  });
718  } // end profile
719 
720  // We only add to the flux registers in the final RK step
721  if (l_reflux) {
722  int strt_comp_reflux = 0;
723  // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries
724  int num_comp_reflux = 1;
725  if (level < finest_level) {
726  fr_as_crse->CrseAdd(mfi,
727  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
728  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
729  }
730  if (level > 0) {
731  fr_as_fine->FineAdd(mfi,
732  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
733  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
734  }
735 
736  // This is necessary here so we don't go on to the next FArrayBox without
737  // having finished copying the fluxes into the FluxRegisters (since the fluxes
738  // are stored in temporary FArrayBox's)
739  Gpu::streamSynchronize();
740 
741  } // two-way coupling
742  } // mfi
743  } // OMP
744 }
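
A sketch of the vertically implicit step performed in the substep_b2d_loop_t block above, assuming fast_coeffs holds the tridiagonal coefficients A and C, the (already reduced) inverse diagonal B^{-1}, and the (rho theta) couplings P and Q: for each column the solution soln_a (the perturbational vertical momentum in Omega form) satisfies

    A_k\, x_{k-1} + B_k\, x_k + C_k\, x_{k+1} = \text{RHS}_k, \qquad k = \text{lo}.z, \dots, \text{hi}.z + 1,

with Dirichlet-type rows at the bottom and top of the domain, and is obtained by forward elimination followed by back substitution (effectively the Thomas algorithm with a precomputed reduced diagonal):

    x_k \leftarrow \left(\text{RHS}_k - A_k\, x_{k-1}\right) B_k^{-1}, \qquad \text{then} \qquad
    x_k \leftarrow x_k - C_k\, B_k^{-1}\, x_{k+1}.

The interior solution is then converted back to W via WFromOmega and, when sinesq_stag_d is non-null, damped implicitly:

    w \leftarrow \frac{w}{1 + \texttt{l\_damp\_coef}\;\Delta\tau\;\texttt{sinesq\_stag\_d}[k]}.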
Symbols referenced in the listing above:

constexpr amrex::Real R_v (Definition: ERF_Constants.H:11)
constexpr amrex::Real R_d (Definition: ERF_Constants.H:10)
constexpr amrex::Real Gamma (Definition: ERF_Constants.H:19)
enumerator v_x (Definition: ERF_DataStruct.H:23)
enumerator u_y (Definition: ERF_DataStruct.H:24)
enumerator v_y (Definition: ERF_DataStruct.H:24)
enumerator m_y (Definition: ERF_DataStruct.H:24)
enumerator u_x (Definition: ERF_DataStruct.H:23)
enumerator m_x (Definition: ERF_DataStruct.H:23)
#define PrimQ1_comp (Definition: ERF_IndexDefines.H:53)
#define Rho_comp (Definition: ERF_IndexDefines.H:36)
#define RhoTheta_comp (Definition: ERF_IndexDefines.H:37)
#define PrimTheta_comp (Definition: ERF_IndexDefines.H:50)
amrex::Real Real (Definition: ERF_ShocInterface.H:19)
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int &i, int &j, int &k, amrex::Real w, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv) (Definition: ERF_TerrainMetrics.H:412)
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd) (Definition: ERF_TerrainMetrics.H:115)
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd) (Definition: ERF_TerrainMetrics.H:102)
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd) (Definition: ERF_TerrainMetrics.H:142)
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd) (Definition: ERF_TerrainMetrics.H:168)
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv) (Definition: ERF_TerrainMetrics.H:462)
AMREX_FORCE_INLINE amrex::IntVect TileNoZ() (Definition: ERF_TileNoZ.H:11)
enumerator gpy (Definition: ERF_IndexDefines.H:151)
enumerator gpx (Definition: ERF_IndexDefines.H:150)
enumerator ymom (Definition: ERF_IndexDefines.H:160)
enumerator cons (Definition: ERF_IndexDefines.H:158)
enumerator zmom (Definition: ERF_IndexDefines.H:161)
enumerator xmom (Definition: ERF_IndexDefines.H:159)
enumerator qt (Definition: ERF_Kessler.H:27)
enumerator qv (Definition: ERF_Kessler.H:28)