ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_Substep_MT.cpp File Reference
Include dependency graph for ERF_Substep_MT.cpp:

Functions

void erf_substep_MT (int step, int, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stg_data, const MultiFab &S_stg_prim, const MultiFab &qt, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, MultiFab &lagged_delta_rt, MultiFab &avg_xmom, MultiFab &avg_ymom, MultiFab &avg_zmom, const MultiFab &cc_src, const MultiFab &xmom_src, const MultiFab &ymom_src, const MultiFab &zmom_src, const Geometry geom, const Real gravity, const bool use_lagged_delta_rt, std::unique_ptr< MultiFab > &z_t_rk, const MultiFab *z_t_pert, std::unique_ptr< MultiFab > &z_phys_nd_old, std::unique_ptr< MultiFab > &z_phys_nd_new, std::unique_ptr< MultiFab > &z_phys_nd_stg, std::unique_ptr< MultiFab > &detJ_cc_old, std::unique_ptr< MultiFab > &detJ_cc_new, std::unique_ptr< MultiFab > &detJ_cc_stg, const Real dtau, const Real beta_s, const Real facinv, Vector< std::unique_ptr< MultiFab >> &mapfac, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, const Real *sinesq_stag_d, const Real l_damp_coef)
 

Function Documentation

◆ erf_substep_MT()

void erf_substep_MT ( int  step,
int  ,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stg_data,
const MultiFab &  S_stg_prim,
const MultiFab &  qt,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
MultiFab &  lagged_delta_rt,
MultiFab &  avg_xmom,
MultiFab &  avg_ymom,
MultiFab &  avg_zmom,
const MultiFab &  cc_src,
const MultiFab &  xmom_src,
const MultiFab &  ymom_src,
const MultiFab &  zmom_src,
const Geometry  geom,
const Real  gravity,
const bool  use_lagged_delta_rt,
std::unique_ptr< MultiFab > &  z_t_rk,
const MultiFab *  z_t_pert,
std::unique_ptr< MultiFab > &  z_phys_nd_old,
std::unique_ptr< MultiFab > &  z_phys_nd_new,
std::unique_ptr< MultiFab > &  z_phys_nd_stg,
std::unique_ptr< MultiFab > &  detJ_cc_old,
std::unique_ptr< MultiFab > &  detJ_cc_new,
std::unique_ptr< MultiFab > &  detJ_cc_stg,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
Vector< std::unique_ptr< MultiFab >> &  mapfac,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
const Real sinesq_stag_d,
const Real  l_damp_coef 
)

Function for computing the fast RHS with moving terrain

Parameters
[in]stepwhich fast time step within each Runge-Kutta step
[in]nrkwhich Runge-Kutta step
[in]levellevel of resolution
[in]finest_levelfinest level of resolution
[in]S_slow_rhsslow RHS computed in erf_slow_rhs_pre
[in]S_prevprevious solution
[in]S_stg_datasolution at previous RK stage
[in]S_stg_primprimitive variables at previous RK stage
[in]pi_stageExner function at previous RK stage
[in]fast_coeffscoefficients for the tridiagonal solve used in the fast integrator
[out]S_datacurrent solution
[in,out]lagged_delta_rt
[in,out]avg_xmomtime-averaged x-momentum to be used for updating slow variables
[in,out]avg_ymomtime-averaged y-momentum to be used for updating slow variables
[in,out]avg_zmomtime-averaged z-momentum to be used for updating slow variables
[in]cc_srcsource terms for conserved variables
[in]xmom_srcsource terms for x-momentum
[in]ymom_srcsource terms for y-momentum
[in]zmom_srcsource terms for z-momentum
[in]geomcontainer for geometric information
[in]gravityMagnitude of gravity
[in]use_lagged_delta_rtdefine lagged_delta_rt for our next step
[in]z_t_rkrate of change of grid height – only relevant for moving terrain
[in]z_t_pertrate of change of grid height – interpolated between RK stages
[in]z_phys_nd_oldheight coordinate at nodes at old time
[in]z_phys_nd_newheight coordinate at nodes at new time
[in]z_phys_nd_stgheight coordinate at nodes at previous stage
[in]detJ_cc_oldJacobian of the metric transformation at old time
[in]detJ_cc_newJacobian of the metric transformation at new time
[in]detJ_cc_stgJacobian of the metric transformation at previous stage
[in]dtaufast time step
[in]beta_sCoefficient which determines how implicit vs explicit the solve is
[in]facinvinverse factor for time-averaging the momenta
[in]mapfacvector of map factors
[in,out]fr_as_crseYAFluxRegister at level l at level l / l+1 interface
[in,out]fr_as_fineYAFluxRegister at level l at level l-1 / l interface
[in]l_use_moisture
[in]l_refluxshould we add fluxes to the FluxRegisters?
[in]l_damp_coef
88 {
89  BL_PROFILE_REGION("erf_substep_MT()");
90 
91  Real beta_1 = myhalf * (one - beta_s); // multiplies explicit terms
92  Real beta_2 = myhalf * (one + beta_s); // multiplies implicit terms
93 
94  // How much do we project forward the (rho theta) that is used in the horizontal momentum equations
95  Real beta_d = Real(0.1);
96 
97  Real RvOverRd = R_v / R_d;
98 
99  bool l_rayleigh_impl_for_w = (sinesq_stag_d != nullptr);
100 
101  const Real* dx = geom.CellSize();
102  const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
103 
104  Real dxi = dxInv[0];
105  Real dyi = dxInv[1];
106  Real dzi = dxInv[2];
107 
108  MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
109  MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
110  MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
111  MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
112  MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
113 
114  // *************************************************************************
115  // Set gravity as a vector
116  const Array<Real,AMREX_SPACEDIM> grav{zero, zero, -gravity};
117  const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
118 
119  MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1);
120 
121  MultiFab Omega(S_data[IntVars::zmom].boxArray(), S_data[IntVars::zmom].DistributionMap(), 1, 1);
122 
123  // *************************************************************************
124  // Define updates in the current RK stg
125  // *************************************************************************
126 #ifdef _OPENMP
127 #pragma omp parallel if (Gpu::notInLaunchRegion())
128 #endif
129  {
130  FArrayBox temp_rhs_fab;
131 
132  FArrayBox RHS_fab;
133  FArrayBox soln_fab;
134 
135  std::array<FArrayBox,AMREX_SPACEDIM> flux;
136 
137  // NOTE: we leave tiling off here for efficiency -- to make this loop work with tiling
138  // will require additional changes
139  for ( MFIter mfi(S_stg_data[IntVars::cons],false); mfi.isValid(); ++mfi)
140  {
141  Box bx = mfi.tilebox();
142  Box tbx = surroundingNodes(bx,0);
143  Box tby = surroundingNodes(bx,1);
144  Box tbz = surroundingNodes(bx,2);
145 
146  Box vbx = mfi.validbox();
147  const auto& vbx_hi = ubound(vbx);
148 
149  const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
150  const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
151  const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
152  const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
153 
154  const Array4<const Real> & stg_cons = S_stg_data[IntVars::cons].const_array(mfi);
155  const Array4<const Real> & stg_xmom = S_stg_data[IntVars::xmom].const_array(mfi);
156  const Array4<const Real> & stg_ymom = S_stg_data[IntVars::ymom].const_array(mfi);
157  const Array4<const Real> & stg_zmom = S_stg_data[IntVars::zmom].const_array(mfi);
158  const Array4<const Real> & prim = S_stg_prim.const_array(mfi);
159  const Array4<const Real> & qt_arr = qt.const_array(mfi);
160 
161  const Array4<const Real>& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
162  const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
163  const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);
164  const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
165 
166  const Array4<Real>& cur_cons = S_data[IntVars::cons].array(mfi);
167  const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
168  const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
169  const Array4<Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
170 
171  const Array4<Real>& lagged = lagged_delta_rt.array(mfi);
172 
173  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
174  const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
175  const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
176  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
177 
178  // These store the advection momenta which we will use to update the slow variables
179  const Array4<Real>& avg_xmom_arr = avg_xmom.array(mfi);
180  const Array4<Real>& avg_ymom_arr = avg_ymom.array(mfi);
181  const Array4<Real>& avg_zmom_arr = avg_zmom.array(mfi);
182 
183  const Array4<const Real>& z_nd_old = z_phys_nd_old->const_array(mfi);
184  const Array4<const Real>& z_nd_new = z_phys_nd_new->const_array(mfi);
185  const Array4<const Real>& z_nd_stg = z_phys_nd_stg->const_array(mfi);
186  const Array4<const Real>& detJ_old = detJ_cc_old->const_array(mfi);
187  const Array4<const Real>& detJ_new = detJ_cc_new->const_array(mfi);
188  const Array4<const Real>& detJ_stg = detJ_cc_stg->const_array(mfi);
189 
190  const Array4<const Real>& z_t_arr = z_t_rk->const_array(mfi);
191  const Array4<const Real>& zp_t_arr = z_t_pert->const_array(mfi);
192 
193  const Array4< Real>& omega_arr = Omega.array(mfi);
194 
195  // Map factors
196  const Array4<const Real>& mf_mx = mapfac[MapFacType::m_x]->const_array(mfi);
197  const Array4<const Real>& mf_my = mapfac[MapFacType::m_y]->const_array(mfi);
198  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
199  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
200 
201  // *********************************************************************
202  // This must be done before we set cur_xmom and cur_ymom, since those
203  // in fact point to the same array as prev_xmom and prev_ymom
204  // *********************************************************************
205  Box gbxo = mfi.nodaltilebox(2);
206  {
207  BL_PROFILE("fast_MT_making_omega");
208  Box gbxo_lo = gbxo; gbxo_lo.setBig(2,0);
209  ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
210  omega_arr(i,j,k) = zero;
211  });
212  Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
213  ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
214  omega_arr(i,j,k) = prev_zmom(i,j,k) - stg_zmom(i,j,k) - zp_t_arr(i,j,k);
215  });
216  Box gbxo_mid = gbxo; gbxo_mid.setSmall(2,1); gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
217  ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
218  omega_arr(i,j,k) =
219  ( OmegaFromW(i,j,k,prev_zmom(i,j,k),prev_xmom,prev_ymom,mf_ux,mf_vy,z_nd_old,dxInv)
220  -OmegaFromW(i,j,k, stg_zmom(i,j,k), stg_xmom, stg_ymom,mf_ux,mf_vy,z_nd_old,dxInv) )
221  - zp_t_arr(i,j,k);
222  });
223  } // end profile
224  // *********************************************************************
225 
226  const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
227 
228  const Array4<Real>& theta_extrap = extrap.array(mfi);
229 
230  // Note: it is important to grow the tilebox rather than use growntilebox because
231  // we need to fill the ghost cells of the tilebox so we can use them below
232  Box gbx = mfi.tilebox(); gbx.grow(1);
233  Box gtbx = mfi.nodaltilebox(0); gtbx.grow(1); gtbx.setSmall(2,0);
234  Box gtby = mfi.nodaltilebox(1); gtby.grow(1); gtby.setSmall(2,0);
235 
236  if (step == 0) {
237  ParallelFor(gbx,
238  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
239  cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp);
240  cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
241 
242  Real delta_rt = cur_cons(i,j,k,RhoTheta_comp) - stg_cons(i,j,k,RhoTheta_comp);
243  theta_extrap(i,j,k) = delta_rt;
244 
245  // NOTE: qv is not changing over the fast steps so we use the stage data
246  Real qv = (l_use_moisture) ? prim(i,j,k,PrimQ1_comp) : zero;
247  theta_extrap(i,j,k) *= (one + RvOverRd*qv);
248 
249  // We define lagged_delta_rt for our next step as the current delta_rt
250  lagged(i,j,k) = delta_rt;
251  });
252  } else if (use_lagged_delta_rt) {
253  // This is the default for cases with no or static terrain
254  ParallelFor(gbx,
255  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
256  Real delta_rt = cur_cons(i,j,k,RhoTheta_comp) - stg_cons(i,j,k,RhoTheta_comp);
257  theta_extrap(i,j,k) = delta_rt + beta_d * (delta_rt - lagged(i,j,k));
258 
259  // NOTE: qv is not changing over the fast steps so we use the stage data
260  Real qv = (l_use_moisture) ? prim(i,j,k,PrimQ1_comp) : zero;
261  theta_extrap(i,j,k) *= (one + RvOverRd*qv);
262 
263  // We define lagged_delta_rt for our next step as the current delta_rt
264  lagged(i,j,k) = delta_rt;
265  });
266  } else {
267  // For the moving wave problem, this choice seems more robust
268  ParallelFor(gbx,
269  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
270  theta_extrap(i,j,k) = cur_cons(i,j,k,RhoTheta_comp) - stg_cons(i,j,k,RhoTheta_comp);
271 
272  // NOTE: qv is not changing over the fast steps so we use the stage data
273  Real qv = (l_use_moisture) ? prim(i,j,k,PrimQ1_comp) : zero;
274  theta_extrap(i,j,k) *= (one + RvOverRd*qv);
275  });
276  } // if step
277 
278  RHS_fab.resize (tbz,1, The_Async_Arena());
279  soln_fab.resize (tbz,1, The_Async_Arena());
280  temp_rhs_fab.resize(tbz,2, The_Async_Arena());
281 
282  auto const& RHS_a = RHS_fab.array();
283  auto const& soln_a = soln_fab.array();
284  auto const& temp_rhs_arr = temp_rhs_fab.array();
285 
286  auto const& coeffA_a = coeff_A_mf.array(mfi);
287  auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
288  auto const& coeffC_a = coeff_C_mf.array(mfi);
289  auto const& coeffP_a = coeff_P_mf.array(mfi);
290  auto const& coeffQ_a = coeff_Q_mf.array(mfi);
291 
292  // *********************************************************************
293  // Define updates in the RHS of {x, y, z}-momentum equations
294  // *********************************************************************
295  {
296  BL_PROFILE("substep_xymom_T");
297  ParallelFor(tbx, tby,
298  [=] AMREX_GPU_DEVICE (int i, int j, int k)
299  {
300  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
301  Real h_xi_old = Compute_h_xi_AtIface(i, j, k, dxInv, z_nd_old);
302  Real h_zeta_old = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd_old);
303  Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
304  Real gp_zeta_on_iface = (k == 0) ?
305  myhalf * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
306  - theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
307  fourth * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
308  - theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
309  Real gpx = h_zeta_old * gp_xi - h_xi_old * gp_zeta_on_iface;
310  gpx *= mf_ux(i,j,0);
311 
312  Real q = (l_use_moisture) ? myhalf * (qt_arr(i-1,j,k) + qt_arr(i,j,k)) : zero;
313 
314  Real pi_c = myhalf * (pi_stage_ca(i-1,j,k) + pi_stage_ca(i ,j,k));
315  Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx / (one + q);
316 
317  // We have already scaled the source terms to have the extra factor of dJ
318  cur_xmom(i,j,k) = h_zeta_old * prev_xmom(i,j,k) + dtau * fast_rhs_rho_u
319  + dtau * slow_rhs_rho_u(i,j,k)
320  + dtau * xmom_src_arr(i,j,k);
321  },
322  [=] AMREX_GPU_DEVICE (int i, int j, int k)
323  {
324  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
325  Real h_eta_old = Compute_h_eta_AtJface(i, j, k, dxInv, z_nd_old);
326  Real h_zeta_old = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd_old);
327  Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
328  Real gp_zeta_on_jface = (k == 0) ?
329  myhalf * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
330  - theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
331  fourth * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
332  - theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
333  Real gpy = h_zeta_old * gp_eta - h_eta_old * gp_zeta_on_jface;
334  gpy *= mf_vy(i,j,0);
335 
336  Real q = (l_use_moisture) ? myhalf * (qt_arr(i,j-1,k) + qt_arr(i,j,k)) : zero;
337 
338  Real pi_c = myhalf * (pi_stage_ca(i,j-1,k) + pi_stage_ca(i,j ,k));
339  Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy / (one + q);
340 
341  // We have already scaled the source terms to have the extra factor of dJ
342  cur_ymom(i, j, k) = h_zeta_old * prev_ymom(i,j,k) + dtau * fast_rhs_rho_v
343  + dtau * slow_rhs_rho_v(i,j,k)
344  + dtau * ymom_src_arr(i,j,k);
345  });
346  } // end profile
347 
348  // *************************************************************************
349  // Define flux arrays for use in advection
350  // *************************************************************************
351  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
352  flux[dir].resize(surroundingNodes(bx,dir),2,The_Async_Arena());
353  flux[dir].setVal<RunOn::Device>(0);
354  }
355  const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
356  flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
357 
358  // *********************************************************************
359  {
360  BL_PROFILE("fast_T_making_rho_rhs");
361  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
362  {
363  Real h_zeta_stg_xlo = Compute_h_zeta_AtIface(i, j , k, dxInv, z_nd_stg);
364  Real h_zeta_stg_xhi = Compute_h_zeta_AtIface(i+1,j , k, dxInv, z_nd_stg);
365  Real xflux_lo = cur_xmom(i ,j,k) - stg_xmom(i ,j,k)*h_zeta_stg_xlo;
366  Real xflux_hi = cur_xmom(i+1,j,k) - stg_xmom(i+1,j,k)*h_zeta_stg_xhi;
367 
368  Real h_zeta_stg_yhi = Compute_h_zeta_AtJface(i, j+1, k, dxInv, z_nd_stg);
369  Real h_zeta_stg_ylo = Compute_h_zeta_AtJface(i, j , k, dxInv, z_nd_stg);
370  Real yflux_lo = cur_ymom(i,j ,k) - stg_ymom(i,j ,k)*h_zeta_stg_ylo;
371  Real yflux_hi = cur_ymom(i,j+1,k) - stg_ymom(i,j+1,k)*h_zeta_stg_yhi;
372 
373  // NOTE: we are saving the (1/J) weighting for later when we add this to rho and theta
374  temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi + ( yflux_hi - yflux_lo ) * dyi;
375  temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
376  xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi +
377  ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
378  yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi) * myhalf;
379 
380  if (l_reflux) {
381  (flx_arr[0])(i,j,k,0) = xflux_lo;
382  (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * myhalf * (prim(i,j,k,0) + prim(i-1,j,k,0));
383 
384  (flx_arr[1])(i,j,k,0) = yflux_lo;
385  (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * myhalf * (prim(i,j,k,0) + prim(i,j-1,k,0));
386 
387  if (i == vbx_hi.x) {
388  (flx_arr[0])(i+1,j,k,0) = xflux_hi;
389  (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * myhalf * (prim(i,j,k,0) + prim(i+1,j,k,0));
390  }
391  if (j == vbx_hi.y) {
392  (flx_arr[1])(i,j+1,k,0) = yflux_hi;
393  (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * myhalf * (prim(i,j,k,0) + prim(i,j+1,k,0));
394  }
395  }
396  });
397  } // end profile
398 
399  ParallelFor(tbx, tby,
400  [=] AMREX_GPU_DEVICE (int i, int j, int k)
401  {
402  Real h_zeta_new = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd_new);
403  cur_xmom(i, j, k) /= h_zeta_new;
404  avg_xmom_arr(i,j,k) += facinv*(cur_xmom(i,j,k) - stg_xmom(i,j,k));
405  },
406  [=] AMREX_GPU_DEVICE (int i, int j, int k)
407  {
408  Real h_zeta_new = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd_new);
409  cur_ymom(i, j, k) /= h_zeta_new;
410  avg_ymom_arr(i,j,k) += facinv*(cur_ymom(i,j,k) - stg_ymom(i,j,k));
411  });
412 
413  Box bx_shrunk_in_k = bx;
414  int klo = tbz.smallEnd(2);
415  int khi = tbz.bigEnd(2);
416  bx_shrunk_in_k.setSmall(2,klo+1);
417  bx_shrunk_in_k.setBig(2,khi-1);
418 
419  // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
420  // We set grav_gpu[2] to be the vector component which is negative
421  // We define halfg to match the notes (which is why we take the absolute value)
422  Real halfg = std::abs(myhalf * grav_gpu[2]);
423 
424  {
425  BL_PROFILE("fast_loop_on_shrunk_t");
426  //Note we don't act on the bottom or top boundaries of the domain
427  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
428  {
429  Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1));
430  Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1));
431  Real dJ_stg_kface = 0.5 * (detJ_stg(i,j,k) + detJ_stg(i,j,k-1));
432 
433  Real coeff_P = coeffP_a(i,j,k);
434  Real coeff_Q = coeffQ_a(i,j,k);
435 
436  Real theta_t_lo = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
437  Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) );
438  Real theta_t_hi = myhalf * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
439 
440  // line 2 last two terms (order dtau)
441  Real R0_tmp = coeff_P * cur_cons(i,j,k ,RhoTheta_comp)
442  + coeff_Q * cur_cons(i,j,k-1,RhoTheta_comp)
443  - coeff_P * stg_cons(i,j,k ,RhoTheta_comp) * (dJ_stg_kface/dJ_old_kface)
444  - coeff_Q * stg_cons(i,j,k-1,RhoTheta_comp) * (dJ_stg_kface/dJ_old_kface)
445  - halfg * ( cur_cons(i,j,k,Rho_comp) + cur_cons(i,j,k-1,Rho_comp) )
446  + halfg * ( stg_cons(i,j,k,Rho_comp) + stg_cons(i,j,k-1,Rho_comp) ) * (dJ_stg_kface/dJ_old_kface);
447 
448  // line 3 residuals (order dtau^2) one <-> beta_2
449  Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp))
450  + coeff_P * slow_rhs_cons(i,j,k ,RhoTheta_comp) + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp);
451 
452  Real Omega_kp1 = omega_arr(i,j,k+1);
453  Real Omega_k = omega_arr(i,j,k );
454  Real Omega_km1 = omega_arr(i,j,k-1);
455 
456  Real detJdiff = (detJ_old(i,j,k) - detJ_old(i,j,k-1)) / (detJ_old(i,j,k)*detJ_old(i,j,k-1));
457 
458  // consolidate lines 4&5 (order dtau^2)
459  R1_tmp += halfg * ( beta_1 * dzi * (Omega_kp1/detJ_old(i,j,k) + detJdiff*Omega_k - Omega_km1/detJ_old(i,j,k-1))
460  + temp_rhs_arr(i,j,k,Rho_comp)/detJ_old(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ_old(i,j,k-1) );
461 
462  // consolidate lines 6&7 (order dtau^2)
463  R1_tmp += -(
464  coeff_P/detJ_old(i,j,k ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid)
465  +temp_rhs_arr(i,j,k ,RhoTheta_comp) ) +
466  coeff_Q/detJ_old(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo)
467  +temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
468 
469  // line 1
470  RHS_a(i,j,k) = prev_zmom(i,j,k) - (dJ_stg_kface/dJ_old_kface) * stg_zmom(i,j,k)
471  + dtau * slow_rhs_rho_w(i,j,k) / dJ_stg_kface
472  + dtau * zmom_src_arr(i,j,k);
473 
474  RHS_a(i,j,k) += dtau * R0_tmp;
475 
476  RHS_a(i,j,k) += dtau * dtau*beta_2*R1_tmp;
477 
478  // We cannot use omega_arr here since that was built with old_rho_u and old_rho_v ...
479  Real UppVpp = (dJ_new_kface/dJ_old_kface) * OmegaFromW(i,j,k,0.,cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv)
480  -(dJ_stg_kface/dJ_old_kface) * OmegaFromW(i,j,k,0.,stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv);
481  RHS_a(i,j,k) += UppVpp;
482  });
483  } // end profile
484 
485  Box b2d = tbz; // Copy constructor
486  b2d.setRange(2,0);
487 
488  auto const lo = lbound(bx);
489  auto const hi = ubound(bx);
490 
491  {
492  BL_PROFILE("substep_b2d_loop_t");
493 
494 #ifdef AMREX_USE_GPU
495  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
496  {
497  // Moving terrain
498  Real rho_on_bdy = myhalf * ( prev_cons(i,j,lo.z) + prev_cons(i,j,lo.z-1) );
499  RHS_a(i,j,lo.z) = rho_on_bdy * zp_t_arr(i,j,lo.z);
500 
501  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
502 
503  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
504 
505  for (int k = lo.z+1; k <= hi.z+1; k++) {
506  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
507  }
508 
509  for (int k = hi.z; k >= lo.z; k--) {
510  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
511  }
512 
513  // We assume that Omega == w at the top boundary and that changes in J there are irrelevant
514  cur_zmom(i,j,hi.z+1) = stg_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
515  });
516 #else
517  for (int j = lo.y; j <= hi.y; ++j) {
518  AMREX_PRAGMA_SIMD
519  for (int i = lo.x; i <= hi.x; ++i) {
520 
521  Real rho_on_bdy = myhalf * ( prev_cons(i,j,lo.z) + prev_cons(i,j,lo.z-1) );
522  RHS_a(i,j,lo.z) = rho_on_bdy * zp_t_arr(i,j,lo.z);
523 
524  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
525  }
526  }
527 
528  for (int j = lo.y; j <= hi.y; ++j) {
529  AMREX_PRAGMA_SIMD
530  for (int i = lo.x; i <= hi.x; ++i) {
531  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
532  }
533  }
534  for (int k = lo.z+1; k <= hi.z+1; ++k) {
535  for (int j = lo.y; j <= hi.y; ++j) {
536  AMREX_PRAGMA_SIMD
537  for (int i = lo.x; i <= hi.x; ++i) {
538  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
539  }
540  }
541  }
542  for (int k = hi.z; k >= lo.z; --k) {
543  for (int j = lo.y; j <= hi.y; ++j) {
544  AMREX_PRAGMA_SIMD
545  for (int i = lo.x; i <= hi.x; ++i) {
546  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
547  }
548  }
549  }
550 
551  // We assume that Omega == w at the top boundary and that changes in J there are irrelevant
552  for (int j = lo.y; j <= hi.y; ++j) {
553  AMREX_PRAGMA_SIMD
554  for (int i = lo.x; i <= hi.x; ++i) {
555  cur_zmom(i,j,hi.z+1) = stg_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
556  }
557  }
558 #endif
559  } // end profile
560 
561  {
562  BL_PROFILE("substep_new_drhow");
563  tbz.setBig(2,hi.z);
564  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
565  {
566  Real rho_on_face = myhalf * (cur_cons(i,j,k,Rho_comp) + cur_cons(i,j,k-1,Rho_comp));
567 
568  if (k == lo.z) {
569  cur_zmom(i,j,k) = WFromOmega(i,j,k,rho_on_face*(z_t_arr(i,j,k)+zp_t_arr(i,j,k)),
570  cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv);
571 
572  // We need to set this here because it is used to define zflux_lo below
573  soln_a(i,j,k) = zero;
574 
575  } else {
576 
577  Real UppVpp = WFromOmega(i,j,k,zero,cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv)
578  - WFromOmega(i,j,k,zero,stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv);
579  Real wpp = soln_a(i,j,k) + UppVpp;
580  Real dJ_old_kface = myhalf * (detJ_old(i,j,k) + detJ_old(i,j,k-1));
581  Real dJ_new_kface = myhalf * (detJ_new(i,j,k) + detJ_new(i,j,k-1));
582 
583  cur_zmom(i,j,k) = dJ_old_kface * (stg_zmom(i,j,k) + wpp);
584  cur_zmom(i,j,k) /= dJ_new_kface;
585 
586  soln_a(i,j,k) = OmegaFromW(i,j,k,cur_zmom(i,j,k),cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv)
587  - OmegaFromW(i,j,k,stg_zmom(i,j,k),stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv);
588  soln_a(i,j,k) -= rho_on_face * zp_t_arr(i,j,k);
589  }
590 
591  if (l_rayleigh_impl_for_w && k > 0) {
592  Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k];
593  cur_zmom(i,j,k) /= (one + damping_coeff);
594  }
595  });
596  } // end profile
597 
598  // **************************************************************************
599  // Define updates in the RHS of rho and (rho theta)
600  // **************************************************************************
601  {
602  BL_PROFILE("fast_rho_final_update");
603  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
604  {
605  Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
606  Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
607 
608  // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
609  // so we don't update avg_zmom at k=vbx_hi.z+1
610  avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
611  if (l_reflux) {
612  (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
613  }
614 
615  // Note that the factor of (1/J) in the fast source term is canceled
616  // when we multiply old and new by detJ_old and detJ_new , respectively
617  // We have already scaled the slow source term to have the extra factor of dJ
618  Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi);
619  Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + myhalf *
620  ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
621  - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi );
622 
623  cur_cons(i,j,k,0) *= (detJ_old(i,j,k)/detJ_new(i,j,k));
624  cur_cons(i,j,k,1) *= (detJ_old(i,j,k)/detJ_new(i,j,k));
625 
626  cur_cons(i,j,k,0) += dtau * ( slow_rhs_cons(i,j,k,0) + fast_rhs_rho / detJ_new(i,j,k));
627  cur_cons(i,j,k,1) += dtau * ( slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta / detJ_new(i,j,k));
628 
629  if (l_reflux) {
630  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1));
631  }
632 
633  if (k == vbx_hi.z) {
634  avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
635  if (l_reflux) {
636  (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
637  (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * myhalf * (prim(i,j,k) + prim(i,j,k+1));
638  }
639  }
640 
641  // add in source terms for cell-centered conserved variables
642  cur_cons(i,j,k,Rho_comp) += dtau * cc_src_arr(i,j,k,Rho_comp);
643  cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
644  });
645  } // end profile
646 
647  // We only add to the flux registers in the final RK step
648  if (l_reflux) {
649  int strt_comp_reflux = 0;
650  // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries
651  int num_comp_reflux = 1;
652  if (level < finest_level) {
653  fr_as_crse->CrseAdd(mfi,
654  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
655  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
656  }
657  if (level > 0) {
658  fr_as_fine->FineAdd(mfi,
659  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
660  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
661  }
662 
663  // This is necessary here so we don't go on to the next FArrayBox without
664  // having finished copying the fluxes into the FluxRegisters (since the fluxes
665  // are stored in temporary FArrayBox's)
666  Gpu::streamSynchronize();
667 
668  } // two-way coupling
669 
670  } // mfi
671  } // OMP
672 }
constexpr amrex::Real R_v
Definition: ERF_Constants.H:21
constexpr amrex::Real one
Definition: ERF_Constants.H:7
constexpr amrex::Real fourth
Definition: ERF_Constants.H:12
constexpr amrex::Real zero
Definition: ERF_Constants.H:6
constexpr amrex::Real myhalf
Definition: ERF_Constants.H:11
constexpr amrex::Real R_d
Definition: ERF_Constants.H:20
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:29
@ v_y
Definition: ERF_DataStruct.H:25
@ m_y
Definition: ERF_DataStruct.H:25
@ u_x
Definition: ERF_DataStruct.H:24
@ m_x
Definition: ERF_DataStruct.H:24
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
amrex::GpuArray< Real, AMREX_SPACEDIM > dxInv
Definition: ERF_InitCustomPertVels_ParticleTests.H:17
const Real dx
Definition: ERF_InitCustomPert_ABL.H:23
const int khi
Definition: ERF_InitCustomPert_Bubble.H:21
ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { const auto prob_lo=geomdata.ProbLo();const auto dx=geomdata.CellSize();const Real x=(prob_lo[0]+(i+myhalf) *dx[0])/mf_m(i, j, 0);const Real z=z_cc(i, j, k);Real L=std::sqrt(std::pow((x - x_c)/x_r, 2)+std::pow((z - z_c)/z_r, 2));if(L<=one) { Real dT=T_pert *(std::cos(PI *L)+one)/two;Real Tbar_hse=p_hse(i, j, k)/(R_d *r_hse(i, j, k));Real theta_perturbed=(Tbar_hse+dT) *std::pow(p_0/p_hse(i, j, k), rdOcp);Real theta_0=(Tbar_hse) *std::pow(p_0/p_hse(i, j, k), rdOcp);if(const_rho) { state_pert(i, j, k, RhoTheta_comp)=r_hse(i, j, k) *(theta_perturbed - theta_0);} else { state_pert(i, j, k, Rho_comp)=getRhoThetagivenP(p_hse(i, j, k))/theta_perturbed - r_hse(i, j, k);} } })
amrex::Real Real
Definition: ERF_ShocInterface.H:19
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int &i, int &j, int &k, amrex::Real w, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:414
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:117
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:104
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:144
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:170
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:464
@ gpy
Definition: ERF_IndexDefines.H:169
@ gpx
Definition: ERF_IndexDefines.H:168
@ ymom
Definition: ERF_IndexDefines.H:178
@ cons
Definition: ERF_IndexDefines.H:176
@ zmom
Definition: ERF_IndexDefines.H:179
@ xmom
Definition: ERF_IndexDefines.H:177
@ qt
Definition: ERF_Kessler.H:27
@ qv
Definition: ERF_Kessler.H:28
Here is the call graph for this function: