ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
ERF_FastRhs_T.cpp File Reference
Include dependency graph for ERF_FastRhs_T.cpp:

Functions

void erf_fast_rhs_T (int step, int, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stage_data, const MultiFab &S_stage_prim, const MultiFab &qt, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, Vector< MultiFab > &S_scratch, const MultiFab &cc_src, const MultiFab &xmom_src, const MultiFab &ymom_src, const MultiFab &zmom_src, const Geometry geom, const Real gravity, std::unique_ptr< MultiFab > &z_phys_nd, std::unique_ptr< MultiFab > &detJ_cc, const Real dtau, const Real beta_s, const Real facinv, Vector< std::unique_ptr< MultiFab >> &mapfac, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, bool)
 

Function Documentation

◆ erf_fast_rhs_T()

void erf_fast_rhs_T ( int  step,
int  ,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stage_data,
const MultiFab &  S_stage_prim,
const MultiFab &  qt,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
Vector< MultiFab > &  S_scratch,
const MultiFab &  cc_src,
const MultiFab &  xmom_src,
const MultiFab &  ymom_src,
const MultiFab &  zmom_src,
const Geometry  geom,
const Real  gravity,
std::unique_ptr< MultiFab > &  z_phys_nd,
std::unique_ptr< MultiFab > &  detJ_cc,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
Vector< std::unique_ptr< MultiFab >> &  mapfac,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
bool   
)

Function for computing the fast RHS with fixed terrain

Parameters
[in]stepwhich fast time step within each Runge-Kutta step
[in]nrkwhich Runge-Kutta step
[in]levellevel of resolution
[in]finest_levelfinest level of resolution
[in]S_slow_rhsslow RHS computed in erf_slow_rhs_pre
[in]S_prevprevious solution
[in]S_stage_datasolution at previous RK stage
[in]S_stage_primprimitive variables at previous RK stage
[in]pi_stageExner function at previous RK stage
[in]fast_coeffscoefficients for the tridiagonal solve used in the fast integrator
[out]S_datacurrent solution
[in]S_scratchscratch space
[in]cc_srcsource terms for conserved variables
[in]xmom_srcsource terms for x-momentum
[in]ymom_srcsource terms for y-momentum
[in]zmom_srcsource terms for z-momentum
[in]geomcontainer for geometric information
[in]gravitymagnitude of gravity
[in]z_phys_ndheight coordinate at nodes
[in]detJ_ccJacobian of the metric transformation
[in]dtaufast time step
[in]beta_sCoefficient which determines how implicit vs explicit the solve is
[in]facinvinverse factor for time-averaging the momenta
[in]mapfacmap factors
[in,out]fr_as_crseYAFluxRegister at level l at level l / l+1 interface
[in,out]fr_as_fineYAFluxRegister at level l at level l-1 / l interface
[in]l_use_moisture
[in]l_refluxshould we add fluxes to the FluxRegisters?
[in]l_implicit_substepping
67 {
68  BL_PROFILE_REGION("erf_fast_rhs_T()");
69 
70  const Box& domain = geom.Domain();
71  auto const domlo = lbound(domain);
72  auto const domhi = ubound(domain);
73 
74  Real beta_1 = 0.5 * (1.0 - beta_s); // multiplies explicit terms
75  Real beta_2 = 0.5 * (1.0 + beta_s); // multiplies implicit terms
76 
77  // How much do we project forward the (rho theta) that is used in the horizontal momentum equations
78  Real beta_d = 0.1;
79 
80  const Real* dx = geom.CellSize();
81  const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
82 
83  Real dxi = dxInv[0];
84  Real dyi = dxInv[1];
85  Real dzi = dxInv[2];
86  const auto& ba = S_stage_data[IntVars::cons].boxArray();
87  const auto& dm = S_stage_data[IntVars::cons].DistributionMap();
88 
89  MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
90  MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
91  MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
92  MultiFab Delta_rho ( ba , dm, 1, 1);
93  MultiFab Delta_rho_theta( ba , dm, 1, 1);
94 
95  MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
96  MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
97 
98  MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
99  MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
100  MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
101  MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
102  MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
103 
104  // *************************************************************************
105  // Set gravity as a vector
106  const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
107  const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
108 
109  MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1);
110 
111  // *************************************************************************
112  // First set up some arrays we'll need
113  // *************************************************************************
114 
115 #ifdef _OPENMP
116 #pragma omp parallel if (Gpu::notInLaunchRegion())
117 #endif
118  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
119  {
120  const Array4<Real> & cur_cons = S_data[IntVars::cons].array(mfi);
121  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
122  const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
123  const Array4<Real>& lagged_delta_rt = S_scratch[IntVars::cons].array(mfi);
124 
125  const Array4<Real>& old_drho = Delta_rho.array(mfi);
126  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
127  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
128  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
129  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
130 
131  const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
132  const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
133  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
134 
135  const Array4<const Real>& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
136  const Array4<const Real>& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
137  const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
138 
139  Box bx = mfi.validbox();
140  Box gbx = mfi.tilebox(); gbx.grow(1);
141 
142  if (step == 0) {
143  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
144  cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp);
145  cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
146  });
147  } // step = 0
148 
149  Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
150  Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
151  Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
152 
153  const auto& bx_lo = lbound(bx);
154  const auto& bx_hi = ubound(bx);
155 
156  ParallelFor(gtbx, gtby, gtbz,
157  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
158  old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
159  if (k == bx_lo.z && k != domlo.z) {
160  old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
161  } else if (k == bx_hi.z) {
162  old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
163  }
164  },
165  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
166  old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
167  if (k == bx_lo.z && k != domlo.z) {
168  old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
169  } else if (k == bx_hi.z) {
170  old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
171  }
172  },
173  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
174  old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
175  });
176 
177  const Array4<Real>& theta_extrap = extrap.array(mfi);
178 
179  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
180  old_drho(i,j,k) = cur_cons(i,j,k,Rho_comp) - stage_cons(i,j,k,Rho_comp);
181  old_drho_theta(i,j,k) = cur_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp);
182  if (step == 0) {
183  theta_extrap(i,j,k) = old_drho_theta(i,j,k);
184  } else {
185  theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
186  ( old_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,RhoTheta_comp) );
187  }
188  });
189  } // mfi
190 
191 #ifdef _OPENMP
192 #pragma omp parallel if (Gpu::notInLaunchRegion())
193 #endif
194  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
195  {
196  // We define lagged_delta_rt for our next step as the current delta_rt
197  Box gbx = mfi.tilebox(); gbx.grow(1);
198  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
199  const Array4<Real>& lagged_delta_rt = S_scratch[IntVars::cons].array(mfi);
200  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
201  lagged_delta_rt(i,j,k,RhoTheta_comp) = old_drho_theta(i,j,k);
202  });
203  } // mfi
204 
205  // *************************************************************************
206  // Define updates in the current RK stage
207  // *************************************************************************
208 
209 #ifdef _OPENMP
210 #pragma omp parallel if (Gpu::notInLaunchRegion())
211 #endif
212  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
213  {
214  Box bx = mfi.validbox();
215  Box tbx = mfi.nodaltilebox(0);
216  Box tby = mfi.nodaltilebox(1);
217 
218  const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
219  const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
220 
221  const Array4<const Real> & stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
222  const Array4<const Real> & stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
223  const Array4<const Real> & qt_arr = qt.const_array(mfi);
224 
225  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
226  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
227 
228  const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
229  const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);
230 
231  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
232  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
233 
234  const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
235  const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
236 
237  // These store the advection momenta which we will use to update the slow variables
238  const Array4<Real>& avg_xmom = S_scratch[IntVars::xmom].array(mfi);
239  const Array4<Real>& avg_ymom = S_scratch[IntVars::ymom].array(mfi);
240 
241  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
242 
243  const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
244 
245  const Array4<Real>& theta_extrap = extrap.array(mfi);
246 
247  // Map factors
248  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
249  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
250 
251  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
252  // Note that we do the Copy and Subtract including one ghost cell
253  // so that we don't have to fill ghost cells of the new MultiFabs
254  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
255  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
256 
257  // *********************************************************************
258  // Define updates in the RHS of {x, y, z}-momentum equations
259  // *********************************************************************
260  {
261  BL_PROFILE("fast_rhs_xymom_T");
262 
263  const auto& bx_lo = lbound(bx);
264  const auto& bx_hi = ubound(bx);
265 
266  ParallelFor(tbx, tby,
267  [=] AMREX_GPU_DEVICE (int i, int j, int k)
268  {
269  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
270  Real met_h_xi = Compute_h_xi_AtIface (i, j, k, dxInv, z_nd);
271  Real met_h_zeta = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd);
272  Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
273  Real gp_zeta_on_iface = (k == 0) ?
274  0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
275  -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
276  0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
277  -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
278  Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
279  gpx *= mf_ux(i,j,0);
280 
281  Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) : 0.0;
282 
283  Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
284  Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx / (1.0 + q);
285 
286  new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
287  + dtau * slow_rhs_rho_u(i,j,k)
288  + dtau * xmom_src_arr(i,j,k);
289  if (k == bx_lo.z && k != domlo.z) {
290  new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
291  } else if (k == bx_hi.z) {
292  new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
293  }
294 
295  avg_xmom(i,j,k) += facinv*new_drho_u(i,j,k);
296 
297  cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
298  },
299  [=] AMREX_GPU_DEVICE (int i, int j, int k)
300  {
301  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
302  Real met_h_eta = Compute_h_eta_AtJface(i, j, k, dxInv, z_nd);
303  Real met_h_zeta = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd);
304  Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
305  Real gp_zeta_on_jface = (k == 0) ?
306  0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
307  -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
308  0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
309  -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
310  Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
311  gpy *= mf_vy(i,j,0);
312 
313  Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) : 0.0;
314 
315  Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
316  Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy / (1.0 + q);
317 
318  new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
319  + dtau * slow_rhs_rho_v(i,j,k)
320  + dtau * ymom_src_arr(i,j,k);
321 
322  if (k == bx_lo.z && k != domlo.z) {
323  new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
324  } else if (k == bx_hi.z) {
325  new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
326  }
327 
328  avg_ymom(i,j,k) += facinv*new_drho_v(i,j,k);
329 
330  cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
331  });
332  } // end profile
333  }
334 
335  MultiFab Omega(S_data[IntVars::zmom].boxArray(), dm, 1, 1);
336 
337 #ifdef _OPENMP
338 #pragma omp parallel if (Gpu::notInLaunchRegion())
339 #endif
340  {
341  std::array<FArrayBox,AMREX_SPACEDIM> flux;
342  for ( MFIter mfi(S_stage_data[IntVars::cons],TileNoZ()); mfi.isValid(); ++mfi)
343  {
344  Box bx = mfi.tilebox();
345  Box tbz = surroundingNodes(bx,2);
346 
347  Box vbx = mfi.validbox();
348  const auto& vbx_hi = ubound(vbx);
349 
350  const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
351  const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
352 
353  const Array4<const Real> & stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
354  const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
355  const Array4<const Real> & qt_arr = qt.const_array(mfi);
356 
357  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
358  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
359  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
360  const Array4<Real>& old_drho = Delta_rho.array(mfi);
361  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
362 
363  const Array4<const Real>& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
364  const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
365 
366  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
367  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
368 
369  const Array4<Real>& cur_cons = S_data[IntVars::cons].array(mfi);
370  const Array4<Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
371 
372  // These store the advection momenta which we will use to update the slow variables
373  const Array4<Real>& avg_zmom = S_scratch[IntVars::zmom].array(mfi);
374 
375  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
376  const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
377 
378  const Array4< Real>& omega_arr = Omega.array(mfi);
379 
380  // Map factors
381  const Array4<const Real>& mf_mx = mapfac[MapFacType::m_x]->const_array(mfi);
382  const Array4<const Real>& mf_my = mapfac[MapFacType::m_y]->const_array(mfi);
383  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
384  const Array4<const Real>& mf_uy = mapfac[MapFacType::u_y]->const_array(mfi);
385  const Array4<const Real>& mf_vx = mapfac[MapFacType::v_x]->const_array(mfi);
386  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
387 
388  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
389  // Note that we do the Copy and Subtract including one ghost cell
390  // so that we don't have to fill ghost cells of the new MultiFabs
391  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
392  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
393 
394  FArrayBox temp_rhs_fab;
395  FArrayBox RHS_fab;
396  FArrayBox soln_fab;
397 
398  RHS_fab.resize (tbz,1,The_Async_Arena());
399  soln_fab.resize (tbz,1,The_Async_Arena());
400  temp_rhs_fab.resize(tbz,2,The_Async_Arena());
401 
402  auto const& RHS_a = RHS_fab.array();
403  auto const& soln_a = soln_fab.array();
404  auto const& temp_rhs_arr = temp_rhs_fab.array();
405 
406  auto const& coeffA_a = coeff_A_mf.array(mfi);
407  auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
408  auto const& coeffC_a = coeff_C_mf.array(mfi);
409  auto const& coeffP_a = coeff_P_mf.array(mfi);
410  auto const& coeffQ_a = coeff_Q_mf.array(mfi);
411 
412  // *************************************************************************
413  // Define flux arrays for use in advection
414  // *************************************************************************
415  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
416  flux[dir].resize(surroundingNodes(bx,dir),2);
417  flux[dir].setVal<RunOn::Device>(0.);
418  }
419  const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
420  flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
421 
422  // *********************************************************************
423  {
424  BL_PROFILE("fast_T_making_rho_rhs");
425  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
426  Real h_zeta_cc_xface_hi = 0.5 * dzi *
427  ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
428  -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
429 
430  Real h_zeta_cc_xface_lo = 0.5 * dzi *
431  ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
432  -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
433 
434  Real h_zeta_cc_yface_hi = 0.5 * dzi *
435  ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
436  -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
437 
438  Real h_zeta_cc_yface_lo = 0.5 * dzi *
439  ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
440  -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
441 
442  Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_uy(i ,j,0);
443  Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_uy(i+1,j,0);
444  Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_vx(i,j ,0);
445  Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_vx(i,j+1,0);
446 
447  Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
448 
449  // NOTE: we are saving the (1/J) weighting for later when we add this to rho and theta
450  temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
451  ( yflux_hi - yflux_lo ) * dyi * mfsq;
452  temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
453  xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
454  ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
455  yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
456 
457  if (l_reflux) {
458  (flx_arr[0])(i,j,k,0) = xflux_lo;
459  (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
460 
461  (flx_arr[1])(i,j,k,0) = yflux_lo;
462  (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
463 
464  if (i == vbx_hi.x) {
465  (flx_arr[0])(i+1,j,k,0) = xflux_hi;
466  (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
467  }
468  if (j == vbx_hi.y) {
469  (flx_arr[1])(i,j+1,k,0) = yflux_hi;
470  (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
471  }
472  }
473  });
474  } // end profile
475 
476  // *********************************************************************
477  {
478  Box gbxo = mfi.nodaltilebox(2);
479  Box gbxo_mid = gbxo;
480 
481  if (gbxo.smallEnd(2) == domlo.z) {
482  Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
483  gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
484  ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
485  omega_arr(i,j,k) = 0.;
486  });
487  }
488  if (gbxo.bigEnd(2) == domhi.z+1) {
489  Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
490  gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
491  ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
492  omega_arr(i,j,k) = old_drho_w(i,j,k);
493  });
494  }
495  ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
496  omega_arr(i,j,k) = OmegaFromW(i,j,k,old_drho_w(i,j,k),
497  old_drho_u,old_drho_v,
498  mf_ux,mf_vy,z_nd,dxInv);
499  });
500  } // end profile
501  // *********************************************************************
502 
503  Box bx_shrunk_in_k = bx;
504  int klo = tbz.smallEnd(2);
505  int khi = tbz.bigEnd(2);
506  bx_shrunk_in_k.setSmall(2,klo+1);
507  bx_shrunk_in_k.setBig(2,khi-1);
508 
509  // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
510  // We set grav_gpu[2] to be the vector component which is negative
511  // We define halfg to match the notes (which is why we take the absolute value)
512  Real halfg = std::abs(0.5 * grav_gpu[2]);
513 
514  {
515  BL_PROFILE("fast_loop_on_shrunk_t");
516  //Note we don't act on the bottom or top boundaries of the domain
517  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
518  {
519  Real detJ_on_kface = 0.5 * (detJ(i,j,k) + detJ(i,j,k-1));
520 
521  Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j,k-1)) : 0.0;
522 
523  Real coeff_P = coeffP_a(i,j,k) / (1.0 + q);
524  Real coeff_Q = coeffQ_a(i,j,k) / (1.0 + q);
525 
526  Real theta_t_lo = 0.5 * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
527  Real theta_t_mid = 0.5 * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) );
528  Real theta_t_hi = 0.5 * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
529 
530  // line 2 last two terms (order dtau)
531  Real R0_tmp = (-halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )) * detJ(i,j,k )
532  + (-halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1)) * detJ(i,j,k-1);
533 
534  // line 3 residuals (order dtau^2) 1.0 <-> beta_2
535  Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,Rho_comp ) * detJ(i,j,k ) +
536  slow_rhs_cons(i,j,k-1,Rho_comp ) * detJ(i,j,k-1) )
537  + ( coeff_P * slow_rhs_cons(i,j,k ,RhoTheta_comp) * detJ(i,j,k ) +
538  coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) * detJ(i,j,k-1) );
539 
540  Real Omega_kp1 = omega_arr(i,j,k+1);
541  Real Omega_k = omega_arr(i,j,k );
542  Real Omega_km1 = omega_arr(i,j,k-1);
543 
544  // consolidate lines 4&5 (order dtau^2)
545  R1_tmp += ( halfg ) *
546  ( beta_1 * dzi * (Omega_kp1 - Omega_km1) + temp_rhs_arr(i,j,k,Rho_comp) + temp_rhs_arr(i,j,k-1,Rho_comp));
547 
548  // consolidate lines 6&7 (order dtau^2)
549  R1_tmp += -(
550  coeff_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,RhoTheta_comp) ) +
551  coeff_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
552 
553  // line 1
554  RHS_a(i,j,k) = detJ_on_kface * old_drho_w(i,j,k) + dtau * (
555  detJ_on_kface * (slow_rhs_rho_w(i,j,k)+zmom_src_arr(i,j,k)) + R0_tmp + dtau*beta_2*R1_tmp);
556 
557  // We cannot use omega_arr here since that was built with old_rho_u and old_rho_v ...
558  RHS_a(i,j,k) += detJ_on_kface * OmegaFromW(i,j,k,0.,
559  new_drho_u,new_drho_v,
560  mf_ux,mf_vy,z_nd,dxInv);
561  });
562  } // end profile
563 
564  Box b2d = tbz; // Copy constructor
565  b2d.setRange(2,0);
566 
567  auto const lo = lbound(bx);
568  auto const hi = ubound(bx);
569 
570  {
571  BL_PROFILE("fast_rhs_b2d_loop_t");
572 #ifdef AMREX_USE_GPU
573  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
574  {
575  // w_klo, w_khi given by specified Dirichlet values
576  RHS_a(i,j,lo.z ) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
577  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
578 
579  // w = specified Dirichlet value at k = lo.z
580  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
581 
582  for (int k = lo.z+1; k <= hi.z+1; k++) {
583  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
584  }
585 
586  cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
587  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
588 
589  for (int k = hi.z; k >= lo.z; k--) {
590  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
591  }
592  });
593 #else
594  for (int j = lo.y; j <= hi.y; ++j) {
595  AMREX_PRAGMA_SIMD
596  for (int i = lo.x; i <= hi.x; ++i)
597  {
598  RHS_a(i,j,lo.z) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
599  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
600  }
601 
602  AMREX_PRAGMA_SIMD
603  for (int i = lo.x; i <= hi.x; ++i)
604  {
605  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
606  soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
607  }
608  }
609 
610  for (int k = lo.z+1; k <= hi.z; ++k) {
611  for (int j = lo.y; j <= hi.y; ++j) {
612  AMREX_PRAGMA_SIMD
613  for (int i = lo.x; i <= hi.x; ++i) {
614  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
615  }
616  }
617  }
618  for (int k = hi.z; k > lo.z; --k) {
619  for (int j = lo.y; j <= hi.y; ++j) {
620  AMREX_PRAGMA_SIMD
621  for (int i = lo.x; i <= hi.x; ++i) {
622  soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
623  }
624  }
625  }
626  if (hi.z == domhi.z) {
627  for (int j = lo.y; j <= hi.y; ++j) {
628  AMREX_PRAGMA_SIMD
629  for (int i = lo.x; i <= hi.x; ++i) {
630  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
631  }
632  }
633  }
634 #endif
635  } // end profile
636 
637  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
638  {
639  cur_zmom(i,j,k) = stage_zmom(i,j,k);
640  });
641 
642  if (lo.z == domlo.z) {
643  tbz.setSmall(2,domlo.z+1);
644  }
645  if (hi.z == domhi.z) {
646  tbz.setBig(2,domhi.z);
647  }
648  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
649  {
650  Real wpp = WFromOmega(i,j,k,soln_a(i,j,k),
651  new_drho_u,new_drho_v,
652  mf_ux,mf_vy,z_nd,dxInv);
653  cur_zmom(i,j,k) += wpp;
654  });
655 
656  // **************************************************************************
657  // Define updates in the RHS of rho and (rho theta)
658  // **************************************************************************
659  {
660  BL_PROFILE("fast_rho_final_update");
661  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
662  {
663  Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
664  Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
665 
666  // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
667  // so we don't update avg_zmom at k=vbx_hi.z+1
668  avg_zmom(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
669  if (l_reflux) {
670  (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
671  }
672 
673  if (k == vbx_hi.z) {
674  avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
675  if (l_reflux) {
676  (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
677  (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
678  }
679  }
680 
681  Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
682  cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
683 
684  Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
685  ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
686  zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
687 
688  cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
689 
690  if (l_reflux) {
691  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
692  }
693 
694  // add in source terms for cell-centered conserved variables
695  cur_cons(i,j,k,Rho_comp) += dtau * cc_src_arr(i,j,k,Rho_comp);
696  cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
697  });
698  } // end profile
699 
700  // We only add to the flux registers in the final RK step
701  if (l_reflux) {
702  int strt_comp_reflux = 0;
703  // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries
704  int num_comp_reflux = 1;
705  if (level < finest_level) {
706  fr_as_crse->CrseAdd(mfi,
707  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
708  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
709  }
710  if (level > 0) {
711  fr_as_fine->FineAdd(mfi,
712  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
713  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
714  }
715 
716  // This is necessary here so we don't go on to the next FArrayBox without
717  // having finished copying the fluxes into the FluxRegisters (since the fluxes
718  // are stored in temporary FArrayBox's)
719  Gpu::streamSynchronize();
720 
721  } // two-way coupling
722  } // mfi
723  } // OMP
724 }
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
@ v_x
Definition: ERF_DataStruct.H:22
@ u_y
Definition: ERF_DataStruct.H:23
@ v_y
Definition: ERF_DataStruct.H:23
@ m_y
Definition: ERF_DataStruct.H:23
@ u_x
Definition: ERF_DataStruct.H:22
@ m_x
Definition: ERF_DataStruct.H:22
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int &i, int &j, int &k, amrex::Real w, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:415
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:110
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:96
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:139
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:167
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:465
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ gpy
Definition: ERF_IndexDefines.H:151
@ gpx
Definition: ERF_IndexDefines.H:150
@ ymom
Definition: ERF_IndexDefines.H:160
@ cons
Definition: ERF_IndexDefines.H:158
@ zmom
Definition: ERF_IndexDefines.H:161
@ xmom
Definition: ERF_IndexDefines.H:159
@ qt
Definition: ERF_Kessler.H:27
Here is the call graph for this function: