ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_FastRhs_T.cpp File Reference
Include dependency graph for ERF_FastRhs_T.cpp:

Functions

void erf_fast_rhs_T (int step, int nrk, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stage_data, const MultiFab &S_stage_prim, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, Vector< MultiFab > &S_scratch, const Geometry geom, const Real gravity, std::unique_ptr< MultiFab > &z_phys_nd, std::unique_ptr< MultiFab > &detJ_cc, const Real dtau, const Real beta_s, const Real facinv, std::unique_ptr< MultiFab > &mapfac_m, std::unique_ptr< MultiFab > &mapfac_u, std::unique_ptr< MultiFab > &mapfac_v, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, bool)
 

Function Documentation

◆ erf_fast_rhs_T()

void erf_fast_rhs_T ( int  step,
int  nrk,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stage_data,
const MultiFab &  S_stage_prim,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
Vector< MultiFab > &  S_scratch,
const Geometry  geom,
const Real  gravity,
std::unique_ptr< MultiFab > &  z_phys_nd,
std::unique_ptr< MultiFab > &  detJ_cc,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
std::unique_ptr< MultiFab > &  mapfac_m,
std::unique_ptr< MultiFab > &  mapfac_u,
std::unique_ptr< MultiFab > &  mapfac_v,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
bool   
)

Function for computing the fast RHS with fixed terrain

Parameters
[in]stepwhich fast time step within each Runge-Kutta step
[in]nrkwhich Runge-Kutta step
[in]levellevel of resolution
[in]finest_levelfinest level of resolution
[in]S_slow_rhsslow RHS computed in erf_slow_rhs_pre
[in]S_prevprevious solution
[in]S_stage_datasolution at previous RK stage
[in]S_stage_primprimitive variables at previous RK stage
[in]pi_stageExner function at previous RK stage
[in]fast_coeffscoefficients for the tridiagonal solve used in the fast integrator
[out]S_datacurrent solution
[in]S_scratchscratch space
[in]geomcontainer for geometric information
[in]gravitymagnitude of gravity
[in]z_phys_ndheight coordinate at nodes
[in]detJ_ccJacobian of the metric transformation
[in]dtaufast time step
[in]beta_sCoefficient which determines how implicit vs explicit the solve is
[in]facinvinverse factor for time-averaging the momenta
[in]mapfac_mmap factor at cell centers
[in]mapfac_umap factor at x-faces
[in]mapfac_vmap factor at y-faces
[in,out]fr_as_crseYAFluxRegister at level l at level l / l+1 interface
[in,out]fr_as_fineYAFluxRegister at level l at level l-1 / l interface
[in]l_use_moisture
[in]l_refluxshould we add fluxes to the FluxRegisters?
[in]l_implicit_substepping
62 {
63  BL_PROFILE_REGION("erf_fast_rhs_T()");
64 
65  const Box& domain = geom.Domain();
66  auto const domlo = lbound(domain);
67  auto const domhi = ubound(domain);
68 
69  Real beta_1 = 0.5 * (1.0 - beta_s); // multiplies explicit terms
70  Real beta_2 = 0.5 * (1.0 + beta_s); // multiplies implicit terms
71 
72  // How much do we project forward the (rho theta) that is used in the horizontal momentum equations
73  Real beta_d = 0.1;
74 
75  const Real* dx = geom.CellSize();
76  const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
77 
78  Real dxi = dxInv[0];
79  Real dyi = dxInv[1];
80  Real dzi = dxInv[2];
81  const auto& ba = S_stage_data[IntVars::cons].boxArray();
82  const auto& dm = S_stage_data[IntVars::cons].DistributionMap();
83 
84  MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
85  MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
86  MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
87  MultiFab Delta_rho ( ba , dm, 1, 1);
88  MultiFab Delta_rho_theta( ba , dm, 1, 1);
89 
90  MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
91  MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
92 
93  MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
94  MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
95  MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
96  MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
97  MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
98 
99  // *************************************************************************
100  // Set gravity as a vector
101  const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
102  const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
103 
104  MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1);
105 
106  // *************************************************************************
107  // First set up some arrays we'll need
108  // *************************************************************************
109 
110 #ifdef _OPENMP
111 #pragma omp parallel if (Gpu::notInLaunchRegion())
112 #endif
113  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
114  {
115  const Array4<Real> & cur_cons = S_data[IntVars::cons].array(mfi);
116  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
117  const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
118  const Array4<Real>& lagged_delta_rt = S_scratch[IntVars::cons].array(mfi);
119 
120  const Array4<Real>& old_drho = Delta_rho.array(mfi);
121  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
122  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
123  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
124  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
125 
126  const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
127  const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
128  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
129 
130  const Array4<const Real>& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
131  const Array4<const Real>& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
132  const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
133 
134  Box bx = mfi.validbox();
135  Box gbx = mfi.tilebox(); gbx.grow(1);
136 
137  if (step == 0) {
138  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
139  cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp);
140  cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
141  });
142  } // step = 0
143 
144  Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
145  Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
146  Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
147 
148  const auto& bx_lo = lbound(bx);
149  const auto& bx_hi = ubound(bx);
150 
151  ParallelFor(gtbx, gtby, gtbz,
152  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
153  old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
154  if (k == bx_lo.z && k != domlo.z) {
155  old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
156  } else if (k == bx_hi.z) {
157  old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
158  }
159  },
160  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
161  old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
162  if (k == bx_lo.z && k != domlo.z) {
163  old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
164  } else if (k == bx_hi.z) {
165  old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
166  }
167  },
168  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
169  old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
170  });
171 
172  const Array4<Real>& theta_extrap = extrap.array(mfi);
173 
174  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
175  old_drho(i,j,k) = cur_cons(i,j,k,Rho_comp) - stage_cons(i,j,k,Rho_comp);
176  old_drho_theta(i,j,k) = cur_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp);
177  if (step == 0) {
178  theta_extrap(i,j,k) = old_drho_theta(i,j,k);
179  } else {
180  theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
181  ( old_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,RhoTheta_comp) );
182  }
183  });
184  } // mfi
185 
186 #ifdef _OPENMP
187 #pragma omp parallel if (Gpu::notInLaunchRegion())
188 #endif
189  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
190  {
191  // We define lagged_delta_rt for our next step as the current delta_rt
192  Box gbx = mfi.tilebox(); gbx.grow(1);
193  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
194  const Array4<Real>& lagged_delta_rt = S_scratch[IntVars::cons].array(mfi);
195  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
196  lagged_delta_rt(i,j,k,RhoTheta_comp) = old_drho_theta(i,j,k);
197  });
198  } // mfi
199 
200  // *************************************************************************
201  // Define updates in the current RK stage
202  // *************************************************************************
203 
204 #ifdef _OPENMP
205 #pragma omp parallel if (Gpu::notInLaunchRegion())
206 #endif
207  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
208  {
209  Box bx = mfi.validbox();
210  Box tbx = mfi.nodaltilebox(0);
211  Box tby = mfi.nodaltilebox(1);
212 
213  const Array4<const Real> & stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
214  const Array4<const Real> & stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
215  const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
216 
217  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
218  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
219 
220  const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
221  const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);
222 
223  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
224  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
225 
226  const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
227  const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
228 
229  // These store the advection momenta which we will use to update the slow variables
230  const Array4<Real>& avg_xmom = S_scratch[IntVars::xmom].array(mfi);
231  const Array4<Real>& avg_ymom = S_scratch[IntVars::ymom].array(mfi);
232 
233  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
234 
235  const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
236 
237  const Array4<Real>& theta_extrap = extrap.array(mfi);
238 
239  // Map factors
240  const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
241  const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
242 
243  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
244  // Note that we do the Copy and Subtract including one ghost cell
245  // so that we don't have to fill ghost cells of the new MultiFabs
246  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
247  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
248 
249  // *********************************************************************
250  // Define updates in the RHS of {x, y, z}-momentum equations
251  // *********************************************************************
252  {
253  BL_PROFILE("fast_rhs_xymom_T");
254 
255  const auto& bx_lo = lbound(bx);
256  const auto& bx_hi = ubound(bx);
257 
258  ParallelFor(tbx, tby,
259  [=] AMREX_GPU_DEVICE (int i, int j, int k)
260  {
261  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
262  Real met_h_xi = Compute_h_xi_AtIface (i, j, k, dxInv, z_nd);
263  Real met_h_zeta = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd);
264  Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
265  Real gp_zeta_on_iface = (k == 0) ?
266  0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
267  -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
268  0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
269  -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
270  Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
271  gpx *= mf_u(i,j,0);
272 
273  if (l_use_moisture) {
274  Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i-1,j,k,PrimQ1_comp)
275  +prim(i,j,k,PrimQ2_comp) + prim(i-1,j,k,PrimQ2_comp) );
276  gpx /= (1.0 + q);
277  }
278 
279  Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
280  Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx;
281 
282  new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
283  + dtau * slow_rhs_rho_u(i,j,k);
284  if (k == bx_lo.z && k != domlo.z) {
285  new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
286  } else if (k == bx_hi.z) {
287  new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
288  }
289 
290  avg_xmom(i,j,k) += facinv*new_drho_u(i,j,k);
291 
292  cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
293  },
294  [=] AMREX_GPU_DEVICE (int i, int j, int k)
295  {
296  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
297  Real met_h_eta = Compute_h_eta_AtJface(i, j, k, dxInv, z_nd);
298  Real met_h_zeta = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd);
299  Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
300  Real gp_zeta_on_jface = (k == 0) ?
301  0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
302  -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
303  0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
304  -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
305  Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
306  gpy *= mf_v(i,j,0);
307 
308  if (l_use_moisture) {
309  Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i,j-1,k,PrimQ1_comp)
310  +prim(i,j,k,PrimQ2_comp) + prim(i,j-1,k,PrimQ2_comp) );
311  gpy /= (1.0 + q);
312  }
313 
314  Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
315  Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy;
316 
317  new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
318  + dtau * slow_rhs_rho_v(i,j,k);
319 
320  if (k == bx_lo.z && k != domlo.z) {
321  new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
322  } else if (k == bx_hi.z) {
323  new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
324  }
325 
326  avg_ymom(i,j,k) += facinv*new_drho_v(i,j,k);
327 
328  cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
329  });
330  } // end profile
331  }
332 
333  MultiFab Omega(S_data[IntVars::zmom].boxArray(), dm, 1, 1);
334 
335 #ifdef _OPENMP
336 #pragma omp parallel if (Gpu::notInLaunchRegion())
337 #endif
338  {
339  std::array<FArrayBox,AMREX_SPACEDIM> flux;
340  for ( MFIter mfi(S_stage_data[IntVars::cons],TileNoZ()); mfi.isValid(); ++mfi)
341  {
342  Box bx = mfi.tilebox();
343  Box tbz = surroundingNodes(bx,2);
344 
345  Box vbx = mfi.validbox();
346  const auto& vbx_hi = ubound(vbx);
347 
348  const Array4<const Real> & stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
349  const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
350 
351  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
352  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
353  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
354  const Array4<Real>& old_drho = Delta_rho.array(mfi);
355  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
356 
357  const Array4<const Real>& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
358  const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
359 
360  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
361  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
362 
363  const Array4<Real>& cur_cons = S_data[IntVars::cons].array(mfi);
364  const Array4<Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
365 
366  // These store the advection momenta which we will use to update the slow variables
367  const Array4<Real>& avg_zmom = S_scratch[IntVars::zmom].array(mfi);
368 
369  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
370  const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
371 
372  const Array4< Real>& omega_arr = Omega.array(mfi);
373 
374  // Map factors
375  const Array4<const Real>& mf_m = mapfac_m->const_array(mfi);
376  const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
377  const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
378 
379  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
380  // Note that we do the Copy and Subtract including one ghost cell
381  // so that we don't have to fill ghost cells of the new MultiFabs
382  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
383  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
384 
385  FArrayBox temp_rhs_fab;
386  FArrayBox RHS_fab;
387  FArrayBox soln_fab;
388 
389  RHS_fab.resize (tbz,1,The_Async_Arena());
390  soln_fab.resize (tbz,1,The_Async_Arena());
391  temp_rhs_fab.resize(tbz,2,The_Async_Arena());
392 
393  auto const& RHS_a = RHS_fab.array();
394  auto const& soln_a = soln_fab.array();
395  auto const& temp_rhs_arr = temp_rhs_fab.array();
396 
397  auto const& coeffA_a = coeff_A_mf.array(mfi);
398  auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
399  auto const& coeffC_a = coeff_C_mf.array(mfi);
400  auto const& coeffP_a = coeff_P_mf.array(mfi);
401  auto const& coeffQ_a = coeff_Q_mf.array(mfi);
402 
403  // *************************************************************************
404  // Define flux arrays for use in advection
405  // *************************************************************************
406  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
407  flux[dir].resize(surroundingNodes(bx,dir),2);
408  flux[dir].setVal<RunOn::Device>(0.);
409  }
410  const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
411  flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
412 
413  // *********************************************************************
414  {
415  BL_PROFILE("fast_T_making_rho_rhs");
416  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
417  Real h_zeta_cc_xface_hi = 0.5 * dzi *
418  ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
419  -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
420 
421  Real h_zeta_cc_xface_lo = 0.5 * dzi *
422  ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
423  -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
424 
425  Real h_zeta_cc_yface_hi = 0.5 * dzi *
426  ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
427  -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
428 
429  Real h_zeta_cc_yface_lo = 0.5 * dzi *
430  ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
431  -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
432 
433  Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_u(i ,j,0);
434  Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_u(i+1,j,0);
435  Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_v(i,j ,0);
436  Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_v(i,j+1,0);
437 
438  Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);
439 
440  // NOTE: we are saving the (1/J) weighting for later when we add this to rho and theta
441  temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
442  ( yflux_hi - yflux_lo ) * dyi * mfsq;
443  temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
444  xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
445  ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
446  yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
447 
448  (flx_arr[0])(i,j,k,0) = xflux_lo;
449  (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
450 
451  (flx_arr[1])(i,j,k,0) = yflux_lo;
452  (flx_arr[1])(i,j,k,1) = (flx_arr[0])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
453 
454  if (i == vbx_hi.x) {
455  (flx_arr[0])(i+1,j,k,0) = xflux_hi;
456  (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
457  }
458  if (j == vbx_hi.y) {
459  (flx_arr[1])(i,j+1,k,0) = yflux_hi;
460  (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
461  }
462 
463  });
464  } // end profile
465 
466  // *********************************************************************
467  {
468  Box gbxo = mfi.nodaltilebox(2);
469  Box gbxo_mid = gbxo;
470 
471  if (gbxo.smallEnd(2) == domlo.z) {
472  Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
473  gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
474  ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
475  omega_arr(i,j,k) = 0.;
476  });
477  }
478  if (gbxo.bigEnd(2) == domhi.z+1) {
479  Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
480  gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
481  ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
482  omega_arr(i,j,k) = old_drho_w(i,j,k);
483  });
484  }
485  ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
486  omega_arr(i,j,k) = OmegaFromW(i,j,k,old_drho_w(i,j,k),old_drho_u,old_drho_v,z_nd,dxInv);
487  });
488  } // end profile
489  // *********************************************************************
490 
491  Box bx_shrunk_in_k = bx;
492  int klo = tbz.smallEnd(2);
493  int khi = tbz.bigEnd(2);
494  bx_shrunk_in_k.setSmall(2,klo+1);
495  bx_shrunk_in_k.setBig(2,khi-1);
496 
497  // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
498  // We set grav_gpu[2] to be the vector component which is negative
499  // We define halfg to match the notes (which is why we take the absolute value)
500  Real halfg = std::abs(0.5 * grav_gpu[2]);
501 
502  {
503  BL_PROFILE("fast_loop_on_shrunk_t");
504  //Note we don't act on the bottom or top boundaries of the domain
505  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
506  {
507  Real detJ_on_kface = 0.5 * (detJ(i,j,k) + detJ(i,j,k-1));
508 
509  Real coeff_P = coeffP_a(i,j,k);
510  Real coeff_Q = coeffQ_a(i,j,k);
511 
512  if (l_use_moisture) {
513  Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp)
514  +prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) );
515  coeff_P /= (1.0 + q);
516  coeff_Q /= (1.0 + q);
517  }
518 
519  Real theta_t_lo = 0.5 * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
520  Real theta_t_mid = 0.5 * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) );
521  Real theta_t_hi = 0.5 * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
522 
523  // line 2 last two terms (order dtau)
524  Real R0_tmp = (-halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )) * detJ(i,j,k )
525  + (-halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1)) * detJ(i,j,k-1);
526 
527  // line 3 residuals (order dtau^2) 1.0 <-> beta_2
528  Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,Rho_comp ) * detJ(i,j,k ) +
529  slow_rhs_cons(i,j,k-1,Rho_comp ) * detJ(i,j,k-1) )
530  + ( coeff_P * slow_rhs_cons(i,j,k ,RhoTheta_comp) * detJ(i,j,k ) +
531  coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) * detJ(i,j,k-1) );
532 
533  Real Omega_kp1 = omega_arr(i,j,k+1);
534  Real Omega_k = omega_arr(i,j,k );
535  Real Omega_km1 = omega_arr(i,j,k-1);
536 
537  // consolidate lines 4&5 (order dtau^2)
538  R1_tmp += ( halfg ) *
539  ( beta_1 * dzi * (Omega_kp1 - Omega_km1) + temp_rhs_arr(i,j,k,Rho_comp) + temp_rhs_arr(i,j,k-1,Rho_comp));
540 
541  // consolidate lines 6&7 (order dtau^2)
542  R1_tmp += -(
543  coeff_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,RhoTheta_comp) ) +
544  coeff_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
545 
546  // line 1
547  RHS_a(i,j,k) = detJ_on_kface * old_drho_w(i,j,k) + dtau * (
548  detJ_on_kface * slow_rhs_rho_w(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
549 
550  // We cannot use omega_arr here since that was built with old_rho_u and old_rho_v ...
551  RHS_a(i,j,k) += detJ_on_kface * OmegaFromW(i,j,k,0.,new_drho_u,new_drho_v,z_nd,dxInv);
552  });
553  } // end profile
554 
555  Box b2d = tbz; // Copy constructor
556  b2d.setRange(2,0);
557 
558  auto const lo = lbound(bx);
559  auto const hi = ubound(bx);
560 
561  {
562  BL_PROFILE("fast_rhs_b2d_loop_t");
563 #ifdef AMREX_USE_GPU
564  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
565  {
566  // w_klo, w_khi given by specified Dirichlet values
567  RHS_a(i,j,lo.z ) = dtau * slow_rhs_rho_w(i,j,lo.z);
568  RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
569 
570  // w = specified Dirichlet value at k = lo.z
571  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
572 
573  for (int k = lo.z+1; k <= hi.z+1; k++) {
574  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
575  }
576 
577  cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
578  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
579 
580  for (int k = hi.z; k >= lo.z; k--) {
581  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
582  }
583  });
584 #else
585  for (int j = lo.y; j <= hi.y; ++j) {
586  AMREX_PRAGMA_SIMD
587  for (int i = lo.x; i <= hi.x; ++i)
588  {
589  RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
590  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
591  }
592 
593  AMREX_PRAGMA_SIMD
594  for (int i = lo.x; i <= hi.x; ++i)
595  {
596  RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
597  soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
598  }
599  }
600 
601  for (int k = lo.z+1; k <= hi.z; ++k) {
602  for (int j = lo.y; j <= hi.y; ++j) {
603  AMREX_PRAGMA_SIMD
604  for (int i = lo.x; i <= hi.x; ++i) {
605  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
606  }
607  }
608  }
609  for (int k = hi.z; k > lo.z; --k) {
610  for (int j = lo.y; j <= hi.y; ++j) {
611  AMREX_PRAGMA_SIMD
612  for (int i = lo.x; i <= hi.x; ++i) {
613  soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
614  }
615  }
616  }
617  if (hi.z == domhi.z) {
618  for (int j = lo.y; j <= hi.y; ++j) {
619  AMREX_PRAGMA_SIMD
620  for (int i = lo.x; i <= hi.x; ++i) {
621  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
622  }
623  }
624  }
625 #endif
626  } // end profile
627 
628  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
629  {
630  cur_zmom(i,j,k) = stage_zmom(i,j,k);
631  });
632 
633  if (lo.z == domlo.z) {
634  tbz.setSmall(2,domlo.z+1);
635  }
636  if (hi.z == domhi.z) {
637  tbz.setBig(2,domhi.z);
638  }
639  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
640  {
641  Real wpp = WFromOmega(i,j,k,soln_a(i,j,k),new_drho_u,new_drho_v,z_nd,dxInv);
642  cur_zmom(i,j,k) += wpp;
643  });
644 
645  // **************************************************************************
646  // Define updates in the RHS of rho and (rho theta)
647  // **************************************************************************
648 
649  {
650  BL_PROFILE("fast_rho_final_update");
651  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
652  {
653  Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
654  Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
655 
656  // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
657  // so we don't update avg_zmom at k=vbx_hi.z+1
658  avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
659  (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
660 
661  if (k == vbx_hi.z) {
662  avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
663  (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
664  (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
665  }
666 
667  Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
668  cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
669 
670  Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
671  ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
672  zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
673 
674  cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
675 
676  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
677  });
678  } // end profile
679 
680  // We only add to the flux registers in the final RK step
681  if (l_reflux && nrk == 2) {
682  int strt_comp_reflux = 0;
683  // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries
684  int num_comp_reflux = 1;
685  if (level < finest_level) {
686  fr_as_crse->CrseAdd(mfi,
687  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
688  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
689  }
690  if (level > 0) {
691  fr_as_fine->FineAdd(mfi,
692  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
693  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
694  }
695 
696  // This is necessary here so we don't go on to the next FArrayBox without
697  // having finished copying the fluxes into the FluxRegisters (since the fluxes
698  // are stored in temporary FArrayBox's)
699  Gpu::streamSynchronize();
700 
701  } // two-way coupling
702  } // mfi
703  } // OMP
704 }
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define PrimQ2_comp
Definition: ERF_IndexDefines.H:54
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:101
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int i, int j, int k, amrex::Real omega, amrex::Real u, amrex::Real v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:407
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int i, int j, int k, amrex::Real w, const amrex::Array4< const amrex::Real > u_arr, const amrex::Array4< const amrex::Real > v_arr, const amrex::Array4< const amrex::Real > z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:374
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:87
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:130
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:158
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ ymom
Definition: ERF_IndexDefines.H:141
@ cons
Definition: ERF_IndexDefines.H:139
@ zmom
Definition: ERF_IndexDefines.H:142
@ xmom
Definition: ERF_IndexDefines.H:140
Here is the call graph for this function: