ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_FastRhs_T.cpp File Reference

Functions

void erf_fast_rhs_T (int step, int, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stage_data, const MultiFab &S_stage_prim, const MultiFab &qt, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, MultiFab &lagged_delta_rt, MultiFab &avg_xmom, MultiFab &avg_ymom, MultiFab &avg_zmom, const MultiFab &cc_src, const MultiFab &xmom_src, const MultiFab &ymom_src, const MultiFab &zmom_src, const Geometry geom, const Real gravity, std::unique_ptr< MultiFab > &z_phys_nd, std::unique_ptr< MultiFab > &detJ_cc, const Real dtau, const Real beta_s, const Real facinv, Vector< std::unique_ptr< MultiFab >> &mapfac, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, bool)
 

Function Documentation

◆ erf_fast_rhs_T()

void erf_fast_rhs_T ( int  step,
int  ,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stage_data,
const MultiFab &  S_stage_prim,
const MultiFab &  qt,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
MultiFab &  lagged_delta_rt,
MultiFab &  avg_xmom,
MultiFab &  avg_ymom,
MultiFab &  avg_zmom,
const MultiFab &  cc_src,
const MultiFab &  xmom_src,
const MultiFab &  ymom_src,
const MultiFab &  zmom_src,
const Geometry  geom,
const Real  gravity,
std::unique_ptr< MultiFab > &  z_phys_nd,
std::unique_ptr< MultiFab > &  detJ_cc,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
Vector< std::unique_ptr< MultiFab >> &  mapfac,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
bool   
)

Function for computing the fast RHS with fixed terrain

Parameters
[in] step            which fast time step within each Runge-Kutta step
[in] nrk             which Runge-Kutta step
[in] level           level of resolution
[in] finest_level    finest level of resolution
[in] S_slow_rhs      slow RHS computed in erf_slow_rhs_pre
[in] S_prev          previous solution
[in] S_stage_data    solution at previous RK stage
[in] S_stage_prim    primitive variables at previous RK stage
[in] pi_stage        Exner function at previous RK stage
[in] fast_coeffs     coefficients for the tridiagonal solve used in the fast integrator
[out] S_data         current solution
[in,out] lagged_delta_rt  change in (rho theta) lagged from the previous fast step, used to extrapolate (rho theta)
[in,out] avg_xmom    time-averaged x-momentum to be used for updating slow variables
[in,out] avg_ymom    time-averaged y-momentum to be used for updating slow variables
[in,out] avg_zmom    time-averaged z-momentum to be used for updating slow variables
[in] cc_src          source terms for conserved variables
[in] xmom_src        source terms for x-momentum
[in] ymom_src        source terms for y-momentum
[in] zmom_src        source terms for z-momentum
[in] geom            container for geometric information
[in] gravity         magnitude of gravity
[in] z_phys_nd       height coordinate at nodes
[in] detJ_cc         Jacobian of the metric transformation
[in] dtau            fast time step
[in] beta_s          coefficient which determines how implicit vs explicit the solve is
[in] facinv          inverse factor for time-averaging the momenta
[in] mapfac          map factors
[in,out] fr_as_crse  YAFluxRegister at level l for the l / l+1 interface
[in,out] fr_as_fine  YAFluxRegister at level l for the l-1 / l interface
[in] l_use_moisture  whether to include moisture (water vapor) in the fast updates
[in] l_reflux        should we add fluxes to the FluxRegisters?
[in] l_implicit_substepping  whether implicit substepping is used (not referenced in this routine)
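
For reference, a brief sketch of how beta_s, dtau and facinv interact; it follows directly from the definitions of beta_1 and beta_2 at the top of the routine, where F stands for any generic term that is time-averaged over a fast step:

    \beta_1 = \tfrac{1}{2}(1 - \beta_s), \qquad \beta_2 = \tfrac{1}{2}(1 + \beta_s), \qquad
    F^{\mathrm{avg}} = \beta_1\, F^{\tau} + \beta_2\, F^{\tau + \delta\tau}

so beta_s = 0 gives a time-centered average of the old and new fast-time values, while beta_s = 1 treats the vertically implicit terms fully implicitly. The factor facinv is applied once per fast step when the new momenta are accumulated into avg_xmom, avg_ymom and avg_zmom, producing the time-averaged momenta handed back to the slow integrator.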
73 {
74  BL_PROFILE_REGION("erf_fast_rhs_T()");
75 
76  const Box& domain = geom.Domain();
77  auto const domlo = lbound(domain);
78  auto const domhi = ubound(domain);
79 
80  Real beta_1 = 0.5 * (1.0 - beta_s); // multiplies explicit terms
81  Real beta_2 = 0.5 * (1.0 + beta_s); // multiplies implicit terms
82 
83  // How much do we project forward the (rho theta) that is used in the horizontal momentum equations
84  Real beta_d = 0.1;
85 
86  Real RvOverRd = R_v / R_d;
87 
88  const Real* dx = geom.CellSize();
89  const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
90 
91  Real dxi = dxInv[0];
92  Real dyi = dxInv[1];
93  Real dzi = dxInv[2];
94  const auto& ba = S_stage_data[IntVars::cons].boxArray();
95  const auto& dm = S_stage_data[IntVars::cons].DistributionMap();
96 
97  MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
98  MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
99  MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
100  MultiFab Delta_rho ( ba , dm, 1, 1);
101  MultiFab Delta_rho_theta( ba , dm, 1, 1);
102 
103  MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
104  MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
105 
106  MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
107  MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
108  MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
109  MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
110  MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
111 
112  // *************************************************************************
113  // Set gravity as a vector
114  const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
115  const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
116 
117  MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1);
118 
119  // *************************************************************************
120  // First set up some arrays we'll need
121  // *************************************************************************
122 
123 #ifdef _OPENMP
124 #pragma omp parallel if (Gpu::notInLaunchRegion())
125 #endif
126  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
127  {
128  const Array4<Real> & cur_cons = S_data[IntVars::cons].array(mfi);
129  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
130  const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
131  const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
132 
133  const Array4<Real>& old_drho = Delta_rho.array(mfi);
134  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
135  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
136  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
137  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
138 
139  const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
140  const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
141  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
142 
143  const Array4<const Real>& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
144  const Array4<const Real>& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
145  const Array4<const Real>& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
146 
147  Box bx = mfi.validbox();
148  Box gbx = mfi.tilebox(); gbx.grow(1);
149 
150  if (step == 0) {
151  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
152  cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp);
153  cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
154  });
155  } // step = 0
156 
157  Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
158  Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
159  Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
160 
161  const auto& bx_lo = lbound(bx);
162  const auto& bx_hi = ubound(bx);
163 
164  ParallelFor(gtbx, gtby, gtbz,
165  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
166  old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
167  if (k == bx_lo.z && k != domlo.z) {
168  old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
169  } else if (k == bx_hi.z) {
170  old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
171  }
172  },
173  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
174  old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
175  if (k == bx_lo.z && k != domlo.z) {
176  old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
177  } else if (k == bx_hi.z) {
178  old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
179  }
180  },
181  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
182  old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
183  });
184 
185  const Array4<Real>& theta_extrap = extrap.array(mfi);
186  const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
187 
188  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
189  old_drho(i,j,k) = cur_cons(i,j,k,Rho_comp) - stage_cons(i,j,k,Rho_comp);
190  old_drho_theta(i,j,k) = cur_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp);
191  if (step == 0) {
192  theta_extrap(i,j,k) = old_drho_theta(i,j,k);
193  } else {
194  theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
195  ( old_drho_theta(i,j,k) - lagged_arr(i,j,k) );
196  }
197 
198  // NOTE: qv is not changing over the fast steps so we use the stage data
199  Real qv = (l_use_moisture) ? prim(i,j,k,PrimQ1_comp) : 0.0;
200  theta_extrap(i,j,k) *= (1.0 + RvOverRd*qv);
201  });
202  } // mfi
203 
204 #ifdef _OPENMP
205 #pragma omp parallel if (Gpu::notInLaunchRegion())
206 #endif
207  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
208  {
209  // We define lagged_delta_rt for our next step as the current delta_rt
210  Box gbx = mfi.tilebox(); gbx.grow(1);
211  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
212  const Array4<Real>& lagged_arr = lagged_delta_rt.array(mfi);
213  ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
214  lagged_arr(i,j,k) = old_drho_theta(i,j,k);
215  });
216  } // mfi
217 
218  // *************************************************************************
219  // Define updates in the current RK stage
220  // *************************************************************************
221 
222 #ifdef _OPENMP
223 #pragma omp parallel if (Gpu::notInLaunchRegion())
224 #endif
225  for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
226  {
227  Box bx = mfi.validbox();
228  Box tbx = mfi.nodaltilebox(0);
229  Box tby = mfi.nodaltilebox(1);
230 
231  const Array4<Real const>& xmom_src_arr = xmom_src.const_array(mfi);
232  const Array4<Real const>& ymom_src_arr = ymom_src.const_array(mfi);
233 
234  const Array4<const Real> & stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi);
235  const Array4<const Real> & stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi);
236  const Array4<const Real> & qt_arr = qt.const_array(mfi);
237 
238  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
239  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
240 
241  const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
242  const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);
243 
244  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
245  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
246 
247  const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
248  const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
249 
250  // These store the advection momenta which we will use to update the slow variables
251  const Array4<Real>& avg_xmom_arr = avg_xmom.array(mfi);
252  const Array4<Real>& avg_ymom_arr = avg_ymom.array(mfi);
253 
254  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
255 
256  const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
257 
258  const Array4<Real>& theta_extrap = extrap.array(mfi);
259 
260  // Map factors
261  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
262  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
263 
264  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
265  // Note that we do the Copy and Subtract including one ghost cell
266  // so that we don't have to fill ghost cells of the new MultiFabs
267  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
268  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
269 
270  // *********************************************************************
271  // Define updates in the RHS of {x, y, z}-momentum equations
272  // *********************************************************************
273  {
274  BL_PROFILE("fast_rhs_xymom_T");
275 
276  const auto& bx_lo = lbound(bx);
277  const auto& bx_hi = ubound(bx);
278 
279  ParallelFor(tbx, tby,
280  [=] AMREX_GPU_DEVICE (int i, int j, int k)
281  {
282  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
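 // A sketch of where the Gamma * R_d * pi prefactor below comes from: with the
 // (moist) equation of state p = p0 * (R_d * rho*theta_m / p0)^Gamma, where
 // rho*theta_m = rho*theta * (1 + R_v/R_d * qv), one has
 //   dp/d(rho*theta_m) = Gamma * R_d * (p/p0)^(R_d/c_p) = Gamma * R_d * pi,
 // so the perturbation pressure gradient is linearized as Gamma * R_d * pi_stage
 // times the gradient of the extrapolated (rho theta) perturbation, while the
 // division by (1 + q) accounts for the total moisture carried by qt.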
283  Real met_h_xi = Compute_h_xi_AtIface (i, j, k, dxInv, z_nd);
284  Real met_h_zeta = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd);
285  Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
286  Real gp_zeta_on_iface = (k == 0) ?
287  0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
288  -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
289  0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
290  -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
291  Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
292  gpx *= mf_ux(i,j,0);
293 
294  Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i-1,j,k)) : 0.0;
295 
296  Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
297  Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx / (1.0 + q);
298 
299  new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
300  + dtau * slow_rhs_rho_u(i,j,k)
301  + dtau * xmom_src_arr(i,j,k);
302  if (k == bx_lo.z && k != domlo.z) {
303  new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
304  } else if (k == bx_hi.z) {
305  new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
306  }
307 
308  avg_xmom_arr(i,j,k) += facinv*new_drho_u(i,j,k);
309 
310  cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
311  },
312  [=] AMREX_GPU_DEVICE (int i, int j, int k)
313  {
314  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
315  Real met_h_eta = Compute_h_eta_AtJface(i, j, k, dxInv, z_nd);
316  Real met_h_zeta = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd);
317  Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
318  Real gp_zeta_on_jface = (k == 0) ?
319  0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
320  -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
321  0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
322  -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
323  Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
324  gpy *= mf_vy(i,j,0);
325 
326  Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j-1,k)) : 0.0;
327 
328  Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
329  Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy / (1.0 + q);
330 
331  new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
332  + dtau * slow_rhs_rho_v(i,j,k)
333  + dtau * ymom_src_arr(i,j,k);
334 
335  if (k == bx_lo.z && k != domlo.z) {
336  new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
337  } else if (k == bx_hi.z) {
338  new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
339  }
340 
341  avg_ymom_arr(i,j,k) += facinv*new_drho_v(i,j,k);
342 
343  cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
344  });
345  } // end profile
346  }
347 
348  MultiFab Omega(S_data[IntVars::zmom].boxArray(), dm, 1, 1);
349 
350 #ifdef _OPENMP
351 #pragma omp parallel if (Gpu::notInLaunchRegion())
352 #endif
353  {
354  std::array<FArrayBox,AMREX_SPACEDIM> flux;
355  for ( MFIter mfi(S_stage_data[IntVars::cons],TileNoZ()); mfi.isValid(); ++mfi)
356  {
357  Box bx = mfi.tilebox();
358  Box tbz = surroundingNodes(bx,2);
359 
360  Box vbx = mfi.validbox();
361  const auto& vbx_hi = ubound(vbx);
362 
363  const Array4<Real const>& zmom_src_arr = zmom_src.const_array(mfi);
364  const Array4<Real const>& cc_src_arr = cc_src.const_array(mfi);
365 
366  const Array4<const Real> & stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
367  const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
368  const Array4<const Real> & qt_arr = qt.const_array(mfi);
369 
370  const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
371  const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
372  const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
373  const Array4<Real>& old_drho = Delta_rho.array(mfi);
374  const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
375 
376  const Array4<const Real>& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
377  const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
378 
379  const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
380  const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
381 
382  const Array4<Real>& cur_cons = S_data[IntVars::cons].array(mfi);
383  const Array4<Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
384 
385  // These store the advection momenta which we will use to update the slow variables
386  const Array4<Real>& avg_zmom_arr = avg_zmom.array(mfi);
387 
388  const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
389  const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
390 
391  const Array4< Real>& omega_arr = Omega.array(mfi);
392 
393  // Map factors
394  const Array4<const Real>& mf_mx = mapfac[MapFacType::m_x]->const_array(mfi);
395  const Array4<const Real>& mf_my = mapfac[MapFacType::m_y]->const_array(mfi);
396  const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
397  const Array4<const Real>& mf_uy = mapfac[MapFacType::u_y]->const_array(mfi);
398  const Array4<const Real>& mf_vx = mapfac[MapFacType::v_x]->const_array(mfi);
399  const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
400 
401  // Create old_drho_u/v/w/theta = U'', V'', W'', Theta'' in the docs
402  // Note that we do the Copy and Subtract including one ghost cell
403  // so that we don't have to fill ghost cells of the new MultiFabs
404  // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
405  // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
406 
407  FArrayBox temp_rhs_fab;
408  FArrayBox RHS_fab;
409  FArrayBox soln_fab;
410 
411  RHS_fab.resize (tbz,1,The_Async_Arena());
412  soln_fab.resize (tbz,1,The_Async_Arena());
413  temp_rhs_fab.resize(tbz,2,The_Async_Arena());
414 
415  auto const& RHS_a = RHS_fab.array();
416  auto const& soln_a = soln_fab.array();
417  auto const& temp_rhs_arr = temp_rhs_fab.array();
418 
419  auto const& coeffA_a = coeff_A_mf.array(mfi);
420  auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
421  auto const& coeffC_a = coeff_C_mf.array(mfi);
422  auto const& coeffP_a = coeff_P_mf.array(mfi);
423  auto const& coeffQ_a = coeff_Q_mf.array(mfi);
424 
425  // *************************************************************************
426  // Define flux arrays for use in advection
427  // *************************************************************************
428  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
429  flux[dir].resize(surroundingNodes(bx,dir),2);
430  flux[dir].setVal<RunOn::Device>(0.);
431  }
432  const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
433  flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
434 
435  // *********************************************************************
436  {
437  BL_PROFILE("fast_T_making_rho_rhs");
438  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
439  Real h_zeta_cc_xface_hi = 0.5 * dzi *
440  ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
441  -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
442 
443  Real h_zeta_cc_xface_lo = 0.5 * dzi *
444  ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
445  -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
446 
447  Real h_zeta_cc_yface_hi = 0.5 * dzi *
448  ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
449  -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
450 
451  Real h_zeta_cc_yface_lo = 0.5 * dzi *
452  ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
453  -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
454 
455  Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_uy(i ,j,0);
456  Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_uy(i+1,j,0);
457  Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_vx(i,j ,0);
458  Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_vx(i,j+1,0);
459 
460  Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
461 
462  // NOTE: we are saving the (1/J) weighting for later when we add this to rho and theta
463  temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
464  ( yflux_hi - yflux_lo ) * dyi * mfsq;
465  temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
466  xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
467  ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
468  yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
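 // In terrain-following form with map factors, a sketch of what was just stored:
 //   temp_rhs(...,0) ~ m_x m_y [ d( (rho u)'' h_zeta / m_y )/dx + d( (rho v)'' h_zeta / m_x )/dy ]
 // is the horizontal part of the perturbation mass-flux divergence, and
 // temp_rhs(...,1) is the matching divergence of the (rho theta) advective fluxes;
 // both still lack the 1/J factor, which is applied in the final update of
 // rho and (rho theta) below.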
469 
470  if (l_reflux) {
471  (flx_arr[0])(i,j,k,0) = xflux_lo;
472  (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
473 
474  (flx_arr[1])(i,j,k,0) = yflux_lo;
475  (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
476 
477  if (i == vbx_hi.x) {
478  (flx_arr[0])(i+1,j,k,0) = xflux_hi;
479  (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
480  }
481  if (j == vbx_hi.y) {
482  (flx_arr[1])(i,j+1,k,0) = yflux_hi;
483  (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
484  }
485  }
486  });
487  } // end profile
488 
489  // *********************************************************************
490  {
491  Box gbxo = mfi.nodaltilebox(2);
492  Box gbxo_mid = gbxo;
493 
494  if (gbxo.smallEnd(2) == domlo.z) {
495  Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
496  gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
497  ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
498  omega_arr(i,j,k) = 0.;
499  });
500  }
501  if (gbxo.bigEnd(2) == domhi.z+1) {
502  Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
503  gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
504  ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
505  omega_arr(i,j,k) = old_drho_w(i,j,k);
506  });
507  }
508  ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
509  omega_arr(i,j,k) = OmegaFromW(i,j,k,old_drho_w(i,j,k),
510  old_drho_u,old_drho_v,
511  mf_ux,mf_vy,z_nd,dxInv);
512  });
513  } // end profile
514  // *********************************************************************
515 
516  Box bx_shrunk_in_k = bx;
517  int klo = tbz.smallEnd(2);
518  int khi = tbz.bigEnd(2);
519  bx_shrunk_in_k.setSmall(2,klo+1);
520  bx_shrunk_in_k.setBig(2,khi-1);
521 
522  // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
523  // We set grav_gpu[2] to be the vector component which is negative
524  // We define halfg to match the notes (which is why we take the absolute value)
525  Real halfg = std::abs(0.5 * grav_gpu[2]);
526 
527  {
528  BL_PROFILE("fast_loop_on_shrunk_t");
529  //Note we don't act on the bottom or top boundaries of the domain
530  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
531  {
532  Real detJ_on_kface = 0.5 * (detJ(i,j,k) + detJ(i,j,k-1));
533 
534  Real q = (l_use_moisture) ? 0.5 * (qt_arr(i,j,k) + qt_arr(i,j,k-1)) : 0.0;
535 
536  Real coeff_P = coeffP_a(i,j,k) / (1.0 + q);
537  Real coeff_Q = coeffQ_a(i,j,k) / (1.0 + q);
538 
539  Real theta_t_lo = 0.5 * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
540  Real theta_t_mid = 0.5 * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) );
541  Real theta_t_hi = 0.5 * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
542 
543  // line 2 last two terms (order dtau)
544  Real R0_tmp = (-halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )) * detJ(i,j,k )
545  + (-halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1)) * detJ(i,j,k-1);
546 
547  // line 3 residuals (order dtau^2) 1.0 <-> beta_2
548  Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,Rho_comp ) * detJ(i,j,k ) +
549  slow_rhs_cons(i,j,k-1,Rho_comp ) * detJ(i,j,k-1) )
550  + ( coeff_P * slow_rhs_cons(i,j,k ,RhoTheta_comp) * detJ(i,j,k ) +
551  coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) * detJ(i,j,k-1) );
552 
553  Real Omega_kp1 = omega_arr(i,j,k+1);
554  Real Omega_k = omega_arr(i,j,k );
555  Real Omega_km1 = omega_arr(i,j,k-1);
556 
557  // consolidate lines 4&5 (order dtau^2)
558  R1_tmp += ( halfg ) *
559  ( beta_1 * dzi * (Omega_kp1 - Omega_km1) + temp_rhs_arr(i,j,k,Rho_comp) + temp_rhs_arr(i,j,k-1,Rho_comp));
560 
561  // consolidate lines 6&7 (order dtau^2)
562  R1_tmp += -(
563  coeff_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,RhoTheta_comp) ) +
564  coeff_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
565 
566  // line 1
567  RHS_a(i,j,k) = detJ_on_kface * old_drho_w(i,j,k) + dtau * (
568  detJ_on_kface * (slow_rhs_rho_w(i,j,k)+zmom_src_arr(i,j,k)) + R0_tmp + dtau*beta_2*R1_tmp);
569 
570  // We cannot use omega_arr here since that was built with old_rho_u and old_rho_v ...
571  RHS_a(i,j,k) += detJ_on_kface * OmegaFromW(i,j,k,0.,
572  new_drho_u,new_drho_v,
573  mf_ux,mf_vy,z_nd,dxInv);
574  });
575  } // end profile
576 
577  Box b2d = tbz; // Copy constructor
578  b2d.setRange(2,0);
579 
580  auto const lo = lbound(bx);
581  auto const hi = ubound(bx);
582 
583  {
584  BL_PROFILE("fast_rhs_b2d_loop_t");
585 #ifdef AMREX_USE_GPU
586  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
587  {
588  // w_klo, w_khi given by specified Dirichlet values
589  RHS_a(i,j,lo.z ) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
590  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
591 
592  // w = specified Dirichlet value at k = lo.z
593  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
594 
595  for (int k = lo.z+1; k <= hi.z+1; k++) {
596  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
597  }
598 
599  cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
600  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
601 
602  for (int k = hi.z; k >= lo.z; k--) {
603  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
604  }
605  });
606 #else
607  for (int j = lo.y; j <= hi.y; ++j) {
608  AMREX_PRAGMA_SIMD
609  for (int i = lo.x; i <= hi.x; ++i)
610  {
611  RHS_a(i,j,lo.z) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
612  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
613  }
614 
615  AMREX_PRAGMA_SIMD
616  for (int i = lo.x; i <= hi.x; ++i)
617  {
618  RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
619  soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
620  }
621  }
622 
623  for (int k = lo.z+1; k <= hi.z; ++k) {
624  for (int j = lo.y; j <= hi.y; ++j) {
625  AMREX_PRAGMA_SIMD
626  for (int i = lo.x; i <= hi.x; ++i) {
627  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
628  }
629  }
630  }
631  for (int k = hi.z; k > lo.z; --k) {
632  for (int j = lo.y; j <= hi.y; ++j) {
633  AMREX_PRAGMA_SIMD
634  for (int i = lo.x; i <= hi.x; ++i) {
635  soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
636  }
637  }
638  }
639  if (hi.z == domhi.z) {
640  for (int j = lo.y; j <= hi.y; ++j) {
641  AMREX_PRAGMA_SIMD
642  for (int i = lo.x; i <= hi.x; ++i) {
643  cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
644  }
645  }
646  }
647 #endif
648  } // end profile
649 
650  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
651  {
652  cur_zmom(i,j,k) = stage_zmom(i,j,k);
653  });
654 
655  if (lo.z == domlo.z) {
656  tbz.setSmall(2,domlo.z+1);
657  }
658  if (hi.z == domhi.z) {
659  tbz.setBig(2,domhi.z);
660  }
661  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
662  {
663  Real wpp = WFromOmega(i,j,k,soln_a(i,j,k),
664  new_drho_u,new_drho_v,
665  mf_ux,mf_vy,z_nd,dxInv);
666  cur_zmom(i,j,k) += wpp;
667  });
668 
669  // **************************************************************************
670  // Define updates in the RHS of rho and (rho theta)
671  // **************************************************************************
672  {
673  BL_PROFILE("fast_rho_final_update");
674  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
675  {
676  Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
677  Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
678 
679  // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
680  // so we don't update avg_zmom at k=vbx_hi.z+1
681  avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
682  if (l_reflux) {
683  (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
684  }
685 
686  if (k == vbx_hi.z) {
687  avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
688  if (l_reflux) {
689  (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
690  (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
691  }
692  }
693 
694  Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
695  cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
696 
697  Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
698  ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
699  zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
700 
701  cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
702 
703  if (l_reflux) {
704  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
705  }
706 
707  // add in source terms for cell-centered conserved variables
708  cur_cons(i,j,k,Rho_comp) += dtau * cc_src_arr(i,j,k,Rho_comp);
709  cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
710  });
711  } // end profile
712 
713  // We only add to the flux registers in the final RK step
714  if (l_reflux) {
715  int strt_comp_reflux = 0;
716  // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries
717  int num_comp_reflux = 1;
718  if (level < finest_level) {
719  fr_as_crse->CrseAdd(mfi,
720  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
721  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
722  }
723  if (level > 0) {
724  fr_as_fine->FineAdd(mfi,
725  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
726  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
727  }
728 
729  // This is necessary here so we don't go on to the next FArrayBox without
730  // having finished copying the fluxes into the FluxRegisters (since the fluxes
731  // are stored in temporary FArrayBox's)
732  Gpu::streamSynchronize();
733 
734  } // two-way coupling
735  } // mfi
736  } // OMP
737 }
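
The vertical momentum update above is a pre-factored tridiagonal (Thomas) solve: coeff_A, inv_coeff_B and coeff_C come in through fast_coeffs, the first k loop is the forward elimination and the second is the back substitution. Below is a minimal, self-contained sketch of the same pattern on plain std::vector data (hypothetical inputs, not the ERF MultiFabs); the factorization that ERF performs once when fast_coeffs is built is folded into the sketch so it stands alone.

#include <vector>

// Solve A[k]*x[k-1] + B[k]*x[k] + C[k]*x[k+1] = r[k] for k = 0..n-1
// (A[0] and C[n-1] unused), mirroring the sweeps in the b2d loop above.
std::vector<double> tridiag_solve (const std::vector<double>& A,
                                   const std::vector<double>& B,
                                   const std::vector<double>& C,
                                   const std::vector<double>& r)
{
    const int n = static_cast<int>(r.size());
    std::vector<double> invB(n), x(n);

    // Factorization: invB plays the role of the precomputed inv_coeff_B.
    invB[0] = 1.0 / B[0];
    for (int k = 1; k < n; ++k) {
        invB[k] = 1.0 / (B[k] - A[k] * C[k-1] * invB[k-1]);
    }

    // Forward sweep (compare the loop over k = lo.z+1 .. hi.z+1).
    x[0] = r[0] * invB[0];
    for (int k = 1; k < n; ++k) {
        x[k] = (r[k] - A[k] * x[k-1]) * invB[k];
    }

    // Back substitution (compare the loop over k = hi.z .. lo.z).
    for (int k = n - 2; k >= 0; --k) {
        x[k] -= (C[k] * invB[k]) * x[k+1];
    }
    return x;
}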
constexpr amrex::Real R_v
Definition: ERF_Constants.H:11
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
@ v_x
Definition: ERF_DataStruct.H:22
@ u_y
Definition: ERF_DataStruct.H:23
@ v_y
Definition: ERF_DataStruct.H:23
@ m_y
Definition: ERF_DataStruct.H:23
@ u_x
Definition: ERF_DataStruct.H:22
@ m_x
Definition: ERF_DataStruct.H:22
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
amrex::Real Real
Definition: ERF_ShocInterface.H:19
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int &i, int &j, int &k, amrex::Real w, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:406
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:109
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:96
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:136
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:162
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:456
AMREX_FORCE_INLINE amrex::IntVect TileNoZ()
Definition: ERF_TileNoZ.H:11
@ gpy
Definition: ERF_IndexDefines.H:151
@ gpx
Definition: ERF_IndexDefines.H:150
@ ymom
Definition: ERF_IndexDefines.H:160
@ cons
Definition: ERF_IndexDefines.H:158
@ zmom
Definition: ERF_IndexDefines.H:161
@ xmom
Definition: ERF_IndexDefines.H:159
@ qt
Definition: ERF_Kessler.H:27
@ qv
Definition: ERF_Kessler.H:28