ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_FastRhs_MT.cpp File Reference

Functions

void erf_fast_rhs_MT (int step, int nrk, int level, int finest_level, Vector< MultiFab > &S_slow_rhs, const Vector< MultiFab > &S_prev, Vector< MultiFab > &S_stg_data, const MultiFab &S_stg_prim, const MultiFab &pi_stage, const MultiFab &fast_coeffs, Vector< MultiFab > &S_data, Vector< MultiFab > &S_scratch, const Geometry geom, const Real gravity, const bool use_lagged_delta_rt, std::unique_ptr< MultiFab > &z_t_rk, const MultiFab *z_t_pert, std::unique_ptr< MultiFab > &z_phys_nd_old, std::unique_ptr< MultiFab > &z_phys_nd_new, std::unique_ptr< MultiFab > &z_phys_nd_stg, std::unique_ptr< MultiFab > &detJ_cc_old, std::unique_ptr< MultiFab > &detJ_cc_new, std::unique_ptr< MultiFab > &detJ_cc_stg, const Real dtau, const Real beta_s, const Real facinv, std::unique_ptr< MultiFab > &mapfac_m, std::unique_ptr< MultiFab > &mapfac_u, std::unique_ptr< MultiFab > &mapfac_v, YAFluxRegister *fr_as_crse, YAFluxRegister *fr_as_fine, bool l_use_moisture, bool l_reflux, bool)
 

Function Documentation

◆ erf_fast_rhs_MT()

void erf_fast_rhs_MT ( int  step,
int  nrk,
int  level,
int  finest_level,
Vector< MultiFab > &  S_slow_rhs,
const Vector< MultiFab > &  S_prev,
Vector< MultiFab > &  S_stg_data,
const MultiFab &  S_stg_prim,
const MultiFab &  pi_stage,
const MultiFab &  fast_coeffs,
Vector< MultiFab > &  S_data,
Vector< MultiFab > &  S_scratch,
const Geometry  geom,
const Real  gravity,
const bool  use_lagged_delta_rt,
std::unique_ptr< MultiFab > &  z_t_rk,
const MultiFab *  z_t_pert,
std::unique_ptr< MultiFab > &  z_phys_nd_old,
std::unique_ptr< MultiFab > &  z_phys_nd_new,
std::unique_ptr< MultiFab > &  z_phys_nd_stg,
std::unique_ptr< MultiFab > &  detJ_cc_old,
std::unique_ptr< MultiFab > &  detJ_cc_new,
std::unique_ptr< MultiFab > &  detJ_cc_stg,
const Real  dtau,
const Real  beta_s,
const Real  facinv,
std::unique_ptr< MultiFab > &  mapfac_m,
std::unique_ptr< MultiFab > &  mapfac_u,
std::unique_ptr< MultiFab > &  mapfac_v,
YAFluxRegister *  fr_as_crse,
YAFluxRegister *  fr_as_fine,
bool  l_use_moisture,
bool  l_reflux,
bool   
)

Function for computing the fast RHS with moving terrain

Parameters
    [in]     step                  which fast time step within each Runge-Kutta step
    [in]     nrk                   which Runge-Kutta step
    [in]     level                 level of resolution
    [in]     finest_level          finest level of resolution
    [in]     S_slow_rhs            slow RHS computed in erf_slow_rhs_pre
    [in]     S_prev                previous solution
    [in]     S_stg_data            solution at previous RK stage
    [in]     S_stg_prim            primitive variables at previous RK stage
    [in]     pi_stage              Exner function at previous RK stage
    [in]     fast_coeffs           coefficients for the tridiagonal solve used in the fast integrator (see the sketch after this list)
    [out]    S_data                current solution
    [in]     S_scratch             scratch space
    [in]     geom                  container for geometric information
    [in]     gravity               magnitude of gravity
    [in]     use_lagged_delta_rt   whether to extrapolate (rho theta) using the lagged delta_rt saved from the previous step
    [in]     z_t_rk                rate of change of grid height – only relevant for moving terrain
    [in]     z_t_pert              rate of change of grid height – interpolated between RK stages
    [in]     z_phys_nd_old         height coordinate at nodes at old time
    [in]     z_phys_nd_new         height coordinate at nodes at new time
    [in]     z_phys_nd_stg         height coordinate at nodes at previous stage
    [in]     detJ_cc_old           Jacobian of the metric transformation at old time
    [in]     detJ_cc_new           Jacobian of the metric transformation at new time
    [in]     detJ_cc_stg           Jacobian of the metric transformation at previous stage
    [in]     dtau                  fast time step
    [in]     beta_s                coefficient that determines how implicit vs explicit the vertical solve is (see the note after this list)
    [in]     facinv                inverse factor for time-averaging the momenta
    [in]     mapfac_m              map factor at cell centers
    [in]     mapfac_u              map factor at x-faces
    [in]     mapfac_v              map factor at y-faces
    [in,out] fr_as_crse            YAFluxRegister at level l for the l / l+1 interface (this level is the coarse side)
    [in,out] fr_as_fine            YAFluxRegister at level l for the l-1 / l interface (this level is the fine side)
    [in]     l_use_moisture        whether moisture variables are active; if so the pressure gradient terms are divided by (1 + q)
    [in]     l_reflux              should we add fluxes to the FluxRegisters?
    [in]     l_implicit_substepping   unnamed in the signature and unused in this routine
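Two orientation notes before the listing. First, the weights computed at the top of the routine, beta_1 = 0.5*(1 - beta_s) and beta_2 = 0.5*(1 + beta_s), multiply the explicit and implicit terms respectively, so beta_s = 0 gives an evenly centered update while beta_s = 1 makes the vertical solve fully implicit. Second, the vertically implicit part is solved column by column with the pre-factored tridiagonal coefficients in fast_coeffs: a forward sweep that uses the pre-inverted diagonal (inv_coeffB_a) followed by a backward sweep that uses coeffC_a. Below is a minimal, self-contained sketch of that sweep pattern only; the factorization step, the coefficient values, the array names, and the system size are illustrative assumptions and are not taken from ERF (in ERF the factorization happens where fast_coeffs is built, which is not shown on this page).

    #include <vector>
    #include <cstdio>

    int main ()
    {
        const int nz = 8;                       // illustrative column height
        // Generic diagonally dominant tridiagonal system a*x_{k-1} + b*x_k + c*x_{k+1} = rhs_k
        std::vector<double> a(nz, -1.0), b(nz, 4.0), c(nz, -1.0), rhs(nz, 1.0);
        std::vector<double> invB(nz), soln(nz);
        a[0] = 0.0; c[nz-1] = 0.0;

        // Pre-factorization (done once, analogous to building fast_coeffs):
        // invB[k] stores 1/(b_k - a_k * c_{k-1} * invB[k-1]), the inverse of the
        // modified diagonal of the Thomas algorithm.
        invB[0] = 1.0 / b[0];
        for (int k = 1; k < nz; ++k) {
            invB[k] = 1.0 / (b[k] - a[k]*c[k-1]*invB[k-1]);
        }

        // Forward sweep: each unknown depends only on the one below it,
        // as in the "fast_rhs_b2d_loop_t" region of the listing.
        soln[0] = rhs[0] * invB[0];
        for (int k = 1; k < nz; ++k) {
            soln[k] = (rhs[k] - a[k]*soln[k-1]) * invB[k];
        }

        // Backward sweep: remove the coupling to the unknown above.
        for (int k = nz-2; k >= 0; --k) {
            soln[k] -= (c[k]*invB[k]) * soln[k+1];
        }

        for (int k = 0; k < nz; ++k) { std::printf("soln[%d] = %g\n", k, soln[k]); }
        return 0;
    }

The sketch is the standard Thomas algorithm written so that its per-column sweeps mirror the loop structure of the fast_rhs_b2d_loop_t region in the listing below.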
76 {
77  BL_PROFILE_REGION("erf_fast_rhs_MT()");
78 
79  Real beta_1 = 0.5 * (1.0 - beta_s); // multiplies explicit terms
80  Real beta_2 = 0.5 * (1.0 + beta_s); // multiplies implicit terms
81 
82  // beta_d controls how much we project forward the (rho theta) used in the horizontal momentum equations
83  Real beta_d = 0.1;
84 
85  const Real* dx = geom.CellSize();
86  const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
87 
88  Real dxi = dxInv[0];
89  Real dyi = dxInv[1];
90  Real dzi = dxInv[2];
91 
92  MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
93  MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
94  MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
95  MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
96  MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
97 
98  // *************************************************************************
99  // Set gravity as a vector
100  const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
101  const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
102 
103  MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1);
104 
105  MultiFab Omega(S_data[IntVars::zmom].boxArray(), S_data[IntVars::zmom].DistributionMap(), 1, 1);
106 
107  // *************************************************************************
108  // Define updates in the current RK stage
109  // *************************************************************************
110 #ifdef _OPENMP
111 #pragma omp parallel if (Gpu::notInLaunchRegion())
112 #endif
113  {
114  FArrayBox temp_rhs_fab;
115 
116  FArrayBox RHS_fab;
117  FArrayBox soln_fab;
118 
119  std::array<FArrayBox,AMREX_SPACEDIM> flux;
120 
121  // NOTE: we leave tiling off here for efficiency -- making this loop work with tiling
122  // would require additional changes
123  for ( MFIter mfi(S_stg_data[IntVars::cons],false); mfi.isValid(); ++mfi)
124  {
125  Box bx = mfi.tilebox();
126  Box tbx = surroundingNodes(bx,0);
127  Box tby = surroundingNodes(bx,1);
128  Box tbz = surroundingNodes(bx,2);
129 
130  Box vbx = mfi.validbox();
131  const auto& vbx_hi = ubound(vbx);
132 
133  const Array4<const Real> & stg_cons = S_stg_data[IntVars::cons].const_array(mfi);
134  const Array4<const Real> & stg_xmom = S_stg_data[IntVars::xmom].const_array(mfi);
135  const Array4<const Real> & stg_ymom = S_stg_data[IntVars::ymom].const_array(mfi);
136  const Array4<const Real> & stg_zmom = S_stg_data[IntVars::zmom].const_array(mfi);
137  const Array4<const Real> & prim = S_stg_prim.const_array(mfi);
138 
139  const Array4<const Real>& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi);
140  const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi);
141  const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi);
142  const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
143 
144  const Array4<Real>& cur_cons = S_data[IntVars::cons].array(mfi);
145  const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
146  const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
147  const Array4<Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
148 
149  const Array4<Real>& lagged_delta_rt = S_scratch[IntVars::cons].array(mfi);
150 
151  const Array4<const Real>& prev_cons = S_prev[IntVars::cons].const_array(mfi);
152  const Array4<const Real>& prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
153  const Array4<const Real>& prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
154  const Array4<const Real>& prev_zmom = S_prev[IntVars::zmom].const_array(mfi);
155 
156  // These store the advection momenta which we will use to update the slow variables
157  const Array4<Real>& avg_xmom = S_scratch[IntVars::xmom].array(mfi);
158  const Array4<Real>& avg_ymom = S_scratch[IntVars::ymom].array(mfi);
159  const Array4<Real>& avg_zmom = S_scratch[IntVars::zmom].array(mfi);
160 
161  const Array4<const Real>& z_nd_old = z_phys_nd_old->const_array(mfi);
162  const Array4<const Real>& z_nd_new = z_phys_nd_new->const_array(mfi);
163  const Array4<const Real>& z_nd_stg = z_phys_nd_stg->const_array(mfi);
164  const Array4<const Real>& detJ_old = detJ_cc_old->const_array(mfi);
165  const Array4<const Real>& detJ_new = detJ_cc_new->const_array(mfi);
166  const Array4<const Real>& detJ_stg = detJ_cc_stg->const_array(mfi);
167 
168  const Array4<const Real>& z_t_arr = z_t_rk->const_array(mfi);
169  const Array4<const Real>& zp_t_arr = z_t_pert->const_array(mfi);
170 
171  const Array4< Real>& omega_arr = Omega.array(mfi);
172 
173  const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
174 
175  const Array4<Real>& theta_extrap = extrap.array(mfi);
176 
177  // Map factors
178  const Array4<const Real>& mf_m = mapfac_m->const_array(mfi);
179  const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
180  const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
181 
182  // Note: it is important to grow the tilebox rather than use growntilebox because
183  // we need to fill the ghost cells of the tilebox so we can use them below
184  Box gbx = mfi.tilebox(); gbx.grow(1);
185  Box gtbx = mfi.nodaltilebox(0); gtbx.grow(1); gtbx.setSmall(2,0);
186  Box gtby = mfi.nodaltilebox(1); gtby.grow(1); gtby.setSmall(2,0);
187 
188  {
189  BL_PROFILE("fast_rhs_copies_0");
190  if (step == 0) {
191  ParallelFor(gbx,
192  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
193  cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp);
194  cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
195 
196  Real delta_rt = cur_cons(i,j,k,RhoTheta_comp) - stg_cons(i,j,k,RhoTheta_comp);
197  theta_extrap(i,j,k) = delta_rt;
198 
199  // We define lagged_delta_rt for our next step as the current delta_rt
200  lagged_delta_rt(i,j,k,RhoTheta_comp) = delta_rt;
201  });
202  } else if (use_lagged_delta_rt) {
203  // This is the default for cases with no or static terrain
204  ParallelFor(gbx,
205  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
206  Real delta_rt = cur_cons(i,j,k,RhoTheta_comp) - stg_cons(i,j,k,RhoTheta_comp);
207  theta_extrap(i,j,k) = delta_rt + beta_d * (delta_rt - lagged_delta_rt(i,j,k,RhoTheta_comp));
208 
209  // We define lagged_delta_rt for our next step as the current delta_rt
210  lagged_delta_rt(i,j,k,RhoTheta_comp) = delta_rt;
211  });
212  } else {
213  // For the moving wave problem, this choice seems more robust
214  ParallelFor(gbx,
215  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
216  theta_extrap(i,j,k) = cur_cons(i,j,k,RhoTheta_comp) - stg_cons(i,j,k,RhoTheta_comp);
217  });
218  } // if step
219  } // end profile
220 
221  RHS_fab.resize (tbz,1, The_Async_Arena());
222  soln_fab.resize (tbz,1, The_Async_Arena());
223  temp_rhs_fab.resize(tbz,2, The_Async_Arena());
224 
225  auto const& RHS_a = RHS_fab.array();
226  auto const& soln_a = soln_fab.array();
227  auto const& temp_rhs_arr = temp_rhs_fab.array();
228 
229  auto const& coeffA_a = coeff_A_mf.array(mfi);
230  auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
231  auto const& coeffC_a = coeff_C_mf.array(mfi);
232  auto const& coeffP_a = coeff_P_mf.array(mfi);
233  auto const& coeffQ_a = coeff_Q_mf.array(mfi);
234 
235  // *********************************************************************
236  // Define updates in the RHS of {x, y, z}-momentum equations
237  // *********************************************************************
238  {
239  BL_PROFILE("fast_rhs_xymom_T");
240  ParallelFor(tbx, tby,
241  [=] AMREX_GPU_DEVICE (int i, int j, int k)
242  {
243  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
244  Real h_xi_old = Compute_h_xi_AtIface(i, j, k, dxInv, z_nd_old);
245  Real h_zeta_old = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd_old);
246  Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
247  Real gp_zeta_on_iface = (k == 0) ?
248  0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
249  -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
250  0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
251  -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
252  Real gpx = h_zeta_old * gp_xi - h_xi_old * gp_zeta_on_iface;
253  gpx *= mf_u(i,j,0);
254 
255  if (l_use_moisture) {
256  Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i-1,j,k,PrimQ1_comp)
257  +prim(i,j,k,PrimQ2_comp) + prim(i-1,j,k,PrimQ2_comp) );
258  gpx /= (1.0 + q);
259  }
260 
261  Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
262  Real fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx;
263 
264  // We have already scaled the source terms to have the extra factor of dJ
265  cur_xmom(i,j,k) = h_zeta_old * prev_xmom(i,j,k) + dtau * fast_rhs_rho_u
266  + dtau * slow_rhs_rho_u(i,j,k);
267  },
268  [=] AMREX_GPU_DEVICE (int i, int j, int k)
269  {
270  // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
271  Real h_eta_old = Compute_h_eta_AtJface(i, j, k, dxInv, z_nd_old);
272  Real h_zeta_old = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd_old);
273  Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
274  Real gp_zeta_on_jface = (k == 0) ?
275  0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
276  -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
277  0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
278  -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
279  Real gpy = h_zeta_old * gp_eta - h_eta_old * gp_zeta_on_jface;
280  gpy *= mf_v(i,j,0);
281 
282  if (l_use_moisture) {
283  Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i,j-1,k,PrimQ1_comp)
284  +prim(i,j,k,PrimQ2_comp) + prim(i,j-1,k,PrimQ2_comp) );
285  gpy /= (1.0 + q);
286  }
287 
288  Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
289  Real fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy;
290 
291  // We have already scaled the source terms to have the extra factor of dJ
292  cur_ymom(i, j, k) = h_zeta_old * prev_ymom(i,j,k) + dtau * fast_rhs_rho_v
293  + dtau * slow_rhs_rho_v(i,j,k);
294  });
295  } // end profile
296 
297  // *************************************************************************
298  // Define flux arrays for use in advection
299  // *************************************************************************
300  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
301  flux[dir].resize(surroundingNodes(bx,dir),2);
302  flux[dir].setVal<RunOn::Device>(0.);
303  }
304  const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
305  flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
306 
307  // *********************************************************************
308  {
309  BL_PROFILE("fast_T_making_rho_rhs");
310  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
311  {
312  Real h_zeta_stg_xlo = Compute_h_zeta_AtIface(i, j , k, dxInv, z_nd_stg);
313  Real h_zeta_stg_xhi = Compute_h_zeta_AtIface(i+1,j , k, dxInv, z_nd_stg);
314  Real xflux_lo = cur_xmom(i ,j,k) - stg_xmom(i ,j,k)*h_zeta_stg_xlo;
315  Real xflux_hi = cur_xmom(i+1,j,k) - stg_xmom(i+1,j,k)*h_zeta_stg_xhi;
316 
317  Real h_zeta_stg_yhi = Compute_h_zeta_AtJface(i, j+1, k, dxInv, z_nd_stg);
318  Real h_zeta_stg_ylo = Compute_h_zeta_AtJface(i, j , k, dxInv, z_nd_stg);
319  Real yflux_lo = cur_ymom(i,j ,k) - stg_ymom(i,j ,k)*h_zeta_stg_ylo;
320  Real yflux_hi = cur_ymom(i,j+1,k) - stg_ymom(i,j+1,k)*h_zeta_stg_yhi;
321 
322  // NOTE: we are saving the (1/J) weighting for later when we add this to rho and theta
323  temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi + ( yflux_hi - yflux_lo ) * dyi;
324  temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
325  xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi +
326  ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
327  yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi) * 0.5;
328 
329  (flx_arr[0])(i,j,k,0) = xflux_lo;
330  (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
331 
332  (flx_arr[1])(i,j,k,0) = yflux_lo;
333  (flx_arr[1])(i,j,k,1) = (flx_arr[1])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
334 
335  if (i == vbx_hi.x) {
336  (flx_arr[0])(i+1,j,k,0) = xflux_hi;
337  (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
338  }
339  if (j == vbx_hi.y) {
340  (flx_arr[1])(i,j+1,k,0) = yflux_hi;
341  (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
342  }
343  });
344  } // end profile
345 
346  // *********************************************************************
347  // This must be done before we set cur_xmom and cur_ymom, since those
348  // in fact point to the same array as prev_xmom and prev_ymom
349  // *********************************************************************
350  Box gbxo = mfi.nodaltilebox(2);
351  {
352  BL_PROFILE("fast_MT_making_omega");
353  Box gbxo_lo = gbxo; gbxo_lo.setBig(2,0);
354  ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
355  omega_arr(i,j,k) = 0.;
356  });
357  Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
358  ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
359  omega_arr(i,j,k) = prev_zmom(i,j,k) - stg_zmom(i,j,k) - zp_t_arr(i,j,k);
360  });
361  Box gbxo_mid = gbxo; gbxo_mid.setSmall(2,1); gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
362  ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
363  omega_arr(i,j,k) =
364  (OmegaFromW(i,j,k,prev_zmom(i,j,k),prev_xmom,prev_ymom,z_nd_old,dxInv)
365  -OmegaFromW(i,j,k, stg_zmom(i,j,k), stg_xmom, stg_ymom,z_nd_old,dxInv)) - zp_t_arr(i,j,k);
366  });
367  } // end profile
368  // *********************************************************************
369 
370  ParallelFor(tbx, tby,
371  [=] AMREX_GPU_DEVICE (int i, int j, int k)
372  {
373  Real h_zeta_new = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd_new);
374  cur_xmom(i, j, k) /= h_zeta_new;
375  avg_xmom(i,j,k) += facinv*(cur_xmom(i,j,k) - stg_xmom(i,j,k));
376  },
377  [=] AMREX_GPU_DEVICE (int i, int j, int k)
378  {
379  Real h_zeta_new = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd_new);
380  cur_ymom(i, j, k) /= h_zeta_new;
381  avg_ymom(i,j,k) += facinv*(cur_ymom(i,j,k) - stg_ymom(i,j,k));
382  });
383 
384  Box bx_shrunk_in_k = bx;
385  int klo = tbz.smallEnd(2);
386  int khi = tbz.bigEnd(2);
387  bx_shrunk_in_k.setSmall(2,klo+1);
388  bx_shrunk_in_k.setBig(2,khi-1);
389 
390  // Note that the notes use "g" to mean the magnitude of gravity, so it is positive
391  // We set grav_gpu[2] to be the vector component which is negative
392  // We define halfg to match the notes (which is why we take the absolute value)
393  Real halfg = std::abs(0.5 * grav_gpu[2]);
394 
395  {
396  BL_PROFILE("fast_loop_on_shrunk_t");
397  //Note we don't act on the bottom or top boundaries of the domain
398  ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
399  {
400  Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1));
401  Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1));
402  Real dJ_stg_kface = 0.5 * (detJ_stg(i,j,k) + detJ_stg(i,j,k-1));
403 
404  Real coeff_P = coeffP_a(i,j,k);
405  Real coeff_Q = coeffQ_a(i,j,k);
406 
407  if (l_use_moisture) {
408  Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp)
409  +prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) );
410  coeff_P /= (1.0 + q);
411  coeff_Q /= (1.0 + q);
412  }
413 
414  Real theta_t_lo = 0.5 * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
415  Real theta_t_mid = 0.5 * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) );
416  Real theta_t_hi = 0.5 * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
417 
418  // line 2 last two terms (order dtau)
419  Real R0_tmp = coeff_P * cur_cons(i,j,k ,RhoTheta_comp) * dJ_old_kface
420  + coeff_Q * cur_cons(i,j,k-1,RhoTheta_comp) * dJ_old_kface
421  - coeff_P * stg_cons(i,j,k ,RhoTheta_comp) * dJ_stg_kface
422  - coeff_Q * stg_cons(i,j,k-1,RhoTheta_comp) * dJ_stg_kface
423  - halfg * ( cur_cons(i,j,k,Rho_comp) + cur_cons(i,j,k-1,Rho_comp) ) * dJ_old_kface
424  + halfg * ( stg_cons(i,j,k,Rho_comp) + stg_cons(i,j,k-1,Rho_comp) ) * dJ_stg_kface;
425 
426  // line 3 residuals (order dtau^2) 1.0 <-> beta_2
427  Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,Rho_comp) +
428  slow_rhs_cons(i,j,k-1,Rho_comp) )
429  + ( coeff_P * slow_rhs_cons(i,j,k ,RhoTheta_comp) +
430  coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) );
431 
432  Real Omega_kp1 = omega_arr(i,j,k+1);
433  Real Omega_k = omega_arr(i,j,k );
434  Real Omega_km1 = omega_arr(i,j,k-1);
435 
436  // consolidate lines 4&5 (order dtau^2)
437  R1_tmp += ( halfg ) *
438  ( beta_1 * dzi * (Omega_kp1 - Omega_km1) + temp_rhs_arr(i,j,k,Rho_comp) + temp_rhs_arr(i,j,k-1,Rho_comp));
439 
440  // consolidate lines 6&7 (order dtau^2)
441  R1_tmp += -(
442  coeff_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,RhoTheta_comp) ) +
443  coeff_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
444 
445  // line 1
446  RHS_a(i,j,k) = dJ_old_kface * prev_zmom(i,j,k) - dJ_stg_kface * stg_zmom(i,j,k)
447  + dtau *(slow_rhs_rho_w(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp );
448 
449  // We cannot use omega_arr here since that was built with old_rho_u and old_rho_v ...
450  Real UppVpp = dJ_new_kface * OmegaFromW(i,j,k,0.,cur_xmom,cur_ymom,z_nd_new,dxInv)
451  -dJ_stg_kface * OmegaFromW(i,j,k,0.,stg_xmom,stg_ymom,z_nd_stg,dxInv);
452  RHS_a(i,j,k) += UppVpp;
453  });
454  } // end profile
455 
456  Box b2d = tbz; // Copy constructor
457  b2d.setRange(2,0);
458 
459  auto const lo = lbound(bx);
460  auto const hi = ubound(bx);
461 
462  {
463  BL_PROFILE("fast_rhs_b2d_loop_t");
464 
465 #ifdef AMREX_USE_GPU
466  ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
467  {
468  // Moving terrain
469  Real rho_on_bdy = 0.5 * ( prev_cons(i,j,lo.z) + prev_cons(i,j,lo.z-1) );
470  RHS_a(i,j,lo.z) = rho_on_bdy * zp_t_arr(i,j,lo.z);
471 
472  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
473 
474  // w_khi = 0
475  RHS_a(i,j,hi.z+1) = 0.0;
476 
477  for (int k = lo.z+1; k <= hi.z+1; k++) {
478  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
479  }
480 
481  for (int k = hi.z; k >= lo.z; k--) {
482  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
483  }
484 
485  // We assume that Omega == w at the top boundary and that changes in J there are irrelevant
486  cur_zmom(i,j,hi.z+1) = stg_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
487  });
488 #else
489  for (int j = lo.y; j <= hi.y; ++j) {
490  AMREX_PRAGMA_SIMD
491  for (int i = lo.x; i <= hi.x; ++i) {
492 
493  Real rho_on_bdy = 0.5 * ( prev_cons(i,j,lo.z) + prev_cons(i,j,lo.z-1) );
494  RHS_a(i,j,lo.z) = rho_on_bdy * zp_t_arr(i,j,lo.z);
495 
496  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
497  }
498  }
499 
500  for (int j = lo.y; j <= hi.y; ++j) {
501  AMREX_PRAGMA_SIMD
502  for (int i = lo.x; i <= hi.x; ++i) {
503  RHS_a (i,j,hi.z+1) = 0.0;
504  }
505  }
506  for (int k = lo.z+1; k <= hi.z+1; ++k) {
507  for (int j = lo.y; j <= hi.y; ++j) {
508  AMREX_PRAGMA_SIMD
509  for (int i = lo.x; i <= hi.x; ++i) {
510  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
511  }
512  }
513  }
514  for (int k = hi.z; k >= lo.z; --k) {
515  for (int j = lo.y; j <= hi.y; ++j) {
516  AMREX_PRAGMA_SIMD
517  for (int i = lo.x; i <= hi.x; ++i) {
518  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
519  }
520  }
521  }
522 
523  // We assume that Omega == w at the top boundary and that changes in J there are irrelevant
524  for (int j = lo.y; j <= hi.y; ++j) {
525  AMREX_PRAGMA_SIMD
526  for (int i = lo.x; i <= hi.x; ++i) {
527  cur_zmom(i,j,hi.z+1) = stg_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
528  }
529  }
530 #endif
531  } // end profile
532 
533  {
534  BL_PROFILE("fast_rhs_new_drhow");
535  tbz.setBig(2,hi.z);
536  ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
537  {
538  Real rho_on_face = 0.5 * (cur_cons(i,j,k,Rho_comp) + cur_cons(i,j,k-1,Rho_comp));
539 
540  if (k == lo.z) {
541  cur_zmom(i,j,k) = WFromOmega(i,j,k,rho_on_face*(z_t_arr(i,j,k)+zp_t_arr(i,j,k)),
542  cur_xmom,cur_ymom,z_nd_new,dxInv);
543 
544  // We need to set this here because it is used to define zflux_lo below
545  soln_a(i,j,k) = 0.;
546 
547  } else {
548 
549  Real UppVpp = WFromOmega(i,j,k,0.0,cur_xmom,cur_ymom,z_nd_new,dxInv)
550  -WFromOmega(i,j,k,0.0,stg_xmom,stg_ymom,z_nd_stg,dxInv);
551  Real wpp = soln_a(i,j,k) + UppVpp;
552  Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1));
553  Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1));
554 
555  cur_zmom(i,j,k) = dJ_old_kface * (stg_zmom(i,j,k) + wpp);
556  cur_zmom(i,j,k) /= dJ_new_kface;
557 
558  soln_a(i,j,k) = OmegaFromW(i,j,k,cur_zmom(i,j,k),cur_xmom,cur_ymom,z_nd_new,dxInv)
559  - OmegaFromW(i,j,k,stg_zmom(i,j,k),stg_xmom,stg_ymom,z_nd_stg,dxInv);
560  soln_a(i,j,k) -= rho_on_face * zp_t_arr(i,j,k);
561  }
562  });
563  } // end profile
564 
565  // **************************************************************************
566  // Define updates in the RHS of rho and (rho theta)
567  // **************************************************************************
568  {
569  BL_PROFILE("fast_rho_final_update");
570  ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
571  {
572  Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
573  Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
574 
575  // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
576  // so we don't update avg_zmom at k=vbx_hi.z+1
577  avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
578  (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
579 
580  // Note that the factor of (1/J) in the fast source term is canceled
581  // when we multiply old and new by detJ_old and detJ_new , respectively
582  // We have already scaled the slow source term to have the extra factor of dJ
583  Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi);
584  Real temp_rho = detJ_old(i,j,k) * cur_cons(i,j,k,0) +
585  dtau * ( slow_rhs_cons(i,j,k,0) + fast_rhs_rho );
586  cur_cons(i,j,k,0) = temp_rho / detJ_new(i,j,k);
587 
588  // Note that the factor of (1/J) in the fast source term is canceled
589  // when we multiply old and new by detJ_old and detJ_new , respectively
590  // We have already scaled the slow source term to have the extra factor of dJ
591  Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
592  ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
593  - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi );
594  Real temp_rth = detJ_old(i,j,k) * cur_cons(i,j,k,1) +
595  dtau * ( slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta );
596  cur_cons(i,j,k,1) = temp_rth / detJ_new(i,j,k);
597  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
598 
599  if (k == vbx_hi.z) {
600  avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
601  (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
602  (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
603  }
604  });
605  } // end profile
606 
607  // We only add to the flux registers in the final RK step
608  if (l_reflux && nrk == 2) {
609  int strt_comp_reflux = 0;
610  int num_comp_reflux = 2;
611  if (level < finest_level) {
612  fr_as_crse->CrseAdd(mfi,
613  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
614  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
615  }
616  if (level > 0) {
617  fr_as_fine->FineAdd(mfi,
618  {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
619  dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
620  }
621 
622  // This is necessary here so we don't go on to the next FArrayBox without
623  // having finished copying the fluxes into the FluxRegisters (since the fluxes
624  // are stored in temporary FArrayBox's)
625  Gpu::streamSynchronize();
626 
627  } // two-way coupling
628 
629  } // mfi
630  } // OMP
631 }
Referenced symbols

constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:53
#define PrimQ2_comp
Definition: ERF_IndexDefines.H:54
#define Rho_comp
Definition: ERF_IndexDefines.H:36
#define RhoTheta_comp
Definition: ERF_IndexDefines.H:37
#define PrimTheta_comp
Definition: ERF_IndexDefines.H:50
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_xi_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:101
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int i, int j, int k, amrex::Real omega, amrex::Real u, amrex::Real v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:407
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real OmegaFromW(int i, int j, int k, amrex::Real w, const amrex::Array4< const amrex::Real > u_arr, const amrex::Array4< const amrex::Real > v_arr, const amrex::Array4< const amrex::Real > z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:374
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:87
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:130
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_eta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:158
@ ymom
Definition: ERF_IndexDefines.H:141
@ cons
Definition: ERF_IndexDefines.H:139
@ zmom
Definition: ERF_IndexDefines.H:142
@ xmom
Definition: ERF_IndexDefines.H:140