ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_TI_no_substep_fun.H
Go to the documentation of this file.
/**
 * Wrapper for advancing the solution with the slow RHS in the absence of acoustic substepping.
 *
 * Performs the forward-Euler-style update S_sum = S_old + slow_dt * F_slow for the
 * "fast" subset of variables only, with terrain-type-specific metric corrections:
 *  - MovingFittedMesh: conservative update weighted by the old/stage/new Jacobians (dJ)
 *  - EB:               update only in cells/faces with nonzero volume fraction, followed
 *                      by flux redistribution of F_slow and a second state update
 *  - otherwise:        plain update, skipping cells where the Jacobian is zero
 * Finally applies boundary conditions and, for anelastic runs, projects the momenta.
 *
 * @param S_sum       [out] updated state vectors, one MultiFab per IntVars type
 * @param S_old       [in]  state at the start of this slow step
 * @param F_slow      [in/out] slow RHS; modified in place by redistribution in the EB branch
 * @param time_for_fp time used for fillpatching / boundary conditions
 * @param slow_dt     slow timestep over which to advance
 * @param nrk         RK stage index (unused here)
 */
auto no_substep_fun = [&](Vector<MultiFab>& S_sum,
                          Vector<MultiFab>& S_old,
                          Vector<MultiFab>& F_slow,
                          const Real time_for_fp, const Real slow_dt,
                          const int nrk)
{
    BL_PROFILE("no_substep_fun");
    amrex::ignore_unused(nrk);
    int n_data = IntVars::NumTypes;

    const auto& dxInv = fine_geom.InvCellSizeArray();

    // Start component / component count of the "fast" subset for each variable type
    // (cons, xmom, ymom, zmom). cons advances 2 components here — presumably rho and
    // rho*theta given the Rho_comp usage below; TODO confirm against IntVars ordering.
    const amrex::GpuArray<int, IntVars::NumTypes> scomp_fast = {0,0,0,0};
    const amrex::GpuArray<int, IntVars::NumTypes> ncomp_fast = {2,1,1,1};

    if (verbose) amrex::Print() << " No-substepping time integration at level " << level
                                << std::setprecision(timeprecision)
                                << " to " << time_for_fp
                                << " with dt = " << slow_dt << std::endl;

    // Update S_sum = S_stage only for the fast variables
#ifdef _OPENMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
    {
        for ( MFIter mfi(S_sum[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            // Cell-centered tile box plus the three face-centered (nodal-in-one-dir) boxes
            const Box bx = mfi.tilebox();
            Box tbx = mfi.nodaltilebox(0);
            Box tby = mfi.nodaltilebox(1);
            Box tbz = mfi.nodaltilebox(2);

            // Stage the Array4 views for all variable types on the host, then copy the
            // array-of-views to device memory so a single device pointer can be captured
            // by the GPU kernels below (instead of capturing each Array4 individually).
            Vector<Array4<Real> > ssum_h(n_data);
            Vector<Array4<Real> > sold_h(n_data);
            Vector<Array4<Real> > fslow_h(n_data);

            for (int i = 0; i < n_data; ++i) {
                ssum_h[i] = S_sum[i].array(mfi);
                sold_h[i] = S_old[i].array(mfi);
                fslow_h[i] = F_slow[i].array(mfi);
            }

            Gpu::AsyncVector<Array4<Real> > sold_d(n_data);
            Gpu::AsyncVector<Array4<Real> > ssum_d(n_data);
            Gpu::AsyncVector<Array4<Real> > fslow_d(n_data);

            Gpu::copy(Gpu::hostToDevice, sold_h.begin(), sold_h.end(), sold_d.begin());
            Gpu::copy(Gpu::hostToDevice, ssum_h.begin(), ssum_h.end(), ssum_d.begin());
            Gpu::copy(Gpu::hostToDevice, fslow_h.begin(), fslow_h.end(), fslow_d.begin());

            Array4<Real>* sold = sold_d.dataPtr();
            Array4<Real>* ssum = ssum_d.dataPtr();
            Array4<Real>* fslow = fslow_d.dataPtr();

            // Moving terrain
            if ( solverChoice.terrain_type == TerrainType::MovingFittedMesh )
            {
                // Jacobians (metric determinants) at the old time, new time, and RK stage
                const Array4<const Real>& dJ_old = detJ_cc[level]->const_array(mfi);
                const Array4<const Real>& dJ_new = detJ_cc_new[level]->const_array(mfi);
                const Array4<const Real>& dJ_stg = detJ_cc_src[level]->const_array(mfi);

                // Nodal terrain heights at old and new times (used for face metrics)
                const Array4<const Real>& z_nd_old = z_phys_nd[level]->const_array(mfi);
                const Array4<const Real>& z_nd_new = z_phys_nd_new[level]->const_array(mfi);

                const Array4<const Real>& mf_u = mapfac[level][MapFacType::u_x]->const_array(mfi);
                const Array4<const Real>& mf_v = mapfac[level][MapFacType::v_y]->const_array(mfi);

                // Mesh velocity z_t at the RK stage
                const Array4<Real >& z_t_arr = z_t_rk[level]->array(mfi);

                // Conservative cell-centered update on a moving mesh:
                //   (dJ*q)^new = (dJ*q)^old + dt * dJ_stg * F, then divide by dJ_new.
                // We have already scaled the slow source term to have the extra factor of dJ
                ParallelFor(bx, ncomp_fast[IntVars::cons],
                [=] AMREX_GPU_DEVICE (int i, int j, int k, int nn) {
                    const int n = scomp_fast[IntVars::cons] + nn;
                    ssum[IntVars::cons](i,j,k,n) = dJ_old(i,j,k) * sold[IntVars::cons](i,j,k,n)
                                                 + slow_dt * dJ_stg(i,j,k) * fslow[IntVars::cons](i,j,k,n);
                    ssum[IntVars::cons](i,j,k,n) /= dJ_new(i,j,k);
                });

                // Horizontal momenta: same idea, but the face-metric factor is h_zeta
                // evaluated at the x-/y-faces from the old and new nodal terrain.
                // We have already scaled the slow source term to have the extra factor of dJ
                ParallelFor(tbx, tby,
                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                    Real h_zeta_old = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd_old);
                    Real h_zeta_new = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd_new);
                    ssum[IntVars::xmom](i,j,k) = ( h_zeta_old * sold[IntVars::xmom](i,j,k)
                                                 + slow_dt * fslow[IntVars::xmom](i,j,k) ) / h_zeta_new;
                },
                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                    Real h_zeta_old = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd_old);
                    Real h_zeta_new = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd_new);
                    ssum[IntVars::ymom](i,j,k) = ( h_zeta_old * sold[IntVars::ymom](i,j,k)
                                                 + slow_dt * fslow[IntVars::ymom](i,j,k) ) / h_zeta_new;
                });
                // Vertical momentum: the bottom face follows the terrain (omega = rho*z_t);
                // interior faces use the k-face-averaged Jacobians.
                ParallelFor(tbz,
                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                    if (k == 0) {
                        // Here we take advantage of the fact that moving terrain has a slip wall
                        // so we can just use the new value at (i,j,0).
                        Real rho_on_face = ssum[IntVars::cons](i,j,k,Rho_comp);
                        ssum[IntVars::zmom](i,j,k) = WFromOmega(i,j,k,rho_on_face*z_t_arr(i,j,k),
                                                                ssum[IntVars::xmom], ssum[IntVars::ymom],
                                                                mf_u, mf_v, z_nd_new,dxInv);
                    } else {
                        Real dJ_old_kface = 0.5 * (dJ_old(i,j,k) + dJ_old(i,j,k-1));
                        Real dJ_new_kface = 0.5 * (dJ_new(i,j,k) + dJ_new(i,j,k-1));
                        ssum[IntVars::zmom](i,j,k) = ( dJ_old_kface * sold[IntVars::zmom](i,j,k)
                                                     + slow_dt * fslow[IntVars::zmom](i,j,k) ) / dJ_new_kface;
                    }
                });

            } else { // Fixed or no terrain
                const Array4<const Real>& dJ_old = detJ_cc[level]->const_array(mfi);
                // Plain forward update of the fast cons components; cells with a
                // non-positive Jacobian (e.g. fully covered) just carry the old value.
                ParallelFor(bx, ncomp_fast[IntVars::cons],
                [=] AMREX_GPU_DEVICE (int i, int j, int k, int nn) {
                    const int n = scomp_fast[IntVars::cons] + nn;
                    if (dJ_old(i,j,k) > 0.0) {
                        ssum[IntVars::cons](i,j,k,n) = sold[IntVars::cons](i,j,k,n) + slow_dt *
                                                       ( fslow[IntVars::cons](i,j,k,n) );
                    } else {
                        ssum[IntVars::cons](i,j,k,n) = sold[IntVars::cons](i,j,k,n);
                    }
                });

                // Commenting out the update is a HACK while developing the EB capability
                if (solverChoice.terrain_type == TerrainType::EB)
                {
                    // Face-centered EB volume fractions: only faces with nonzero open
                    // fraction receive the momentum update; covered faces keep old values.
                    const Array4<const Real>& vfrac_u = (get_eb(level).get_u_const_factory())->getVolFrac().const_array(mfi);
                    const Array4<const Real>& vfrac_v = (get_eb(level).get_v_const_factory())->getVolFrac().const_array(mfi);
                    const Array4<const Real>& vfrac_w = (get_eb(level).get_w_const_factory())->getVolFrac().const_array(mfi);

                    ParallelFor(tbx, tby, tbz,
                    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                        if (vfrac_u(i,j,k) > 0.0) {
                            // ssum[IntVars::xmom](i,j,k) = sold[IntVars::xmom](i,j,k);
                            ssum[IntVars::xmom](i,j,k) = sold[IntVars::xmom](i,j,k)
                                                       + slow_dt * fslow[IntVars::xmom](i,j,k);
                        } else {
                            ssum[IntVars::xmom](i,j,k) = sold[IntVars::xmom](i,j,k);
                        }
                    },
                    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                        if (vfrac_v(i,j,k) > 0.0) {
                            // ssum[IntVars::ymom](i,j,k) = sold[IntVars::ymom](i,j,k);
                            ssum[IntVars::ymom](i,j,k) = sold[IntVars::ymom](i,j,k)
                                                       + slow_dt * fslow[IntVars::ymom](i,j,k);
                        } else {
                            ssum[IntVars::ymom](i,j,k) = sold[IntVars::ymom](i,j,k);
                        }
                    },
                    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                        if (vfrac_w(i,j,k) > 0.0) {
                            // ssum[IntVars::zmom](i,j,k) = sold[IntVars::zmom](i,j,k);
                            ssum[IntVars::zmom](i,j,k) = sold[IntVars::zmom](i,j,k)
                                                       + slow_dt * fslow[IntVars::zmom](i,j,k);
                        } else {
                            ssum[IntVars::zmom](i,j,k) = sold[IntVars::zmom](i,j,k);
                        }
                    });

                } else {
                    // No EB: unconditional momentum update on all faces
                    ParallelFor(tbx, tby, tbz,
                    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                        ssum[IntVars::xmom](i,j,k) = sold[IntVars::xmom](i,j,k)
                                                   + slow_dt * fslow[IntVars::xmom](i,j,k);
                    },
                    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                        ssum[IntVars::ymom](i,j,k) = sold[IntVars::ymom](i,j,k)
                                                   + slow_dt * fslow[IntVars::ymom](i,j,k);
                    },
                    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                        ssum[IntVars::zmom](i,j,k) = sold[IntVars::zmom](i,j,k)
                                                   + slow_dt * fslow[IntVars::zmom](i,j,k);
                    });
                } // no EB
            } // not moving terrain
        } // mfi

        // NOTE(review): this whole EB section (MultiFab::define, setVal, Copy,
        // FillBoundary, redistribute_term) still sits inside the `#pragma omp parallel`
        // region opened above, so with OpenMP on the CPU every thread would execute
        // these collective MultiFab operations — confirm this is intended / thread-safe,
        // or whether the parallel region should close at the end of the MFIter loop.
        if (solverChoice.terrain_type == TerrainType::EB)
        {
            BL_PROFILE("no_substep_fun_redistribute");
            // Redistribute cons states (cell-centered)

            // Build scratch dUdt MultiFabs holding a copy of F_slow with filled
            // boundaries, as required by redistribute_term.
            Vector<MultiFab> dUdt(IntVars::NumTypes);
            dUdt[IntVars::cons].define(ba, dm, F_slow[IntVars::cons].nComp(), F_slow[IntVars::cons].nGrow(), MFInfo(), EBFactory(level));
            for (int i = 1; i <= AMREX_SPACEDIM; ++i) {
                dUdt[i].define(F_slow[i].boxArray(), F_slow[i].DistributionMap(), F_slow[i].nComp(), F_slow[i].nGrow(), MFInfo());
            }
            for (int i = 0; i <= AMREX_SPACEDIM; ++i) {
                dUdt[i].setVal(0.0, 0, ncomp_fast[i], dUdt[i].nGrow());
                MultiFab::Copy(dUdt[i], F_slow[i], 0, 0, F_slow[i].nComp(), 0);
                // Large sentinel on domain boundaries so any accidental use of
                // un-fillpatched boundary data is easy to spot — TODO confirm intent.
                dUdt[i].setDomainBndry(1.234e10, 0, ncomp_fast[i], fine_geom);
                dUdt[i].FillBoundary(fine_geom.periodicity());
            }

            BCRec const* bc_ptr_d = domain_bcs_type_d.data();
            IntVect ngv_pert = S_old[IntVars::cons].nGrowVect();

            MultiFab S_pert_cons(ba, dm, S_old[IntVars::cons].nComp(), ngv_pert, MFInfo(), EBFactory(level));
            MultiFab rt0(ba, dm, 1, ngv_pert, MFInfo(), EBFactory(level));

            // Copy S_old into S_pert
            MultiFab::Copy(S_pert_cons, S_old[IntVars::cons],0,0,S_pert_cons.nComp(), ngv_pert);

            // Copy theta0 into rt0
            MultiFab::Copy(rt0,base_state[level],BaseState::th0_comp,0,1,ngv_pert);

            // Multiply theta0 by rho0 in rt0
            MultiFab::Multiply(rt0,base_state[level],BaseState::r0_comp,0,1,ngv_pert);

            // Form the perturbational state: subtract rho0 from component 0 and
            // (rho0*theta0) from component 1 of the copied S_old.
            MultiFab::Subtract(S_pert_cons, base_state[level], BaseState::r0_comp,0,1,ngv_pert);
            MultiFab::Subtract(S_pert_cons, rt0 , 0,1,1,ngv_pert);

            // Update F_slow for perturbational fast quantities (rho - rho0) and (rho_theta - rho_theta_0)
            redistribute_term(ncomp_fast[IntVars::cons], fine_geom, F_slow[IntVars::cons], dUdt[IntVars::cons],
                              S_pert_cons, EBFactory(level), bc_ptr_d, slow_dt);

            // Update F_slow for momenta
            redistribute_term(ncomp_fast[IntVars::xmom], fine_geom, F_slow[IntVars::xmom], dUdt[IntVars::xmom],
                              S_old[IntVars::xmom], *(get_eb(level).get_u_const_factory()), bc_ptr_d, slow_dt, IntVars::xmom);
            redistribute_term(ncomp_fast[IntVars::ymom], fine_geom, F_slow[IntVars::ymom], dUdt[IntVars::ymom],
                              S_old[IntVars::ymom], *(get_eb(level).get_v_const_factory()), bc_ptr_d, slow_dt, IntVars::ymom);
            redistribute_term(ncomp_fast[IntVars::zmom], fine_geom, F_slow[IntVars::zmom], dUdt[IntVars::zmom],
                              S_old[IntVars::zmom], *(get_eb(level).get_w_const_factory()), bc_ptr_d, slow_dt, IntVars::zmom);


            // Update state using the updated F_slow.
            for ( MFIter mfi(S_sum[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
            {
                const Box bx = mfi.tilebox();
                Box tbx = mfi.nodaltilebox(0);
                Box tby = mfi.nodaltilebox(1);
                Box tbz = mfi.nodaltilebox(2);

                // Same host-stage / device-copy pattern as the first MFIter loop above:
                // pack Array4 views per variable type and expose them via device pointers.
                Vector<Array4<Real> > ssum_h(n_data);
                Vector<Array4<Real> > sold_h(n_data);
                Vector<Array4<Real> > fslow_h(n_data);

                for (int i = 0; i < n_data; ++i) {
                    ssum_h[i] = S_sum[i].array(mfi);
                    sold_h[i] = S_old[i].array(mfi);
                    fslow_h[i] = F_slow[i].array(mfi);
                }

                Gpu::AsyncVector<Array4<Real> > sold_d(n_data);
                Gpu::AsyncVector<Array4<Real> > ssum_d(n_data);
                Gpu::AsyncVector<Array4<Real> > fslow_d(n_data);

                Gpu::copy(Gpu::hostToDevice, sold_h.begin(), sold_h.end(), sold_d.begin());
                Gpu::copy(Gpu::hostToDevice, ssum_h.begin(), ssum_h.end(), ssum_d.begin());
                Gpu::copy(Gpu::hostToDevice, fslow_h.begin(), fslow_h.end(), fslow_d.begin());

                Array4<Real>* sold = sold_d.dataPtr();
                Array4<Real>* ssum = ssum_d.dataPtr();
                Array4<Real>* fslow = fslow_d.dataPtr();

                // const Array4<const Real>& vfrac_c = detJ_cc[level]->const_array(mfi);
                const Array4<const Real>& vfrac_c = (get_eb(level).get_const_factory())->getVolFrac().const_array(mfi);

                // Re-apply the cons update with the now-redistributed F_slow; covered
                // cells (zero volume fraction) are left untouched (they keep the values
                // written by the first loop).
                ParallelFor(bx, ncomp_fast[IntVars::cons], [=] AMREX_GPU_DEVICE (int i, int j, int k, int nn)
                {
                    const int n = scomp_fast[IntVars::cons] + nn;
                    if (vfrac_c(i,j,k) > 0.0) {
                        ssum[IntVars::cons](i,j,k,n) = sold[IntVars::cons](i,j,k,n) + slow_dt *
                                                       ( fslow[IntVars::cons](i,j,k,n) );
                    }
                });

                const Array4<const Real>& vfrac_u = (get_eb(level).get_u_const_factory())->getVolFrac().const_array(mfi);
                const Array4<const Real>& vfrac_v = (get_eb(level).get_v_const_factory())->getVolFrac().const_array(mfi);
                const Array4<const Real>& vfrac_w = (get_eb(level).get_w_const_factory())->getVolFrac().const_array(mfi);

                // Re-apply the momentum updates with the redistributed F_slow on
                // uncovered faces only.
                ParallelFor(tbx, tby, tbz,
                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                    if (vfrac_u(i,j,k) > 0.0) {
                        ssum[IntVars::xmom](i,j,k) = sold[IntVars::xmom](i,j,k)
                                                   + slow_dt * fslow[IntVars::xmom](i,j,k);
                    }
                },
                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                    if (vfrac_v(i,j,k) > 0.0) {
                        ssum[IntVars::ymom](i,j,k) = sold[IntVars::ymom](i,j,k)
                                                   + slow_dt * fslow[IntVars::ymom](i,j,k);
                    }
                },
                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                    if (vfrac_w(i,j,k) > 0.0) {
                        ssum[IntVars::zmom](i,j,k) = sold[IntVars::zmom](i,j,k)
                                                   + slow_dt * fslow[IntVars::zmom](i,j,k);

                    }
                });
            } // MFIter
        } // EB
    } // omp

    // Even if we update all the conserved variables we don't need
    // to fillpatch the slow ones every acoustic substep
    // (fast_only / vel_and_mom_synced are captured variables assigned inline —
    // C++ has no named arguments, so these assignments feed the call.)
    apply_bcs(S_sum, time_for_fp, S_sum[IntVars::cons].nGrow(), S_sum[IntVars::xmom].nGrow(),
              fast_only=true, vel_and_mom_synced=false);

    // For anelastic runs, enforce the divergence constraint on the updated momenta;
    // thin-body forcing (if any) uses the specialized projection.
    if (solverChoice.anelastic[level]) {
        bool have_tb = (thin_xforce[0] || thin_yforce[0] || thin_zforce[0]);
        if (have_tb) {
            project_velocity_tb(level, slow_dt, S_sum);
        } else {
            project_momenta(level, slow_dt, S_sum);
        }
    }
};
@ v_y
Definition: ERF_DataStruct.H:24
@ u_x
Definition: ERF_DataStruct.H:23
void redistribute_term(int ncomp, const Geometry &geom, MultiFab &result, MultiFab &result_tmp, MultiFab const &state, EBFArrayBoxFactory const &ebfact, BCRec const *bc, Real const local_dt)
Definition: ERF_EBRedistribute.cpp:13
#define Rho_comp
Definition: ERF_IndexDefines.H:36
amrex::Real Real
Definition: ERF_ShocInterface.H:19
auto no_substep_fun
Definition: ERF_TI_no_substep_fun.H:4
auto apply_bcs
Definition: ERF_TI_utils.H:73
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:102
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:142
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int &i, int &j, int &k, amrex::Real omega, const amrex::Array4< const amrex::Real > &u_arr, const amrex::Array4< const amrex::Real > &v_arr, const amrex::Array4< const amrex::Real > &mf_u, const amrex::Array4< const amrex::Real > &mf_v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:462
@ th0_comp
Definition: ERF_IndexDefines.H:66
@ r0_comp
Definition: ERF_IndexDefines.H:63
@ NumTypes
Definition: ERF_IndexDefines.H:162
@ ymom
Definition: ERF_IndexDefines.H:160
@ cons
Definition: ERF_IndexDefines.H:158
@ zmom
Definition: ERF_IndexDefines.H:161
@ xmom
Definition: ERF_IndexDefines.H:159