ERF
Energy Research and Forecasting: An Atmospheric Modeling Code
ERF_TI_no_substep_fun.H
Go to the documentation of this file.
1 /**
2  * Wrapper for advancing the solution with the slow RHS in the absence of acoustic substepping
3  */
// Computes S_sum = S_old + slow_dt * F_slow for the "fast" variables only
// (the first ncomp_fast[cons] components of the cell-centered state plus the
// three face-centered momenta).  With a moving terrain-fitted mesh the update
// is metric-weighted (old Jacobian in, new Jacobian out); with EB terrain the
// cons tendency is first redistributed.  Afterwards boundary conditions are
// applied and, for anelastic levels, the velocities are projected.
//
// Parameters:
//   S_sum       [out] updated state, one MultiFab per IntVars type
//   S_old       [in]  state at the start of this slow step
//   F_slow      [in]  slow RHS (already detJ-scaled for moving terrain;
//                     modified in place by redistribution in the EB branch)
//   time_for_fp [in]  time used when fillpatching / applying BCs
//   slow_dt     [in]  slow timestep over which to advance
//   nrk         [in]  RK stage index (unused here)
4  auto no_substep_fun = [&](Vector<MultiFab>& S_sum,
5  Vector<MultiFab>& S_old,
6  Vector<MultiFab>& F_slow,
7  const Real time_for_fp, const Real slow_dt,
8  const int nrk)
9  {
10  BL_PROFILE("no_substep_fun");
11  amrex::ignore_unused(nrk);
// Number of integrated variable types (cons, xmom, ymom, zmom).
12  int n_data = IntVars::NumTypes;
13 
14  const auto& dxInv = fine_geom.InvCellSizeArray();
15 
// Per-type starting component and component count of the "fast" variables:
// 2 components of cons (presumably density and rho*theta — confirm against
// ERF_IndexDefines.H) and 1 component for each momentum.
16  const amrex::GpuArray<int, IntVars::NumTypes> scomp_fast = {0,0,0,0};
17  const amrex::GpuArray<int, IntVars::NumTypes> ncomp_fast = {2,1,1,1};
18 
19  if (verbose) amrex::Print() << " No-substepping time integration at level " << level
20  << " to " << time_for_fp
21  << " with dt = " << slow_dt << std::endl;
22 
23  // Update S_sum = S_stage only for the fast variables
24 #ifdef _OPENMP
25 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
26 #endif
27  {
28  for ( MFIter mfi(S_sum[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
29  {
// Cell-centered tile box plus the three face-centered (nodal in one
// direction) tile boxes for the momenta.
30  const Box bx = mfi.tilebox();
31  Box tbx = mfi.nodaltilebox(0);
32  Box tby = mfi.nodaltilebox(1);
33  Box tbz = mfi.nodaltilebox(2);
34 
// Gather an Array4 view per variable type on the host ...
35  Vector<Array4<Real> > ssum_h(n_data);
36  Vector<Array4<Real> > sold_h(n_data);
37  Vector<Array4<Real> > fslow_h(n_data);
38 
39  for (int i = 0; i < n_data; ++i) {
40  ssum_h[i] = S_sum[i].array(mfi);
41  sold_h[i] = S_old[i].array(mfi);
42  fslow_h[i] = F_slow[i].array(mfi);
43  }
44 
// ... then copy the Array4 descriptors (not the underlying data) to
// device memory so the kernels below can index them by variable type.
45  Gpu::AsyncVector<Array4<Real> > sold_d(n_data);
46  Gpu::AsyncVector<Array4<Real> > ssum_d(n_data);
47  Gpu::AsyncVector<Array4<Real> > fslow_d(n_data);
48 
49  Gpu::copy(Gpu::hostToDevice, sold_h.begin(), sold_h.end(), sold_d.begin());
50  Gpu::copy(Gpu::hostToDevice, ssum_h.begin(), ssum_h.end(), ssum_d.begin());
51  Gpu::copy(Gpu::hostToDevice, fslow_h.begin(), fslow_h.end(), fslow_d.begin());
52 
53  Array4<Real>* sold = sold_d.dataPtr();
54  Array4<Real>* ssum = ssum_d.dataPtr();
55  Array4<Real>* fslow = fslow_d.dataPtr();
56 
57  // Moving terrain
58  if ( solverChoice.terrain_type == TerrainType::MovingFittedMesh )
59  {
// Metric terms at the start (old) and end (new) of the step:
// cell-centered Jacobian determinants, node-centered physical
// heights, and the grid velocity z_t at this RK stage.
60  const Array4<const Real>& dJ_old = detJ_cc[level]->const_array(mfi);
61  const Array4<const Real>& dJ_new = detJ_cc_new[level]->const_array(mfi);
62 
63  const Array4<const Real>& z_nd_old = z_phys_nd[level]->const_array(mfi);
64  const Array4<const Real>& z_nd_new = z_phys_nd_new[level]->const_array(mfi);
65 
66  const Array4<Real >& z_t_arr = z_t_rk[level]->array(mfi);
67 
68  // We have already scaled the slow source term to have the extra factor of dJ
// Conservative update on a moving mesh: weight the old state by the
// old Jacobian, add the (already dJ-scaled) tendency, then divide by
// the new Jacobian.
69  ParallelFor(bx, ncomp_fast[IntVars::cons],
70  [=] AMREX_GPU_DEVICE (int i, int j, int k, int nn) {
71  const int n = scomp_fast[IntVars::cons] + nn;
72  ssum[IntVars::cons](i,j,k,n) = dJ_old(i,j,k) * sold[IntVars::cons](i,j,k,n)
73  + slow_dt * fslow[IntVars::cons](i,j,k,n);
74  ssum[IntVars::cons](i,j,k,n) /= dJ_new(i,j,k);
75  });
76 
77  // We have already scaled the slow source term to have the extra factor of dJ
// Horizontal momenta use the face-metric h_zeta (old in, new out)
// rather than the cell-centered Jacobian.
78  ParallelFor(tbx, tby,
79  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
80  Real h_zeta_old = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd_old);
81  Real h_zeta_new = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd_new);
82  ssum[IntVars::xmom](i,j,k) = ( h_zeta_old * sold[IntVars::xmom](i,j,k)
83  + slow_dt * fslow[IntVars::xmom](i,j,k) ) / h_zeta_new;
84  },
85  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
86  Real h_zeta_old = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd_old);
87  Real h_zeta_new = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd_new);
88  ssum[IntVars::ymom](i,j,k) = ( h_zeta_old * sold[IntVars::ymom](i,j,k)
89  + slow_dt * fslow[IntVars::ymom](i,j,k) ) / h_zeta_new;
90  });
// Vertical momentum: at the bottom face (k == 0) the value is
// reconstructed from omega = rho * z_t using the freshly updated
// density and horizontal momenta; elsewhere, Jacobians are averaged
// to the k-face and the same weighted update is applied.
91  ParallelFor(tbz,
92  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
93  if (k == 0) {
94  // Here we take advantage of the fact that moving terrain has a slip wall
95  // so we can just use the new value at (i,j,0).
96  Real rho_on_face = ssum[IntVars::cons](i,j,k,Rho_comp);
97  ssum[IntVars::zmom](i,j,k) = WFromOmega(i,j,k,rho_on_face*z_t_arr(i,j,k),
98  ssum[IntVars::xmom], ssum[IntVars::ymom],
99  z_nd_new,dxInv);
100  } else {
101  Real dJ_old_kface = 0.5 * (dJ_old(i,j,k) + dJ_old(i,j,k-1));
102  Real dJ_new_kface = 0.5 * (dJ_new(i,j,k) + dJ_new(i,j,k-1));
103  ssum[IntVars::zmom](i,j,k) = ( dJ_old_kface * sold[IntVars::zmom](i,j,k)
104  + slow_dt * fslow[IntVars::zmom](i,j,k) ) / dJ_new_kface;
105  }
106  });
107 
108  } else { // Fixed or no terrain
109  const Array4<const Real>& dJ_old = detJ_cc[level]->const_array(mfi);
// Plain forward-Euler update; the dJ_old > 0 guard skips cells with
// zero Jacobian (presumably EB-covered cells — confirm).
110  ParallelFor(bx, ncomp_fast[IntVars::cons],
111  [=] AMREX_GPU_DEVICE (int i, int j, int k, int nn) {
112  const int n = scomp_fast[IntVars::cons] + nn;
113  if (dJ_old(i,j,k) > 0.0) {
114  ssum[IntVars::cons](i,j,k,n) = sold[IntVars::cons](i,j,k,n) + slow_dt *
115  ( fslow[IntVars::cons](i,j,k,n) );
116  }
117  });
118 
119  // Commenting out the update is a HACK while developing the EB capability
// Under EB terrain the momenta are deliberately carried forward
// unchanged (no tendency applied) while EB support is developed.
120  if (solverChoice.terrain_type == TerrainType::EB)
121  {
122  ParallelFor(tbx, tby, tbz,
123  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
124  ssum[IntVars::xmom](i,j,k) = sold[IntVars::xmom](i,j,k);
125  },
126  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
127  ssum[IntVars::ymom](i,j,k) = sold[IntVars::ymom](i,j,k);
128  },
129  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
130  ssum[IntVars::zmom](i,j,k) = sold[IntVars::zmom](i,j,k);
131  });
132 
133  } else {
// Standard momentum update for fixed/no terrain.
134  ParallelFor(tbx, tby, tbz,
135  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
136  ssum[IntVars::xmom](i,j,k) = sold[IntVars::xmom](i,j,k)
137  + slow_dt * fslow[IntVars::xmom](i,j,k);
138  },
139  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
140  ssum[IntVars::ymom](i,j,k) = sold[IntVars::ymom](i,j,k)
141  + slow_dt * fslow[IntVars::ymom](i,j,k);
142  },
143  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
144  ssum[IntVars::zmom](i,j,k) = sold[IntVars::zmom](i,j,k)
145  + slow_dt * fslow[IntVars::zmom](i,j,k);
146  });
147  } // no EB
148  } // not moving terrain
149  } // mfi
150 
// EB path: redistribute the cons tendency (in place, in F_slow) and then
// redo the cons update with the redistributed RHS, overwriting the values
// written in the loop above.
151  if (solverChoice.terrain_type == TerrainType::EB)
152  {
153  // Redistribute cons states (cell-centered)
154 
// Scratch copy of the cons RHS with filled ghost cells; the domain
// boundary is set to an obvious sentinel (1.234e10) so any use of
// unfilled boundary data is easy to spot.
155  MultiFab dUdt_tmp(ba, dm, F_slow[IntVars::cons].nComp(), F_slow[IntVars::cons].nGrow(), MFInfo(), Factory(level));
156  dUdt_tmp.setVal(0.);
157  MultiFab::Copy(dUdt_tmp, F_slow[IntVars::cons], 0, 0, F_slow[IntVars::cons].nComp(), F_slow[IntVars::cons].nGrow());
158  dUdt_tmp.FillBoundary(fine_geom.periodicity());
159  dUdt_tmp.setDomainBndry(1.234e10, 0, ncomp_fast[IntVars::cons], fine_geom);
160 
161  BCRec const* bc_ptr_d = domain_bcs_type_d.data();
162  S_old[IntVars::cons].FillBoundary(fine_geom.periodicity());
163  S_old[IntVars::cons].setDomainBndry(1.234e10, 0, ncomp_fast[IntVars::cons], fine_geom);
164 
165  // Update F_slow by Redistribution.
166  redistribute_term ( level, ncomp_fast[IntVars::cons], F_slow[IntVars::cons], dUdt_tmp,
167  S_old[IntVars::cons], bc_ptr_d, slow_dt);
168 
169  // Update state using the updated F_slow. (NOTE: redistribute_term returns RHS not state variables.)
170 
171  for ( MFIter mfi(S_sum[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
172  {
173  const Box bx = mfi.tilebox();
// NOTE(review): these vectors are sized and the loop below bounded by
// ncomp_fast[cons] (= 2, a component count), yet `i` indexes S_sum[i]
// over variable *types* (so types 0 and 1 are gathered).  Only index
// 0 (cons) is dereferenced by the kernel, so this happens to work,
// but the bound looks like it conflates component count with type
// count — confirm intent.
174  Vector<Array4<Real> > ssum_h(ncomp_fast[IntVars::cons]);
175  Vector<Array4<Real> > sold_h(ncomp_fast[IntVars::cons]);
176  Vector<Array4<Real> > fslow_h(ncomp_fast[IntVars::cons]);
177  const Array4<const Real>& dJ_old = detJ_cc[level]->const_array(mfi);
178 
179  for (int i = 0; i < ncomp_fast[IntVars::cons]; ++i) {
180  ssum_h[i] = S_sum[i].array(mfi);
181  sold_h[i] = S_old[i].array(mfi);
182  fslow_h[i] = F_slow[i].array(mfi);
183  }
184 
185  Gpu::AsyncVector<Array4<Real> > sold_d(ncomp_fast[IntVars::cons]);
186  Gpu::AsyncVector<Array4<Real> > ssum_d(ncomp_fast[IntVars::cons]);
187  Gpu::AsyncVector<Array4<Real> > fslow_d(ncomp_fast[IntVars::cons]);
188 
189  Gpu::copy(Gpu::hostToDevice, sold_h.begin(), sold_h.end(), sold_d.begin());
190  Gpu::copy(Gpu::hostToDevice, ssum_h.begin(), ssum_h.end(), ssum_d.begin());
191  Gpu::copy(Gpu::hostToDevice, fslow_h.begin(), fslow_h.end(), fslow_d.begin());
192 
193  Array4<Real>* sold = sold_d.dataPtr();
194  Array4<Real>* ssum = ssum_d.dataPtr();
195  Array4<Real>* fslow = fslow_d.dataPtr();
196 
// Same guarded forward-Euler cons update as before, now using the
// redistributed tendency.
197  ParallelFor(bx, ncomp_fast[IntVars::cons], [=] AMREX_GPU_DEVICE (int i, int j, int k, int nn)
198  {
199  const int n = scomp_fast[IntVars::cons] + nn;
200  if (dJ_old(i,j,k) > 0.0) {
201  ssum[IntVars::cons](i,j,k,n) = sold[IntVars::cons](i,j,k,n) + slow_dt *
202  ( fslow[IntVars::cons](i,j,k,n) );
203  }
204  });
205  }
206 
207  // Redistribute momentum states (face-centered) will be added here.
208  } // EB
209  } // omp
210 
211  // Even if we update all the conserved variables we don't need
212  // to fillpatch the slow ones every acoustic substep
// NOTE(review): `fast_only=true` / `vel_and_mom_synced=false` are
// assignments to captured locals passed as arguments (C++ has no named
// parameters) — assumes those bools exist in the enclosing scope; confirm.
213  apply_bcs(S_sum, time_for_fp, S_sum[IntVars::cons].nGrow(), S_sum[IntVars::xmom].nGrow(),
214  fast_only=true, vel_and_mom_synced=false);
215 
// For anelastic levels, project the velocities to enforce the constraint;
// the _tb variant is used when any thin-body forcing is present.
216  if (solverChoice.anelastic[level]) {
217  bool have_tb = (thin_xforce[0] || thin_yforce[0] || thin_zforce[0]);
218  if (have_tb) {
219  project_velocities_tb(level, slow_dt, S_sum, pp_inc[level]);
220  } else {
221  project_velocities(level, slow_dt, S_sum, pp_inc[level]);
222  }
223  }
224  };
#define Rho_comp
Definition: ERF_IndexDefines.H:36
auto no_substep_fun
Definition: ERF_TI_no_substep_fun.H:4
auto apply_bcs
Definition: ERF_TI_utils.H:50
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real WFromOmega(int i, int j, int k, amrex::Real omega, amrex::Real u, amrex::Real v, const amrex::Array4< const amrex::Real > &z_nd, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &dxInv)
Definition: ERF_TerrainMetrics.H:407
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtIface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:87
AMREX_GPU_DEVICE AMREX_FORCE_INLINE amrex::Real Compute_h_zeta_AtJface(const int &i, const int &j, const int &k, const amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > &cellSizeInv, const amrex::Array4< const amrex::Real > &z_nd)
Definition: ERF_TerrainMetrics.H:130
@ NumTypes
Definition: ERF_IndexDefines.H:143
@ ymom
Definition: ERF_IndexDefines.H:141
@ cons
Definition: ERF_IndexDefines.H:139
@ zmom
Definition: ERF_IndexDefines.H:142
@ xmom
Definition: ERF_IndexDefines.H:140