MFC
Exascale flow solver
m_mpi_common.fpp.f90
1# 1 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2!>
3!! @file
4!! @brief Contains module m_mpi_common
5
6# 1 "/home/runner/work/MFC/MFC/src/common/include/case.fpp" 1
7! This file exists so that Fypp can be run without generating case.fpp files for
8! each target. This is useful when generating documentation, for example. This
9! should also let MFC be built with CMake directly, without invoking mfc.sh.
10
11! For pre-process.
12# 9 "/home/runner/work/MFC/MFC/src/common/include/case.fpp"
13
14! For moving immersed boundaries in simulation
15# 14 "/home/runner/work/MFC/MFC/src/common/include/case.fpp"
16# 6 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp" 2
17# 1 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 1
18# 1 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 1
19# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
20# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
21# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
22# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
23# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
24# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
25
26# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
27# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
28# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
29
30# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
31
32# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
33
34# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
35
36# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
37
38# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
39
40# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
41
42# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
43! New line at end of file is required for FYPP
44# 2 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
45# 1 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 1
46# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
47# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
48# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
49# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
50# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
51# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
52
53# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
54# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
55# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
56
57# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
58
59# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
60
61# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
62
63# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
64
65# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
66
67# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
68
69# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
70! New line at end of file is required for FYPP
71# 2 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 2
72
73# 4 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
74# 5 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
75# 6 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
76# 7 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
77# 8 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
78
79# 20 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
80
81# 43 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
82
83# 48 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
84
85# 53 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
86
87# 58 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
88
89# 63 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
90
91# 68 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
92
93# 76 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
94
95# 81 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
96
97# 86 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
98
99# 91 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
100
101# 96 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
102
103# 101 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
104
105# 106 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
106
107# 111 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
108
109# 116 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
110
111# 121 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
112
113# 151 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
114
115# 192 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
116
117# 207 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
118
119# 232 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
120
121# 243 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
122
123# 245 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
124# 255 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
125
126# 283 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
127
128# 293 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
129
130# 303 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
131
132# 312 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
133
134# 329 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
135
136# 339 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
137
138# 346 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
139
140# 352 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
141
142# 358 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
143
144# 364 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
145
146# 370 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
147
148# 376 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
149! New line at end of file is required for FYPP
150# 3 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
151# 1 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 1
152# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
153# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
154# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
155# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
156# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
157# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
158
159# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
160# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
161# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
162
163# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
164
165# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
166
167# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
168
169# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
170
171# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
172
173# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
174
175# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
176! New line at end of file is required for FYPP
177# 2 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 2
178
179# 7 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
180
181# 17 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
182
183# 22 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
184
185# 27 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
186
187# 32 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
188
189# 37 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
190
191# 42 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
192
193# 47 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
194
195# 52 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
196
197# 57 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
198
199# 62 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
200
201# 73 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
202
203# 78 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
204
205# 83 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
206
207# 88 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
208
209# 103 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
210
211# 131 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
212
213# 160 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
214
215# 175 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
216
217# 192 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
218
219# 213 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
220
221# 241 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
222
223# 256 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
224
225# 266 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
226
227# 275 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
228
229# 291 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
230
231# 301 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
232
233# 308 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
234! New line at end of file is required for FYPP
235# 4 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
236
237# 21 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
238
239# 37 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
240
241# 50 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
242
243# 76 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
244
245# 91 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
246
247# 102 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
248
249# 115 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
250
251# 143 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
252
253# 154 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
254
255# 165 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
256
257# 176 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
258
259# 187 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
260
261# 198 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
262
263# 208 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
264
265# 214 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
266
267# 220 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
268
269# 226 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
270
271# 232 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
272
273# 234 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
274# 235 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
275! New line at end of file is required for FYPP
276# 2 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 2
277
278# 14 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
279
280! Caution:
281! This macro requires the use of a binding script to set CUDA_VISIBLE_DEVICES, such that we have one GPU device per MPI rank.
282! That's because for both cudaMemAdvise (preferred location) and cudaMemPrefetchAsync we use location = device_id = 0.
283! For an example see misc/nvidia_uvm/bind.sh.
284# 63 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
285
286# 81 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
287
288# 88 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
289
290# 111 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
291
292# 127 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
293
294# 153 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
295
296# 159 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
297
298# 167 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
299! New line at end of file is required for FYPP
300# 7 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp" 2
301
302!> @brief MPI communication layer: domain decomposition, halo exchange, reductions, and parallel I/O setup
303module m_mpi_common
 304
305#ifdef MFC_MPI
306 use mpi !< message passing interface (mpi) module
307#endif
308
309 use m_derived_types !< definitions of the derived types
310
311 use m_global_parameters !< definitions of the global parameters
312
313 use m_helper
314
315 use ieee_arithmetic
316
317 use m_nvtx
318
319 implicit none
320
321 integer, private :: v_size
322
323# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
324#if defined(MFC_OpenACC)
325# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
326!$acc declare create(v_size)
327# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
328#elif defined(MFC_OpenMP)
329# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
330!$omp declare target (v_size)
331# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
332#endif
333 !! Generic flags used to identify and report MPI errors
334
335 real(wp), private, allocatable, dimension(:) :: buff_send !<
336 !! This variable is utilized to pack and send the buffer of the cell-average
337 !! primitive variables, for a single computational domain boundary at the
338 !! time, to the relevant neighboring processor.
339
340 real(wp), private, allocatable, dimension(:) :: buff_recv !<
341 !! buff_recv is utilized to receive and unpack the buffer of the cell-
342 !! average primitive variables, for a single computational domain boundary
343 !! at the time, from the relevant neighboring processor.
344
345#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
346
347# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
348#if defined(MFC_OpenACC)
349# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
350!$acc declare create(buff_send, buff_recv)
351# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
352#elif defined(MFC_OpenMP)
353# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
354!$omp declare target (buff_send, buff_recv)
355# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
356#endif
357#endif
358
359 integer(kind=8) :: halo_size
360
361# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
362#if defined(MFC_OpenACC)
363# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
364!$acc declare create(halo_size)
365# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
366#elif defined(MFC_OpenMP)
367# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
368!$omp declare target (halo_size)
369# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
370#endif
371
372contains
373
374 !> The computation of parameters, the allocation of memory,
375 !! the association of pointers and/or the execution of any
 376 !! other procedures that are necessary to set up the module.
 377 impure subroutine s_initialize_mpi_common_module
 378
379#ifdef MFC_MPI
 380 ! Allocating buff_send/recv. Please note that for the sake of
381 ! simplicity, both variables are provided sufficient storage to hold
382 ! the largest buffer in the computational domain.
383
384 if (qbmm .and. .not. polytropic) then
385 v_size = sys_size + 2*nb*4
386 else
 387 v_size = sys_size
 388 end if
389
390 if (n > 0) then
391 if (p > 0) then
392 halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* &
393 & (m + 2*buff_size + 1)* &
394 & (n + 2*buff_size + 1)* &
395 & (p + 2*buff_size + 1)/ &
396 & (cells_bounds%mnp_min + 2*buff_size + 1))
397 else
398 halo_size = -1 + buff_size*(v_size)* &
399 & (cells_bounds%mn_max + 2*buff_size + 1)
400 end if
401 else
402 halo_size = -1 + buff_size*(v_size)
403 end if
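        ! Illustrative sizing sketch (hypothetical values, not taken from any case):
        ! for a 3-D run with m = n = p = 99, buff_size = 3, v_size = 10 and, by
        ! assumption, cells_bounds%mnp_min = 99, each padded extent is 99 + 2*3 + 1 = 106,
        ! so halo_size = nint(-1 + 3*10*106*106*106/106) = -1 + 30*11236 = 337079,
        ! i.e. the buffers below are sized for the largest single face of the halo.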
404
405
406# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
407#if defined(MFC_OpenACC)
408# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
409!$acc update device(halo_size, v_size)
410# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
411#elif defined(MFC_OpenMP)
412# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
413!$omp target update to(halo_size, v_size)
414# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
415#endif
416
417#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
418#ifdef MFC_DEBUG
419# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
420 block
421# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
422 use iso_fortran_env, only: output_unit
423# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
424
425# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
426 print *, 'm_mpi_common.fpp:84: ', '@:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size))'
427# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
428
429# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
430 call flush (output_unit)
431# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
432 end block
433# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
434#endif
435# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
436 allocate (buff_send(0:halo_size), buff_recv(0:halo_size))
437# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
438
439# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
440
441# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
442
443# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
444#if defined(MFC_OpenACC)
445# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
446!$acc enter data create(buff_send, buff_recv)
447# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
448#elif defined(MFC_OpenMP)
449# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
450!$omp target enter data map(always,alloc:buff_send, buff_recv)
451# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
452#endif
453#else
454 allocate (buff_send(0:halo_size), buff_recv(0:halo_size))
455
456# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
457#if defined(MFC_OpenACC)
458# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
459!$acc enter data create(capture:buff_send)
460# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
461#elif defined(MFC_OpenMP)
462# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
463!$omp target enter data map(always,alloc:capture:buff_send)
464# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
465#endif
466
467# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
468#if defined(MFC_OpenACC)
469# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
470!$acc enter data create(capture:buff_recv)
471# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
472#elif defined(MFC_OpenMP)
473# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
474!$omp target enter data map(always,alloc:capture:buff_recv)
475# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
476#endif
477#endif
478#endif
479
480 end subroutine s_initialize_mpi_common_module
481
482 !> The subroutine initializes the MPI execution environment
483 !! and queries both the number of processors which will be
484 !! available for the job and the local processor rank.
485 impure subroutine s_mpi_initialize
486
487#ifdef MFC_MPI
488 integer :: ierr !< Generic flag used to identify and report MPI errors
489
490 ! Initializing the MPI environment
491 call mpi_init(ierr)
492
493 ! Checking whether the MPI environment has been properly initialized
494 if (ierr /= mpi_success) then
495 print '(A)', 'Unable to initialize MPI environment. Exiting.'
496 call mpi_abort(mpi_comm_world, 1, ierr)
497 end if
498
499 ! Querying the number of processors available for the job
500 call mpi_comm_size(mpi_comm_world, num_procs, ierr)
501
502 ! Querying the rank of the local processor
503 call mpi_comm_rank(mpi_comm_world, proc_rank, ierr)
504#else
505 ! Serial run only has 1 processor
506 num_procs = 1
507 ! Local processor rank is 0
508 proc_rank = 0
509#endif
510
511 end subroutine s_mpi_initialize
512
513 !! @param q_cons_vf Conservative variables
 514 !! @param ib_markers Tracks whether a cell lies within an immersed boundary
 515 !! @param beta Eulerian void fraction from Lagrangian bubbles
516 impure subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, beta)
517
518 type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf
519 type(integer_field), optional, intent(in) :: ib_markers
520 type(scalar_field), intent(in), optional :: beta
521
522 integer, dimension(num_dims) :: sizes_glb, sizes_loc
523 integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start
524
525#ifdef MFC_MPI
526
527 ! Generic loop iterator
528 integer :: i, j
529 integer :: ierr !< Generic flag used to identify and report MPI errors
530
 531 ! Altered system size for the Lagrangian subgrid bubble model
532 integer :: alt_sys
533
534 if (present(beta)) then
535 alt_sys = sys_size + 1
536 else
537 alt_sys = sys_size
538 end if
539
540 do i = 1, sys_size
541 mpi_io_data%var(i)%sf => q_cons_vf(i)%sf(0:m, 0:n, 0:p)
542 end do
543
544 if (present(beta)) then
545 mpi_io_data%var(alt_sys)%sf => beta%sf(0:m, 0:n, 0:p)
546 end if
547
548 !Additional variables pb and mv for non-polytropic qbmm
549 if (qbmm .and. .not. polytropic) then
550 do i = 1, nb
551 do j = 1, nnode
552#ifdef MFC_PRE_PROCESS
553 mpi_io_data%var(sys_size + (i - 1)*nnode + j)%sf => pb%sf(0:m, 0:n, 0:p, j, i)
554 mpi_io_data%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv%sf(0:m, 0:n, 0:p, j, i)
555#elif defined (MFC_SIMULATION)
556 mpi_io_data%var(sys_size + (i - 1)*nnode + j)%sf => pb_ts(1)%sf(0:m, 0:n, 0:p, j, i)
557 mpi_io_data%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv_ts(1)%sf(0:m, 0:n, 0:p, j, i)
558#endif
559 end do
560 end do
561 end if
562
563 ! Define global(g) and local(l) sizes for flow variables
564 sizes_glb(1) = m_glb + 1; sizes_loc(1) = m + 1
565 if (n > 0) then
566 sizes_glb(2) = n_glb + 1; sizes_loc(2) = n + 1
567 if (p > 0) then
568 sizes_glb(num_dims) = p_glb + 1; sizes_loc(num_dims) = p + 1
569 end if
570 end if
571
572 ! Define the view for each variable
573 do i = 1, alt_sys
574 call mpi_type_create_subarray(num_dims, sizes_glb, sizes_loc, start_idx, &
575 mpi_order_fortran, mpi_p, mpi_io_data%view(i), ierr)
576 call mpi_type_commit(mpi_io_data%view(i), ierr)
577 end do
578
579#ifndef MFC_POST_PROCESS
580 if (qbmm .and. .not. polytropic) then
581 do i = sys_size + 1, sys_size + 2*nb*4
582 call mpi_type_create_subarray(num_dims, sizes_glb, sizes_loc, start_idx, &
583 mpi_order_fortran, mpi_p, mpi_io_data%view(i), ierr)
584 call mpi_type_commit(mpi_io_data%view(i), ierr)
585
586 end do
587 end if
588#endif
589
590#ifndef MFC_PRE_PROCESS
591 if (present(ib_markers)) then
592 mpi_io_ib_data%var%sf => ib_markers%sf(0:m, 0:n, 0:p)
593
594 call mpi_type_create_subarray(num_dims, sizes_glb, sizes_loc, start_idx, &
595 mpi_order_fortran, mpi_integer, mpi_io_ib_data%view, ierr)
596 call mpi_type_commit(mpi_io_ib_data%view, ierr)
597 end if
598#endif
599
600#endif
601
602 end subroutine s_initialize_mpi_data
603
604 !! @param q_cons_vf Conservative variables
605 subroutine s_initialize_mpi_data_ds(q_cons_vf)
606
607 type(scalar_field), &
608 dimension(sys_size), &
609 intent(in) :: q_cons_vf
610
611 integer, dimension(num_dims) :: sizes_glb, sizes_loc
612 integer, dimension(3) :: sf_start_idx
613
614#ifdef MFC_MPI
615
616 ! Generic loop iterator
617 integer :: i, j, q, k, l, m_ds, n_ds, p_ds, ierr
618
619 sf_start_idx = (/0, 0, 0/)
620
621#ifndef MFC_POST_PROCESS
622 m_ds = int((m + 1)/3) - 1
623 n_ds = int((n + 1)/3) - 1
624 p_ds = int((p + 1)/3) - 1
625#else
626 m_ds = m
627 n_ds = n
628 p_ds = p
629#endif
630
631#ifdef MFC_POST_PROCESS
632 do i = 1, sys_size
633 mpi_io_data%var(i)%sf => q_cons_vf(i)%sf(-1:m_ds + 1, -1:n_ds + 1, -1:p_ds + 1)
634 end do
635#endif
 636 ! Define the local(l) sizes for the flow variables (the global sizes are not needed here)
637 sizes_loc(1) = m_ds + 3
638 if (n > 0) then
639 sizes_loc(2) = n_ds + 3
640 if (p > 0) then
641 sizes_loc(num_dims) = p_ds + 3
642 end if
643 end if
644
645 ! Define the view for each variable
646 do i = 1, sys_size
647 call mpi_type_create_subarray(num_dims, sizes_loc, sizes_loc, sf_start_idx, &
648 mpi_order_fortran, mpi_p, mpi_io_data%view(i), ierr)
649 call mpi_type_commit(mpi_io_data%view(i), ierr)
650 end do
651#endif
652
653 end subroutine s_initialize_mpi_data_ds
654
655 !> @brief Gathers variable-length real vectors from all MPI ranks onto the root process.
656 impure subroutine s_mpi_gather_data(my_vector, counts, gathered_vector, root)
657
 658 integer, intent(in) :: counts ! Length of the local vector on this process
659 real(wp), intent(in), dimension(counts) :: my_vector ! Input vector on each process
660 integer, intent(in) :: root ! Rank of the root process
661 real(wp), allocatable, intent(out) :: gathered_vector(:) ! Gathered vector on the root process
662
663 integer :: i
664 integer :: ierr !< Generic flag used to identify and report MPI errors
665 integer, allocatable :: recounts(:), displs(:)
666
667#ifdef MFC_MPI
668
669 allocate (recounts(num_procs))
670
671 call mpi_gather(counts, 1, mpi_integer, recounts, 1, mpi_integer, root, &
672 mpi_comm_world, ierr)
673
674 allocate (displs(size(recounts)))
675
676 displs(1) = 0
677
678 do i = 2, size(recounts)
679 displs(i) = displs(i - 1) + recounts(i - 1)
680 end do
681
682 allocate (gathered_vector(sum(recounts)))
683 call mpi_gatherv(my_vector, counts, mpi_p, gathered_vector, recounts, displs, mpi_p, &
684 root, mpi_comm_world, ierr)
685#endif
686 end subroutine s_mpi_gather_data
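    ! Illustrative usage sketch (hypothetical caller, kept as comments so the
    ! module source itself is unchanged): each rank contributes proc_rank + 1
    ! values, and the root gathers them back-to-back, ordered by rank.
    !   real(wp), allocatable :: local_vals(:), all_vals(:)
    !   allocate (local_vals(proc_rank + 1)); local_vals = real(proc_rank, wp)
    !   call s_mpi_gather_data(local_vals, size(local_vals), all_vals, 0)
    !   ! On rank 0, all_vals now holds every rank's values, ordered by rank.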
687
688 !> @brief Gathers per-rank time step wall-clock times onto rank 0 for performance reporting.
689 impure subroutine mpi_bcast_time_step_values(proc_time, time_avg)
690
691 real(wp), dimension(0:num_procs - 1), intent(inout) :: proc_time
692 real(wp), intent(inout) :: time_avg
693
694#ifdef MFC_MPI
695 integer :: ierr !< Generic flag used to identify and report MPI errors
696
697 call mpi_gather(time_avg, 1, mpi_p, proc_time(0), 1, mpi_p, 0, mpi_comm_world, ierr)
698
699#endif
700
701 end subroutine mpi_bcast_time_step_values
702
703 !> @brief Prints a case file error with the prohibited condition and message, then aborts execution.
704 impure subroutine s_prohibit_abort(condition, message)
705 character(len=*), intent(in) :: condition, message
706
707 print *, ""
708 print *, "CASE FILE ERROR"
709 print *, " - Prohibited condition: ", trim(condition)
710 if (len_trim(message) > 0) then
711 print *, " - Note: ", trim(message)
712 end if
713 print *, ""
714 call s_mpi_abort(code=case_file_error_code)
715 end subroutine s_prohibit_abort
716
717 !> The goal of this subroutine is to determine the global
718 !! extrema of the stability criteria in the computational
719 !! domain. This is performed by sifting through the local
720 !! extrema of each stability criterion. Note that each of
721 !! the local extrema is from a single process, within its
722 !! assigned section of the computational domain. Finally,
 723 !! note that the global extrema values are only bookkept
724 !! on the rank 0 processor.
725 !! @param icfl_max_loc Local maximum ICFL stability criterion
726 !! @param vcfl_max_loc Local maximum VCFL stability criterion
727 !! @param Rc_min_loc Local minimum Rc stability criterion
728 !! @param icfl_max_glb Global maximum ICFL stability criterion
729 !! @param vcfl_max_glb Global maximum VCFL stability criterion
730 !! @param Rc_min_glb Global minimum Rc stability criterion
731 impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, &
732 vcfl_max_loc, &
733 Rc_min_loc, &
734 icfl_max_glb, &
735 vcfl_max_glb, &
736 Rc_min_glb)
737
738 real(wp), intent(in) :: icfl_max_loc
739 real(wp), intent(in) :: vcfl_max_loc
740 real(wp), intent(in) :: rc_min_loc
741
742 real(wp), intent(out) :: icfl_max_glb
743 real(wp), intent(out) :: vcfl_max_glb
744 real(wp), intent(out) :: rc_min_glb
745
746#ifdef MFC_SIMULATION
747#ifdef MFC_MPI
748 integer :: ierr !< Generic flag used to identify and report MPI errors
749
 750 ! Reducing local extrema of the ICFL, VCFL and Rc numbers to their
 751 ! global extrema and bookkeeping the results on the rank 0 processor
752 call mpi_reduce(icfl_max_loc, icfl_max_glb, 1, &
753 mpi_p, mpi_max, 0, &
754 mpi_comm_world, ierr)
755
756 if (viscous) then
757 call mpi_reduce(vcfl_max_loc, vcfl_max_glb, 1, &
758 mpi_p, mpi_max, 0, &
759 mpi_comm_world, ierr)
760 call mpi_reduce(rc_min_loc, rc_min_glb, 1, &
761 mpi_p, mpi_min, 0, &
762 mpi_comm_world, ierr)
763 end if
764
765#else
766
767 icfl_max_glb = icfl_max_loc
768
769 if (viscous) then
770 vcfl_max_glb = vcfl_max_loc
771 rc_min_glb = rc_min_loc
772 end if
773
774#endif
775#endif
776
 777 end subroutine s_mpi_reduce_stability_criteria_extrema
 778
779 !> The following subroutine takes the input local variable
 780 !! from all processors and reduces it to the sum of all
 781 !! values. The reduced result is returned to every
 782 !! processor in var_glb.
783 !! @param var_loc Some variable containing the local value which should be
784 !! reduced amongst all the processors in the communicator.
785 !! @param var_glb The globally reduced value
786 impure subroutine s_mpi_allreduce_sum(var_loc, var_glb)
787
788 real(wp), intent(in) :: var_loc
789 real(wp), intent(out) :: var_glb
790
791#ifdef MFC_MPI
792 integer :: ierr !< Generic flag used to identify and report MPI errors
793
794 ! Performing the reduction procedure
795 call mpi_allreduce(var_loc, var_glb, 1, mpi_p, &
796 mpi_sum, mpi_comm_world, ierr)
797
798#endif
799
800 end subroutine s_mpi_allreduce_sum
801
802 !> This subroutine follows the behavior of the s_mpi_allreduce_sum subroutine
803 !> with the additional feature that it reduces an array of vectors.
804 impure subroutine s_mpi_allreduce_vectors_sum(var_loc, var_glb, num_vectors, vector_length)
805
806 integer, intent(in) :: num_vectors, vector_length
807 real(wp), dimension(:, :), intent(in) :: var_loc
808 real(wp), dimension(:, :), intent(out) :: var_glb
809
810#ifdef MFC_MPI
811 integer :: ierr !< Generic flag used to identify and report MPI errors
812
813 ! Performing the reduction procedure
814 if (loc(var_loc) == loc(var_glb)) then
815 call mpi_allreduce(mpi_in_place, var_glb, num_vectors*vector_length, &
816 mpi_p, mpi_sum, mpi_comm_world, ierr)
817 else
818 call mpi_allreduce(var_loc, var_glb, num_vectors*vector_length, &
819 mpi_p, mpi_sum, mpi_comm_world, ierr)
820 end if
821
822#else
823 var_glb(1:num_vectors, 1:vector_length) = var_loc(1:num_vectors, 1:vector_length)
824#endif
825
826 end subroutine s_mpi_allreduce_vectors_sum
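    ! Illustrative usage sketch (hypothetical array, kept as comments): summing a
    ! 3-by-5 block of data across all ranks in place, which exercises the
    ! mpi_in_place branch above because source and destination coincide.
    !   real(wp) :: moments(3, 5)
    !   call s_mpi_allreduce_vectors_sum(moments, moments, 3, 5)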
827
828 !> The following subroutine takes the input local variable
 829 !! from all processors and reduces it to the sum of all
 830 !! values. The reduced result is returned to every
 831 !! processor in var_glb.
832 !! @param var_loc Some variable containing the local value which should be
833 !! reduced amongst all the processors in the communicator.
834 !! @param var_glb The globally reduced value
835 impure subroutine s_mpi_allreduce_integer_sum(var_loc, var_glb)
836
837 integer, intent(in) :: var_loc
838 integer, intent(out) :: var_glb
839
840#ifdef MFC_MPI
841 integer :: ierr !< Generic flag used to identify and report MPI errors
842
843 ! Performing the reduction procedure
844 call mpi_allreduce(var_loc, var_glb, 1, mpi_integer, &
845 mpi_sum, mpi_comm_world, ierr)
846#else
847 var_glb = var_loc
848#endif
849
850 end subroutine s_mpi_allreduce_integer_sum
851
852 !> The following subroutine takes the input local variable
 853 !! from all processors and reduces it to the minimum of all
 854 !! values. The reduced result is returned to every
 855 !! processor in var_glb.
856 !! @param var_loc Some variable containing the local value which should be
857 !! reduced amongst all the processors in the communicator.
858 !! @param var_glb The globally reduced value
859 impure subroutine s_mpi_allreduce_min(var_loc, var_glb)
860
861 real(wp), intent(in) :: var_loc
862 real(wp), intent(out) :: var_glb
863
864#ifdef MFC_MPI
865 integer :: ierr !< Generic flag used to identify and report MPI errors
866
867 ! Performing the reduction procedure
868 call mpi_allreduce(var_loc, var_glb, 1, mpi_p, &
869 mpi_min, mpi_comm_world, ierr)
870
871#endif
872
873 end subroutine s_mpi_allreduce_min
874
875 !> The following subroutine takes the input local variable
 876 !! from all processors and reduces it to the maximum of all
 877 !! values. The reduced result is returned to every
 878 !! processor in var_glb.
879 !! @param var_loc Some variable containing the local value which should be
880 !! reduced amongst all the processors in the communicator.
881 !! @param var_glb The globally reduced value
882 impure subroutine s_mpi_allreduce_max(var_loc, var_glb)
883
884 real(wp), intent(in) :: var_loc
885 real(wp), intent(out) :: var_glb
886
887#ifdef MFC_MPI
888 integer :: ierr !< Generic flag used to identify and report MPI errors
889
890 ! Performing the reduction procedure
891 call mpi_allreduce(var_loc, var_glb, 1, mpi_p, &
892 mpi_max, mpi_comm_world, ierr)
893
894#endif
895
896 end subroutine s_mpi_allreduce_max
897
 898 !> The following subroutine takes the input variable and
 899 !! determines its minimum value over the entire computational
 900 !! domain. The result is stored back into the input variable.
901 !! @param var_loc holds the local value to be reduced among
902 !! all the processors in communicator. On output, the variable holds
903 !! the minimum value, reduced amongst all of the local values.
904 impure subroutine s_mpi_reduce_min(var_loc)
905
906 real(wp), intent(inout) :: var_loc
907
908#ifdef MFC_MPI
909 integer :: ierr !< Generic flag used to identify and report MPI errors
910
911 ! Temporary storage variable that holds the reduced minimum value
912 real(wp) :: var_glb
913
914 ! Performing reduction procedure and eventually storing its result
915 ! into the variable that was initially inputted into the subroutine
916 call mpi_reduce(var_loc, var_glb, 1, mpi_p, &
917 mpi_min, 0, mpi_comm_world, ierr)
918
919 call mpi_bcast(var_glb, 1, mpi_p, &
920 0, mpi_comm_world, ierr)
921
922 var_loc = var_glb
923
924#endif
925
926 end subroutine s_mpi_reduce_min
927
928 !> The following subroutine takes the first element of the
 929 !! 2-element input variable and determines its maximum
930 !! value on the entire computational domain. The result is
931 !! stored back into the first element of the variable while
932 !! the rank of the processor that is in charge of the sub-
933 !! domain containing the maximum is stored into the second
934 !! element of the variable.
935 !! @param var_loc On input, this variable holds the local value and processor rank,
936 !! which are to be reduced among all the processors in communicator.
937 !! On output, this variable holds the maximum value, reduced amongst
938 !! all of the local values, and the process rank to which the value
939 !! belongs.
940 impure subroutine s_mpi_reduce_maxloc(var_loc)
941
942 real(wp), dimension(2), intent(inout) :: var_loc
943
944#ifdef MFC_MPI
945 integer :: ierr !< Generic flag used to identify and report MPI errors
946
947 real(wp), dimension(2) :: var_glb !<
948 !! Temporary storage variable that holds the reduced maximum value
949 !! and the rank of the processor with which the value is associated
950
951 ! Performing reduction procedure and eventually storing its result
952 ! into the variable that was initially inputted into the subroutine
953 call mpi_reduce(var_loc, var_glb, 1, mpi_2p, &
954 mpi_maxloc, 0, mpi_comm_world, ierr)
955
956 call mpi_bcast(var_glb, 1, mpi_2p, &
957 0, mpi_comm_world, ierr)
958
959 var_loc = var_glb
960
961#endif
962
963 end subroutine s_mpi_reduce_maxloc
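    ! Illustrative usage sketch (hypothetical variables, kept as comments): pack
    ! the local quantity into element 1 and the caller's rank into element 2;
    ! on return, element 1 is the global maximum and element 2 the owning rank.
    !   real(wp) :: var_and_rank(2)
    !   var_and_rank(1) = local_cfl_max          ! hypothetical local value
    !   var_and_rank(2) = real(proc_rank, wp)
    !   call s_mpi_reduce_maxloc(var_and_rank)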
964
965 !> The subroutine terminates the MPI execution environment.
966 !! @param prnt error message to be printed
967 !! @param code optional exit code
968 impure subroutine s_mpi_abort(prnt, code)
969
970 character(len=*), intent(in), optional :: prnt
971 integer, intent(in), optional :: code
972
973#ifdef MFC_MPI
974 integer :: ierr !< Generic flag used to identify and report MPI errors
975#endif
976
977 if (present(prnt)) then
978 print *, prnt
979 call flush (6)
980
981 end if
982
983#ifndef MFC_MPI
984 if (present(code)) then
985 stop code
986 else
987 stop 1
988 end if
989#else
990 ! Terminating the MPI environment
991 if (present(code)) then
992 call mpi_abort(mpi_comm_world, code, ierr)
993 else
994 call mpi_abort(mpi_comm_world, 1, ierr)
995 end if
996#endif
997
998 end subroutine s_mpi_abort
999
 1000 !> Halts all processes until all have reached the barrier.
1001 impure subroutine s_mpi_barrier
1002
1003#ifdef MFC_MPI
1004 integer :: ierr !< Generic flag used to identify and report MPI errors
1005
1006 ! Calling MPI_BARRIER
1007 call mpi_barrier(mpi_comm_world, ierr)
1008
1009#endif
1010
1011 end subroutine s_mpi_barrier
1012
1013 !> The subroutine finalizes the MPI execution environment.
1014 impure subroutine s_mpi_finalize
1015
1016#ifdef MFC_MPI
1017 integer :: ierr !< Generic flag used to identify and report MPI errors
1018
1019 ! Finalizing the MPI environment
1020 call mpi_finalize(ierr)
1021
1022#endif
1023
1024 end subroutine s_mpi_finalize
1025
1026 !> The goal of this procedure is to populate the buffers of
1027 !! the cell-average conservative variables by communicating
1028 !! with the neighboring processors.
1029 !! @param q_comm Cell-average conservative variables
1030 !! @param mpi_dir MPI communication coordinate direction
1031 !! @param pbc_loc Processor boundary condition (PBC) location
1032 !! @param nVar Number of variables to communicate
1033 !! @param pb_in Optional internal bubble pressure
1034 !! @param mv_in Optional bubble mass velocity
1036 mpi_dir, &
1037 pbc_loc, &
1038 nVar, &
1039 pb_in, mv_in)
1040
1041 type(scalar_field), dimension(1:), intent(inout) :: q_comm
1042 real(stp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb_in, mv_in
1043 integer, intent(in) :: mpi_dir, pbc_loc, nVar
1044
1045 integer :: i, j, k, l, r, q !< Generic loop iterators
1046
1047 integer :: buffer_counts(1:3), buffer_count
1048
1049 type(int_bounds_info) :: boundary_conditions(1:3)
1050 integer :: beg_end(1:2), grid_dims(1:3)
1051 integer :: dst_proc, src_proc, recv_tag, send_tag
1052
1053 logical :: beg_end_geq_0, qbmm_comm
1054
1055 integer :: pack_offset, unpack_offset
1056
1057#ifdef MFC_MPI
1058 integer :: ierr !< Generic flag used to identify and report MPI errors
1059
1060 call nvtxstartrange("RHS-COMM-PACKBUF")
1061
1062 qbmm_comm = .false.
1063
1064 if (present(pb_in) .and. present(mv_in) .and. qbmm .and. .not. polytropic) then
1065 qbmm_comm = .true.
1066 v_size = nvar + 2*nb*4
1067 buffer_counts = (/ &
1068 buff_size*v_size*(n + 1)*(p + 1), &
1069 buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), &
1070 buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
1071 /)
1072 else
1073 v_size = nvar
1074 buffer_counts = (/ &
1075 buff_size*v_size*(n + 1)*(p + 1), &
1076 buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), &
1077 buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
1078 /)
1079 end if
1080
1081
1082# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1083#if defined(MFC_OpenACC)
1084# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1085!$acc update device(v_size)
1086# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1087#elif defined(MFC_OpenMP)
1088# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1089!$omp target update to(v_size)
1090# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1091#endif
1092
1093 buffer_count = buffer_counts(mpi_dir)
1094 boundary_conditions = (/bc_x, bc_y, bc_z/)
1095 beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/)
1096 beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0
1097
1098 ! Implements:
1099 ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc
1100 ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg]
1101 ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg]
1102 ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end]
1103 ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end]
1104
1105 send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1))
1106 recv_tag = f_logical_to_int(pbc_loc == 1)
1107
1108 dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0)))
1109 src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1))
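        ! Illustrative evaluation (assumed inputs, matching one row of the table
        ! above): for pbc_loc = -1 with beg_end_geq_0 = .true.,
        !   send_tag = f_logical_to_int(.not. f_xor(.true., .false.)) = 0
        !   recv_tag = f_logical_to_int(.false.)                      = 0
        !   dst_proc = beg_end(1 + f_logical_to_int(f_xor(.false., .true.))) = beg_end(2) (end)
        !   src_proc = beg_end(1 + f_logical_to_int(.false.))               = beg_end(1) (beg)
        ! which reproduces the "-1 ... 1 -> [0,0] ... [end,beg]" row.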
1110
1111 grid_dims = (/m, n, p/)
1112
1113 pack_offset = 0
1114 if (f_xor(pbc_loc == 1, beg_end_geq_0)) then
1115 pack_offset = grid_dims(mpi_dir) - buff_size + 1
1116 end if
1117
1118 unpack_offset = 0
1119 if (pbc_loc == 1) then
1120 unpack_offset = grid_dims(mpi_dir) + buff_size + 1
1121 end if
1122
1123 ! Pack Buffer to Send
1124# 727 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1125 if (mpi_dir == 1) then
1126# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1127
1128# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1129
1130# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1131#if defined(MFC_OpenACC)
1132# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1133!$acc parallel loop collapse(4) gang vector default(present) private(r)
1134# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1135#elif defined(MFC_OpenMP)
1136# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1137
1138# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1139
1140# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1141
1142# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1143!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1144# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1145#endif
1146# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1147
1148 do l = 0, p
1149 do k = 0, n
1150 do j = 0, buff_size - 1
1151 do i = 1, nvar
1152 r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l))
1153 buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp)
1154 end do
1155 end do
1156 end do
1157 end do
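        ! Illustrative index check (assumed sizes): with nvar = v_size = 5,
        ! buff_size = 3 and n = 10, the entry (i, j, k, l) = (2, 1, 4, 0) packs into
        !   r = (2 - 1) + 5*(1 + 3*(4 + 11*0)) = 1 + 5*13 = 66,
        ! so consecutive i for a fixed (j, k, l) land in contiguous buff_send slots.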
1158
1159# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1160
1161# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1162#if defined(MFC_OpenACC)
1163# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1164!$acc end parallel loop
1165# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1166#elif defined(MFC_OpenMP)
1167# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1168
1169# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1170
1171# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1172!$omp end target teams loop
1173# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1174#endif
1175# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1176
1177
1178 if (qbmm_comm) then
1179
1180# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1181
1182# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1183#if defined(MFC_OpenACC)
1184# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1185!$acc parallel loop collapse(4) gang vector default(present) private(r)
1186# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1187#elif defined(MFC_OpenMP)
1188# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1189
1190# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1191
1192# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1193
1194# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1195!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1196# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1197#endif
1198# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1199
1200 do l = 0, p
1201 do k = 0, n
1202 do j = 0, buff_size - 1
1203 do i = nvar + 1, nvar + 4
1204 do q = 1, nb
1205 r = (i - 1) + (q - 1)*4 + v_size* &
1206 (j + buff_size*(k + (n + 1)*l))
1207 buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nvar, q), kind=wp)
1208 end do
1209 end do
1210 end do
1211 end do
1212 end do
1213
1214# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1215
1216# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1217#if defined(MFC_OpenACC)
1218# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1219!$acc end parallel loop
1220# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1221#elif defined(MFC_OpenMP)
1222# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1223
1224# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1225
1226# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1227!$omp end target teams loop
1228# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1229#endif
1230# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1231
1232
1233
1234# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1235
1236# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1237#if defined(MFC_OpenACC)
1238# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1239!$acc parallel loop collapse(5) gang vector default(present) private(r)
1240# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1241#elif defined(MFC_OpenMP)
1242# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1243
1244# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1245
1246# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1247
1248# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1249!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1250# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1251#endif
1252# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1253
1254 do l = 0, p
1255 do k = 0, n
1256 do j = 0, buff_size - 1
1257 do i = nvar + 1, nvar + 4
1258 do q = 1, nb
1259 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
1260 (j + buff_size*(k + (n + 1)*l))
1261 buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nvar, q), kind=wp)
1262 end do
1263 end do
1264 end do
1265 end do
1266 end do
1267
1268# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1269
1270# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1271#if defined(MFC_OpenACC)
1272# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1273!$acc end parallel loop
1274# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1275#elif defined(MFC_OpenMP)
1276# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1277
1278# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1279
1280# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1281!$omp end target teams loop
1282# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1283#endif
1284# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1285
1286 end if
1287# 878 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1288 end if
1289# 727 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1290 if (mpi_dir == 2) then
1291# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1292
1293# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1294
1295# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1296#if defined(MFC_OpenACC)
1297# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1298!$acc parallel loop collapse(4) gang vector default(present) private(r)
1299# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1300#elif defined(MFC_OpenMP)
1301# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1302
1303# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1304
1305# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1306
1307# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1308!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1309# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1310#endif
1311# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1312
1313 do i = 1, nvar
1314 do l = 0, p
1315 do k = 0, buff_size - 1
1316 do j = -buff_size, m + buff_size
1317 r = (i - 1) + v_size* &
1318 ((j + buff_size) + (m + 2*buff_size + 1)* &
1319 (k + buff_size*l))
1320 buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp)
1321 end do
1322 end do
1323 end do
1324 end do
1325
1326# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1327
1328# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1329#if defined(MFC_OpenACC)
1330# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1331!$acc end parallel loop
1332# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1333#elif defined(MFC_OpenMP)
1334# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1335
1336# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1337
1338# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1339!$omp end target teams loop
1340# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1341#endif
1342# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1343
1344
1345 if (qbmm_comm) then
1346
1347# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1348
1349# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1350#if defined(MFC_OpenACC)
1351# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1352!$acc parallel loop collapse(5) gang vector default(present) private(r)
1353# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1354#elif defined(MFC_OpenMP)
1355# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1356
1357# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1358
1359# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1360
1361# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1362!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1363# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1364#endif
1365# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1366
1367 do i = nvar + 1, nvar + 4
1368 do l = 0, p
1369 do k = 0, buff_size - 1
1370 do j = -buff_size, m + buff_size
1371 do q = 1, nb
1372 r = (i - 1) + (q - 1)*4 + v_size* &
1373 ((j + buff_size) + (m + 2*buff_size + 1)* &
1374 (k + buff_size*l))
1375 buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nvar, q), kind=wp)
1376 end do
1377 end do
1378 end do
1379 end do
1380 end do
1381
1382# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1383
1384# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1385#if defined(MFC_OpenACC)
1386# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1387!$acc end parallel loop
1388# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1389#elif defined(MFC_OpenMP)
1390# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1391
1392# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1393
1394# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1395!$omp end target teams loop
1396# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1397#endif
1398# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1399
1400
1401
1402# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1403
1404# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1405#if defined(MFC_OpenACC)
1406# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1407!$acc parallel loop collapse(5) gang vector default(present) private(r)
1408# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1409#elif defined(MFC_OpenMP)
1410# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1411
1412# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1413
1414# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1415
1416# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1417!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1418# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1419#endif
1420# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1421
1422 do i = nvar + 1, nvar + 4
1423 do l = 0, p
1424 do k = 0, buff_size - 1
1425 do j = -buff_size, m + buff_size
1426 do q = 1, nb
1427 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
1428 ((j + buff_size) + (m + 2*buff_size + 1)* &
1429 (k + buff_size*l))
1430 buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nvar, q), kind=wp)
1431 end do
1432 end do
1433 end do
1434 end do
1435 end do
1436
1437# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1438
1439# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1440#if defined(MFC_OpenACC)
1441# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1442!$acc end parallel loop
1443# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1444#elif defined(MFC_OpenMP)
1445# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1446
1447# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1448
1449# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1450!$omp end target teams loop
1451# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1452#endif
1453# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1454
1455 end if
1456# 878 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1457 end if
1458# 727 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1459 if (mpi_dir == 3) then
1460# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1461
1462# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1463
1464# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1465#if defined(MFC_OpenACC)
1466# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1467!$acc parallel loop collapse(4) gang vector default(present) private(r)
1468# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1469#elif defined(MFC_OpenMP)
1470# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1471
1472# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1473
1474# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1475
1476# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1477!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1478# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1479#endif
1480# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1481
1482 do i = 1, nvar
1483 do l = 0, buff_size - 1
1484 do k = -buff_size, n + buff_size
1485 do j = -buff_size, m + buff_size
1486 r = (i - 1) + v_size* &
1487 ((j + buff_size) + (m + 2*buff_size + 1)* &
1488 ((k + buff_size) + (n + 2*buff_size + 1)*l))
1489 buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp)
1490 end do
1491 end do
1492 end do
1493 end do
1494
1498#if defined(MFC_OpenACC)
1500!$acc end parallel loop
1502#elif defined(MFC_OpenMP)
1508!$omp end target teams loop
1510#endif
1512
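            ! Layout of the packed buffer (illustrative note): the loop above linearizes
            ! the halo block into buff_send with the variable index i varying fastest,
            ! then j across the full x-extent (m + 2*buff_size + 1 points), then k across
            ! the y-extent, then the buff_size z-planes selected by pack_offset. With
            ! buff_size = 2, for example, two z-planes next to the processor boundary
            ! are packed for every variable.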
1514 if (qbmm_comm) then
1515
1519#if defined(MFC_OpenACC)
1521!$acc parallel loop collapse(5) gang vector default(present) private(r)
1523#elif defined(MFC_OpenMP)
1531!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1533#endif
1535
1536 do i = nvar + 1, nvar + 4
1537 do l = 0, buff_size - 1
1538 do k = -buff_size, n + buff_size
1539 do j = -buff_size, m + buff_size
1540 do q = 1, nb
1541 r = (i - 1) + (q - 1)*4 + v_size* &
1542 ((j + buff_size) + (m + 2*buff_size + 1)* &
1543 ((k + buff_size) + (n + 2*buff_size + 1)*l))
1544 buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nvar, q), kind=wp)
1545 end do
1546 end do
1547 end do
1548 end do
1549 end do
1550
1554#if defined(MFC_OpenACC)
1556!$acc end parallel loop
1558#elif defined(MFC_OpenMP)
1564!$omp end target teams loop
1566#endif
1568
1574#if defined(MFC_OpenACC)
1576!$acc parallel loop collapse(5) gang vector default(present) private(r)
1578#elif defined(MFC_OpenMP)
1586!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1588#endif
1590
1591 do i = nvar + 1, nvar + 4
1592 do l = 0, buff_size - 1
1593 do k = -buff_size, n + buff_size
1594 do j = -buff_size, m + buff_size
1595 do q = 1, nb
1596 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
1597 ((j + buff_size) + (m + 2*buff_size + 1)* &
1598 ((k + buff_size) + (n + 2*buff_size + 1)*l))
1599 buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nvar, q), kind=wp)
1600 end do
1601 end do
1602 end do
1603 end do
1604 end do
1605
1609#if defined(MFC_OpenACC)
1611!$acc end parallel loop
1613#elif defined(MFC_OpenMP)
1619!$omp end target teams loop
1621#endif
1623
1624            end if
1626        end if
1628        call nvtxendrange ! Packbuf
1629
1630 ! Send/Recv
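        ! Two exchange paths follow (illustrative summary): when rdma_mpi is false the
        ! device buffer is first copied to the host (update host / target update from),
        ! exchanged with mpi_sendrecv, and the result copied back to the device; when
        ! rdma_mpi is true the device addresses of buff_send/buff_recv are handed
        ! directly to MPI via host_data use_device (OpenACC) or target data
        ! use_device_addr (OpenMP), assuming a GPU-aware MPI installation.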
1631#ifdef MFC_SIMULATION
1633        if (rdma_mpi .eqv. .false.) then
1635            call nvtxstartrange("RHS-COMM-DEV2HOST")
1636
1638#if defined(MFC_OpenACC)
1640!$acc update host(buff_send)
1642#elif defined(MFC_OpenMP)
1644!$omp target update from(buff_send)
1646#endif
1647            call nvtxendrange
1648            call nvtxstartrange("RHS-COMM-SENDRECV-NO-RDMA")
1649
1650 call mpi_sendrecv( &
1651 buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1652 buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1653 mpi_comm_world, mpi_status_ignore, ierr)
1654
1655 call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1656
1657 call nvtxstartrange("RHS-COMM-HOST2DEV")
1658
1660#if defined(MFC_OpenACC)
1662!$acc update device(buff_recv)
1664#elif defined(MFC_OpenMP)
1666!$omp target update to(buff_recv)
1668#endif
1669            call nvtxendrange
1671        end if
1673        if (rdma_mpi .eqv. .true.) then
1679#if defined(MFC_OpenACC)
1681!$acc host_data use_device(buff_send, buff_recv)
1683            call nvtxstartrange("RHS-COMM-SENDRECV-RDMA")
1687            call mpi_sendrecv( &
1689                buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1691                buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1693                mpi_comm_world, mpi_status_ignore, ierr)
1697            call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1701!$acc end host_data
1703#elif defined(MFC_OpenMP)
1705!$omp target data use_device_addr(buff_send, buff_recv)
1707            call nvtxstartrange("RHS-COMM-SENDRECV-RDMA")
1711            call mpi_sendrecv( &
1713                buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1715                buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1717                mpi_comm_world, mpi_status_ignore, ierr)
1721            call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1725!$omp end target data
1727#else
1729            call nvtxstartrange("RHS-COMM-SENDRECV-RDMA")
1733            call mpi_sendrecv( &
1735                buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1737                buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1739                mpi_comm_world, mpi_status_ignore, ierr)
1743            call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1747#endif
1751#if defined(MFC_OpenACC)
1753!$acc wait
1755#elif defined(MFC_OpenMP)
1757!$omp barrier
1759#endif
1761        end if
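        ! In builds other than MFC_SIMULATION the buffers reside on the host, so the
        ! fallback below exchanges buff_send/buff_recv directly, with no device staging.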
1763#else
1764 call mpi_sendrecv( &
1765 buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1766 buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1767 mpi_comm_world, mpi_status_ignore, ierr)
1768#endif
1769
1770 ! Unpack Received Buffer
1771 call nvtxstartrange("RHS-COMM-UNPACKBUF")
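        ! Unpacking mirrors the packing step (illustrative note): the same linear index r
        ! is recomputed for every halo cell and the received value is written back into
        ! q_comm (and, when qbmm_comm is set, pb_in and mv_in), with unpack_offset
        ! shifting the write location to the appropriate side of the sub-domain.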
1773        if (mpi_dir == 1) then
1779#if defined(MFC_OpenACC)
1781!$acc parallel loop collapse(4) gang vector default(present) private(r)
1783#elif defined(MFC_OpenMP)
1791!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1793#endif
1795
1796 do l = 0, p
1797 do k = 0, n
1798 do j = -buff_size, -1
1799 do i = 1, nvar
1800 r = (i - 1) + v_size* &
1801 (j + buff_size*((k + 1) + (n + 1)*l))
1802 q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp)
1803#if defined(__INTEL_COMPILER)
1804 if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then
1805 print *, "Error", j, k, l, i
1806 error stop "NaN(s) in recv"
1807 end if
1808#endif
1809 end do
1810 end do
1811 end do
1812 end do
1813
1817#if defined(MFC_OpenACC)
1819!$acc end parallel loop
1821#elif defined(MFC_OpenMP)
1827!$omp end target teams loop
1829#endif
1831
1833 if (qbmm_comm) then
1834
1838#if defined(MFC_OpenACC)
1840!$acc parallel loop collapse(5) gang vector default(present) private(r)
1842#elif defined(MFC_OpenMP)
1850!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1852#endif
1854
1855 do l = 0, p
1856 do k = 0, n
1857 do j = -buff_size, -1
1858 do i = nvar + 1, nvar + 4
1859 do q = 1, nb
1860 r = (i - 1) + (q - 1)*4 + v_size* &
1861 (j + buff_size*((k + 1) + (n + 1)*l))
1862 pb_in(j + unpack_offset, k, l, i - nvar, q) = real(buff_recv(r), kind=stp)
1863 end do
1864 end do
1865 end do
1866 end do
1867 end do
1868
1872#if defined(MFC_OpenACC)
1874!$acc end parallel loop
1876#elif defined(MFC_OpenMP)
1882!$omp end target teams loop
1884#endif
1886
1892#if defined(MFC_OpenACC)
1894!$acc parallel loop collapse(5) gang vector default(present) private(r)
1896#elif defined(MFC_OpenMP)
1904!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1906#endif
1908
1909 do l = 0, p
1910 do k = 0, n
1911 do j = -buff_size, -1
1912 do i = nvar + 1, nvar + 4
1913 do q = 1, nb
1914 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
1915 (j + buff_size*((k + 1) + (n + 1)*l))
1916 mv_in(j + unpack_offset, k, l, i - nvar, q) = real(buff_recv(r), kind=stp)
1917 end do
1918 end do
1919 end do
1920 end do
1921 end do
1922
1926#if defined(MFC_OpenACC)
1928!$acc end parallel loop
1930#elif defined(MFC_OpenMP)
1936!$omp end target teams loop
1938#endif
1940
1941            end if
1943        end if
1945        if (mpi_dir == 2) then
1951#if defined(MFC_OpenACC)
1953!$acc parallel loop collapse(4) gang vector default(present) private(r)
1955#elif defined(MFC_OpenMP)
1963!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1965#endif
1967
1968 do i = 1, nvar
1969 do l = 0, p
1970 do k = -buff_size, -1
1971 do j = -buff_size, m + buff_size
1972 r = (i - 1) + v_size* &
1973 ((j + buff_size) + (m + 2*buff_size + 1)* &
1974 ((k + buff_size) + buff_size*l))
1975 q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp)
1976#if defined(__INTEL_COMPILER)
1977 if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then
1978 print *, "Error", j, k, l, i
1979 error stop "NaN(s) in recv"
1980 end if
1981#endif
1982 end do
1983 end do
1984 end do
1985 end do
1986
1990#if defined(MFC_OpenACC)
1992!$acc end parallel loop
1994#elif defined(MFC_OpenMP)
2000!$omp end target teams loop
2002#endif
2004
2006 if (qbmm_comm) then
2007
2011#if defined(MFC_OpenACC)
2013!$acc parallel loop collapse(5) gang vector default(present) private(r)
2015#elif defined(MFC_OpenMP)
2023!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2025#endif
2027
2028 do i = nvar + 1, nvar + 4
2029 do l = 0, p
2030 do k = -buff_size, -1
2031 do j = -buff_size, m + buff_size
2032 do q = 1, nb
2033 r = (i - 1) + (q - 1)*4 + v_size* &
2034 ((j + buff_size) + (m + 2*buff_size + 1)* &
2035 ((k + buff_size) + buff_size*l))
2036 pb_in(j, k + unpack_offset, l, i - nvar, q) = real(buff_recv(r), kind=stp)
2037 end do
2038 end do
2039 end do
2040 end do
2041 end do
2042
2046#if defined(MFC_OpenACC)
2048!$acc end parallel loop
2050#elif defined(MFC_OpenMP)
2056!$omp end target teams loop
2058#endif
2060
2066#if defined(MFC_OpenACC)
2068!$acc parallel loop collapse(5) gang vector default(present) private(r)
2070#elif defined(MFC_OpenMP)
2078!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2080#endif
2082
2083 do i = nvar + 1, nvar + 4
2084 do l = 0, p
2085 do k = -buff_size, -1
2086 do j = -buff_size, m + buff_size
2087 do q = 1, nb
2088 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
2089 ((j + buff_size) + (m + 2*buff_size + 1)* &
2090 ((k + buff_size) + buff_size*l))
2091 mv_in(j, k + unpack_offset, l, i - nvar, q) = real(buff_recv(r), kind=stp)
2092 end do
2093 end do
2094 end do
2095 end do
2096 end do
2097
2101#if defined(MFC_OpenACC)
2103!$acc end parallel loop
2105#elif defined(MFC_OpenMP)
2111!$omp end target teams loop
2113#endif
2115
2116            end if
2118        end if
2120        if (mpi_dir == 3) then
2122            ! Unpacking buffer from bc_z%beg
2123
2127#if defined(MFC_OpenACC)
2129!$acc parallel loop collapse(4) gang vector default(present) private(r)
2131#elif defined(MFC_OpenMP)
2139!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2141#endif
2143
2144 do i = 1, nvar
2145 do l = -buff_size, -1
2146 do k = -buff_size, n + buff_size
2147 do j = -buff_size, m + buff_size
2148 r = (i - 1) + v_size* &
2149 ((j + buff_size) + (m + 2*buff_size + 1)* &
2150 ((k + buff_size) + (n + 2*buff_size + 1)* &
2151 (l + buff_size)))
2152 q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp)
2153#if defined(__INTEL_COMPILER)
2154 if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then
2155 print *, "Error", j, k, l, i
2156 error stop "NaN(s) in recv"
2157 end if
2158#endif
2159 end do
2160 end do
2161 end do
2162 end do
2163
2167#if defined(MFC_OpenACC)
2169!$acc end parallel loop
2171#elif defined(MFC_OpenMP)
2177!$omp end target teams loop
2179#endif
2181
2183 if (qbmm_comm) then
2184
2188#if defined(MFC_OpenACC)
2190!$acc parallel loop collapse(5) gang vector default(present) private(r)
2192#elif defined(MFC_OpenMP)
2200!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2202#endif
2204
2205 do i = nvar + 1, nvar + 4
2206 do l = -buff_size, -1
2207 do k = -buff_size, n + buff_size
2208 do j = -buff_size, m + buff_size
2209 do q = 1, nb
2210 r = (i - 1) + (q - 1)*4 + v_size* &
2211 ((j + buff_size) + (m + 2*buff_size + 1)* &
2212 ((k + buff_size) + (n + 2*buff_size + 1)* &
2213 (l + buff_size)))
2214 pb_in(j, k, l + unpack_offset, i - nvar, q) = real(buff_recv(r), kind=stp)
2215 end do
2216 end do
2217 end do
2218 end do
2219 end do
2220
2224#if defined(MFC_OpenACC)
2226!$acc end parallel loop
2228#elif defined(MFC_OpenMP)
2234!$omp end target teams loop
2236#endif
2238
2244#if defined(MFC_OpenACC)
2246!$acc parallel loop collapse(5) gang vector default(present) private(r)
2248#elif defined(MFC_OpenMP)
2256!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2258#endif
2260
2261 do i = nvar + 1, nvar + 4
2262 do l = -buff_size, -1
2263 do k = -buff_size, n + buff_size
2264 do j = -buff_size, m + buff_size
2265 do q = 1, nb
2266 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
2267 ((j + buff_size) + (m + 2*buff_size + 1)* &
2268 ((k + buff_size) + (n + 2*buff_size + 1)* &
2269 (l + buff_size)))
2270 mv_in(j, k, l + unpack_offset, i - nvar, q) = real(buff_recv(r), kind=stp)
2271 end do
2272 end do
2273 end do
2274 end do
2275 end do
2276
2280#if defined(MFC_OpenACC)
2282!$acc end parallel loop
2284#elif defined(MFC_OpenMP)
2290!$omp end target teams loop
2292#endif
2294
2295            end if
2297        end if
2299        call nvtxendrange
2300#endif
2301
2302    end subroutine s_mpi_sendrecv_variables_buffers
2303
2304 !> The purpose of this procedure is to optimally decompose
2305 !! the computational domain among the available processors.
2306 !! This is performed by attempting to award each processor,
2307 !! in each of the coordinate directions, approximately the
2308 !! same number of cells, and then recomputing the affected
2309 !! global parameters.
2310    subroutine s_mpi_decompose_computational_domain
2311
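        ! Illustrative 2-D example of the criterion used below: for 4 ranks on a
        ! 100x100-cell grid (m = n = 99), the factorizations 1x4, 2x2 and 4x1 give
        ! |(m+1)/px - (n+1)/py| = 75, 0 and 75, respectively, so the 2x2 topology is
        ! selected, provided each direction still holds at least
        ! num_stcls_min*recon_order cells.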
2312#ifdef MFC_MPI
2313
2314 integer :: num_procs_x, num_procs_y, num_procs_z !<
2315 !! Optimal number of processors in the x-, y- and z-directions
2316
2317 real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !<
2318 !! Non-optimal number of processors in the x-, y- and z-directions
2319
2320 real(wp) :: fct_min !<
2321 !! Processor factorization (fct) minimization parameter
2322
2323 integer :: MPI_COMM_CART !<
2324 !! Cartesian processor topology communicator
2325
2326 integer :: rem_cells !<
2327 !! Remaining number of cells, in a particular coordinate direction,
2328 !! after the majority is divided up among the available processors
2329
2330 integer :: recon_order !<
2331 !! WENO or MUSCL reconstruction order
2332
2333 integer :: i, j !< Generic loop iterators
2334 integer :: ierr !< Generic flag used to identify and report MPI errors
2335
2336 if (recon_type == weno_type) then
2337 recon_order = weno_order
2338 else
2339 recon_order = muscl_order
2340 end if
2341
2342 if (num_procs == 1 .and. parallel_io) then
2343 do i = 1, num_dims
2344 start_idx(i) = 0
2345 end do
2346 return
2347 end if
2348
2349 if (igr) then
2350 recon_order = igr_order
2351 else
2352 recon_order = weno_order
2353 end if
2354
2355 ! 3D Cartesian Processor Topology
2356 if (n > 0) then
2357
2358 if (p > 0) then
2359 if (fft_wrt) then
2360
2361 ! Initial estimate of optimal processor topology
2362 num_procs_x = 1
2363 num_procs_y = 1
2364 num_procs_z = num_procs
2365 ierr = -1
2366
2367 ! Benchmarking the quality of this initial guess
2368 tmp_num_procs_y = num_procs_y
2369 tmp_num_procs_z = num_procs_z
2370 fct_min = 10._wp*abs((n + 1)/tmp_num_procs_y &
2371 - (p + 1)/tmp_num_procs_z)
2372
2373 ! Optimization of the initial processor topology
2374 do i = 1, num_procs
2375
2376 if (mod(num_procs, i) == 0 &
2377 .and. &
2378 (n + 1)/i >= num_stcls_min*recon_order) then
2379
2380 tmp_num_procs_y = i
2381 tmp_num_procs_z = num_procs/i
2382
2383 if (fct_min >= abs((n + 1)/tmp_num_procs_y &
2384 - (p + 1)/tmp_num_procs_z) &
2385 .and. &
2386 (p + 1)/tmp_num_procs_z &
2387 >= &
2388 num_stcls_min*recon_order) then
2389
2390 num_procs_y = i
2391 num_procs_z = num_procs/i
2392 fct_min = abs((n + 1)/tmp_num_procs_y &
2393 - (p + 1)/tmp_num_procs_z)
2394 ierr = 0
2395
2396 end if
2397
2398 end if
2399
2400 end do
2401 else
2402
2403 if (cyl_coord .and. p > 0) then
2404 ! Implement pencil processor blocking if using cylindrical coordinates so
2405 ! that all cells in azimuthal direction are stored on a single processor.
2406 ! This is necessary for efficient application of Fourier filter near axis.
2407
2408 ! Initial values of the processor factorization optimization
2409 num_procs_x = 1
2410 num_procs_y = num_procs
2411 num_procs_z = 1
2412 ierr = -1
2413
2414 ! Computing minimization variable for these initial values
2415 tmp_num_procs_x = num_procs_x
2416 tmp_num_procs_y = num_procs_y
2417 tmp_num_procs_z = num_procs_z
2418 fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
2419 - (n + 1)/tmp_num_procs_y)
2420
2421 ! Searching for optimal computational domain distribution
2422 do i = 1, num_procs
2423
2424 if (mod(num_procs, i) == 0 &
2425 .and. &
2426 (m + 1)/i >= num_stcls_min*recon_order) then
2427
2428 tmp_num_procs_x = i
2429 tmp_num_procs_y = num_procs/i
2430
2431 if (fct_min >= abs((m + 1)/tmp_num_procs_x &
2432 - (n + 1)/tmp_num_procs_y) &
2433 .and. &
2434 (n + 1)/tmp_num_procs_y &
2435 >= &
2436 num_stcls_min*recon_order) then
2437
2438 num_procs_x = i
2439 num_procs_y = num_procs/i
2440 fct_min = abs((m + 1)/tmp_num_procs_x &
2441 - (n + 1)/tmp_num_procs_y)
2442 ierr = 0
2443
2444 end if
2445
2446 end if
2447
2448 end do
2449
2450 else
2451
2452 ! Initial estimate of optimal processor topology
2453 num_procs_x = 1
2454 num_procs_y = 1
2455 num_procs_z = num_procs
2456 ierr = -1
2457
2458 ! Benchmarking the quality of this initial guess
2459 tmp_num_procs_x = num_procs_x
2460 tmp_num_procs_y = num_procs_y
2461 tmp_num_procs_z = num_procs_z
2462 fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
2463 - (n + 1)/tmp_num_procs_y) &
2464 + 10._wp*abs((n + 1)/tmp_num_procs_y &
2465 - (p + 1)/tmp_num_procs_z)
2466
2467 ! Optimization of the initial processor topology
2468 do i = 1, num_procs
2469
2470 if (mod(num_procs, i) == 0 &
2471 .and. &
2472 (m + 1)/i >= num_stcls_min*recon_order) then
2473
2474 do j = 1, num_procs/i
2475
2476 if (mod(num_procs/i, j) == 0 &
2477 .and. &
2478 (n + 1)/j >= num_stcls_min*recon_order) then
2479
2480 tmp_num_procs_x = i
2481 tmp_num_procs_y = j
2482 tmp_num_procs_z = num_procs/(i*j)
2483
2484 if (fct_min >= abs((m + 1)/tmp_num_procs_x &
2485 - (n + 1)/tmp_num_procs_y) &
2486 + abs((n + 1)/tmp_num_procs_y &
2487 - (p + 1)/tmp_num_procs_z) &
2488 .and. &
2489 (p + 1)/tmp_num_procs_z &
2490 >= &
2491 num_stcls_min*recon_order) &
2492 then
2493
2494 num_procs_x = i
2495 num_procs_y = j
2496 num_procs_z = num_procs/(i*j)
2497 fct_min = abs((m + 1)/tmp_num_procs_x &
2498 - (n + 1)/tmp_num_procs_y) &
2499 + abs((n + 1)/tmp_num_procs_y &
2500 - (p + 1)/tmp_num_procs_z)
2501 ierr = 0
2502
2503 end if
2504
2505 end if
2506
2507 end do
2508
2509 end if
2510
2511 end do
2512
2513 end if
2514 end if
2515
2516 ! Verifying that a valid decomposition of the computational
2517 ! domain has been established. If not, the simulation exits.
2518 if (proc_rank == 0 .and. ierr == -1) then
2519 call s_mpi_abort('Unsupported combination of values '// &
2520 'of num_procs, m, n, p and '// &
2521 'weno/muscl/igr_order. Exiting.')
2522 end if
2523
2524 ! Creating new communicator using the Cartesian topology
2525 call mpi_cart_create(mpi_comm_world, 3, (/num_procs_x, &
2526 num_procs_y, num_procs_z/), &
2527 (/.true., .true., .true./), &
2528 .false., mpi_comm_cart, ierr)
2529
2530 ! Finding the Cartesian coordinates of the local process
2531 call mpi_cart_coords(mpi_comm_cart, proc_rank, 3, &
2532 proc_coords, ierr)
2533 ! END: 3D Cartesian Processor Topology
2534
2535 ! Global Parameters for z-direction
2536
2537 ! Number of remaining cells
2538 rem_cells = mod(p + 1, num_procs_z)
2539
2540 ! Optimal number of cells per processor
2541 p = (p + 1)/num_procs_z - 1
2542
2543 ! Distributing the remaining cells
2544 do i = 1, rem_cells
2545 if (proc_coords(3) == i - 1) then
2546 p = p + 1; exit
2547 end if
2548 end do
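            ! Example: 100 cells in z split across 8 ranks gives 12 cells per rank
            ! (p = 11) with rem_cells = 4, so the first four ranks along z receive
            ! one extra cell (13 in total) and the remaining four keep 12.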
2549
2550 ! Boundary condition at the beginning
2551 if (proc_coords(3) > 0 .or. (bc_z%beg == bc_periodic .and. num_procs_z > 1)) then
2552 proc_coords(3) = proc_coords(3) - 1
2553 call mpi_cart_rank(mpi_comm_cart, proc_coords, &
2554 bc_z%beg, ierr)
2555 proc_coords(3) = proc_coords(3) + 1
2556 end if
2557
2558 ! Boundary condition at the end
2559 if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == bc_periodic .and. num_procs_z > 1)) then
2560 proc_coords(3) = proc_coords(3) + 1
2561 call mpi_cart_rank(mpi_comm_cart, proc_coords, &
2562 bc_z%end, ierr)
2563 proc_coords(3) = proc_coords(3) - 1
2564 end if
2565
2566#ifdef MFC_POST_PROCESS
2567 ! Ghost zone at the beginning
2568 if (proc_coords(3) > 0 .and. format == 1) then
2569 offset_z%beg = 2
2570 else
2571 offset_z%beg = 0
2572 end if
2573
2574 ! Ghost zone at the end
2575 if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then
2576 offset_z%end = 2
2577 else
2578 offset_z%end = 0
2579 end if
2580#endif
2581
2582 ! Beginning and end sub-domain boundary locations
2583 if (parallel_io) then
2584 if (proc_coords(3) < rem_cells) then
2585 start_idx(3) = (p + 1)*proc_coords(3)
2586 else
2587 start_idx(3) = (p + 1)*proc_coords(3) + rem_cells
2588 end if
2589 else
2590#ifdef MFC_PRE_PROCESS
2591 if (old_grid .neqv. .true.) then
2592 dz = (z_domain%end - z_domain%beg)/real(p_glb + 1, wp)
2593
2594 if (proc_coords(3) < rem_cells) then
2595 z_domain%beg = z_domain%beg + dz*real((p + 1)* &
2596 proc_coords(3))
2597 z_domain%end = z_domain%end - dz*real((p + 1)* &
2598 (num_procs_z - proc_coords(3) - 1) &
2599 - (num_procs_z - rem_cells))
2600 else
2601 z_domain%beg = z_domain%beg + dz*real((p + 1)* &
2602 proc_coords(3) + rem_cells)
2603 z_domain%end = z_domain%end - dz*real((p + 1)* &
2604 (num_procs_z - proc_coords(3) - 1))
2605 end if
2606 end if
2607#endif
2608 end if
2609
2610 ! 2D Cartesian Processor Topology
2611 else
2612
2613 ! Initial estimate of optimal processor topology
2614 num_procs_x = 1
2615 num_procs_y = num_procs
2616 ierr = -1
2617
2618 ! Benchmarking the quality of this initial guess
2619 tmp_num_procs_x = num_procs_x
2620 tmp_num_procs_y = num_procs_y
2621 fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
2622 - (n + 1)/tmp_num_procs_y)
2623
2624 ! Optimization of the initial processor topology
2625 do i = 1, num_procs
2626
2627 if (mod(num_procs, i) == 0 &
2628 .and. &
2629 (m + 1)/i >= num_stcls_min*recon_order) then
2630
2631 tmp_num_procs_x = i
2632 tmp_num_procs_y = num_procs/i
2633
2634 if (fct_min >= abs((m + 1)/tmp_num_procs_x &
2635 - (n + 1)/tmp_num_procs_y) &
2636 .and. &
2637 (n + 1)/tmp_num_procs_y &
2638 >= &
2639 num_stcls_min*recon_order) then
2640
2641 num_procs_x = i
2642 num_procs_y = num_procs/i
2643 fct_min = abs((m + 1)/tmp_num_procs_x &
2644 - (n + 1)/tmp_num_procs_y)
2645 ierr = 0
2646
2647 end if
2648
2649 end if
2650
2651 end do
2652
2653 ! Verifying that a valid decomposition of the computational
2654 ! domain has been established. If not, the simulation exits.
2655 if (proc_rank == 0 .and. ierr == -1) then
2656 call s_mpi_abort('Unsupported combination of values '// &
2657 'of num_procs, m, n and '// &
2658 'weno/muscl/igr_order. Exiting.')
2659 end if
2660
2661 ! Creating new communicator using the Cartesian topology
2662 call mpi_cart_create(mpi_comm_world, 2, (/num_procs_x, &
2663 num_procs_y/), (/.true., &
2664 .true./), .false., mpi_comm_cart, &
2665 ierr)
2666
2667 ! Finding the Cartesian coordinates of the local process
2668 call mpi_cart_coords(mpi_comm_cart, proc_rank, 2, &
2669 proc_coords, ierr)
2670
2671 end if
2672 ! END: 2D Cartesian Processor Topology
2673
2674 ! Global Parameters for y-direction
2675
2676 ! Number of remaining cells
2677 rem_cells = mod(n + 1, num_procs_y)
2678
2679 ! Optimal number of cells per processor
2680 n = (n + 1)/num_procs_y - 1
2681
2682 ! Distributing the remaining cells
2683 do i = 1, rem_cells
2684 if (proc_coords(2) == i - 1) then
2685 n = n + 1; exit
2686 end if
2687 end do
2688
2689 ! Boundary condition at the beginning
2690 if (proc_coords(2) > 0 .or. (bc_y%beg == bc_periodic .and. num_procs_y > 1)) then
2691 proc_coords(2) = proc_coords(2) - 1
2692 call mpi_cart_rank(mpi_comm_cart, proc_coords, &
2693 bc_y%beg, ierr)
2694 proc_coords(2) = proc_coords(2) + 1
2695 end if
2696
2697 ! Boundary condition at the end
2698 if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == bc_periodic .and. num_procs_y > 1)) then
2699 proc_coords(2) = proc_coords(2) + 1
2700 call mpi_cart_rank(mpi_comm_cart, proc_coords, &
2701 bc_y%end, ierr)
2702 proc_coords(2) = proc_coords(2) - 1
2703 end if
2704
2705#ifdef MFC_POST_PROCESS
2706 ! Ghost zone at the beginning
2707 if (proc_coords(2) > 0 .and. format == 1) then
2708 offset_y%beg = 2
2709 else
2710 offset_y%beg = 0
2711 end if
2712
2713 ! Ghost zone at the end
2714 if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then
2715 offset_y%end = 2
2716 else
2717 offset_y%end = 0
2718 end if
2719#endif
2720
2721 ! Beginning and end sub-domain boundary locations
2722 if (parallel_io) then
2723 if (proc_coords(2) < rem_cells) then
2724 start_idx(2) = (n + 1)*proc_coords(2)
2725 else
2726 start_idx(2) = (n + 1)*proc_coords(2) + rem_cells
2727 end if
2728 else
2729#ifdef MFC_PRE_PROCESS
2730 if (old_grid .neqv. .true.) then
2731 dy = (y_domain%end - y_domain%beg)/real(n_glb + 1, wp)
2732
2733 if (proc_coords(2) < rem_cells) then
2734 y_domain%beg = y_domain%beg + dy*real((n + 1)* &
2735 proc_coords(2))
2736 y_domain%end = y_domain%end - dy*real((n + 1)* &
2737 (num_procs_y - proc_coords(2) - 1) &
2738 - (num_procs_y - rem_cells))
2739 else
2740 y_domain%beg = y_domain%beg + dy*real((n + 1)* &
2741 proc_coords(2) + rem_cells)
2742 y_domain%end = y_domain%end - dy*real((n + 1)* &
2743 (num_procs_y - proc_coords(2) - 1))
2744 end if
2745 end if
2746#endif
2747 end if
2748
2749 ! 1D Cartesian Processor Topology
2750 else
2751
2752 ! Optimal processor topology
2753 num_procs_x = num_procs
2754
2755 ! Creating new communicator using the Cartesian topology
2756 call mpi_cart_create(mpi_comm_world, 1, (/num_procs_x/), &
2757 (/.true./), .false., mpi_comm_cart, &
2758 ierr)
2759
2760 ! Finding the Cartesian coordinates of the local process
2761 call mpi_cart_coords(mpi_comm_cart, proc_rank, 1, &
2762 proc_coords, ierr)
2763
2764 end if
2765
2766 ! Global Parameters for x-direction
2767
2768 ! Number of remaining cells
2769 rem_cells = mod(m + 1, num_procs_x)
2770
2771 ! Optimal number of cells per processor
2772 m = (m + 1)/num_procs_x - 1
2773
2774 ! Distributing the remaining cells
2775 do i = 1, rem_cells
2776 if (proc_coords(1) == i - 1) then
2777 m = m + 1; exit
2778 end if
2779 end do
2780
2781 call s_update_cell_bounds(cells_bounds, m, n, p)
2782
2783 ! Boundary condition at the beginning
2784 if (proc_coords(1) > 0 .or. (bc_x%beg == bc_periodic .and. num_procs_x > 1)) then
2785 proc_coords(1) = proc_coords(1) - 1
2786 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_x%beg, ierr)
2787 proc_coords(1) = proc_coords(1) + 1
2788 end if
2789
2790 ! Boundary condition at the end
2791 if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == bc_periodic .and. num_procs_x > 1)) then
2792 proc_coords(1) = proc_coords(1) + 1
2793 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_x%end, ierr)
2794 proc_coords(1) = proc_coords(1) - 1
2795 end if
2796
2797#ifdef MFC_POST_PROCESS
2798 ! Ghost zone at the beginning
2799 if (proc_coords(1) > 0 .and. format == 1) then
2800 offset_x%beg = 2
2801 else
2802 offset_x%beg = 0
2803 end if
2804
2805 ! Ghost zone at the end
2806 if (proc_coords(1) < num_procs_x - 1 .and. format == 1) then
2807 offset_x%end = 2
2808 else
2809 offset_x%end = 0
2810 end if
2811#endif
2812
2813 ! Beginning and end sub-domain boundary locations
2814 if (parallel_io) then
2815 if (proc_coords(1) < rem_cells) then
2816 start_idx(1) = (m + 1)*proc_coords(1)
2817 else
2818 start_idx(1) = (m + 1)*proc_coords(1) + rem_cells
2819 end if
2820 else
2821#ifdef MFC_PRE_PROCESS
2822 if (old_grid .neqv. .true.) then
2823 dx = (x_domain%end - x_domain%beg)/real(m_glb + 1, wp)
2824
2825 if (proc_coords(1) < rem_cells) then
2826 x_domain%beg = x_domain%beg + dx*real((m + 1)* &
2827 proc_coords(1))
2828 x_domain%end = x_domain%end - dx*real((m + 1)* &
2829 (num_procs_x - proc_coords(1) - 1) &
2830 - (num_procs_x - rem_cells))
2831 else
2832 x_domain%beg = x_domain%beg + dx*real((m + 1)* &
2833 proc_coords(1) + rem_cells)
2834 x_domain%end = x_domain%end - dx*real((m + 1)* &
2835 (num_procs_x - proc_coords(1) - 1))
2836 end if
2837 end if
2838#endif
2839 end if
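        ! Using the same 100-cell, 8-rank split in x for parallel I/O: ranks 0-3 hold
        ! 13 cells and start at global indices 0, 13, 26 and 39, while ranks 4-7 hold
        ! 12 cells and start at 52, 64, 76 and 88, which is what the start_idx
        ! expressions above evaluate to.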
2840#endif
2841
2843
2844 !> The goal of this procedure is to populate the buffers of
2845 !! the grid variables by communicating with the neighboring
2846 !! processors. Note that only the buffers of the cell-width
2847 !! distributions are handled in such a way. This is because
2848 !! the buffers of cell-boundary locations may be calculated
2849 !! directly from those of the cell-width distributions.
2850 !! @param mpi_dir MPI communication coordinate direction
2851 !! @param pbc_loc Processor boundary condition (PBC) location
2852#ifndef MFC_PRE_PROCESS
2853 subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc)
2854
2855 integer, intent(in) :: mpi_dir
2856 integer, intent(in) :: pbc_loc
2857
2858#ifdef MFC_MPI
2859 integer :: ierr !< Generic flag used to identify and report MPI errors
2860
2861 ! MPI Communication in x-direction
2862 if (mpi_dir == 1) then
2863
2864 if (pbc_loc == -1) then ! PBC at the beginning
2865
2866 if (bc_x%end >= 0) then ! PBC at the beginning and end
2867
2868 ! Send/receive buffer to/from bc_x%end/bc_x%beg
2869 call mpi_sendrecv( &
2870 dx(m - buff_size + 1), buff_size, &
2871 mpi_p, bc_x%end, 0, &
2872 dx(-buff_size), buff_size, &
2873 mpi_p, bc_x%beg, 0, &
2874 mpi_comm_world, mpi_status_ignore, ierr)
2875
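                    ! For example, with buff_size = 2 this call sends dx(m-1:m) to the
                    ! neighbor at bc_x%end and receives the two cell widths of the left
                    ! halo into dx(-2:-1) from the neighbor at bc_x%beg.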
2876 else ! PBC at the beginning only
2877
2878 ! Send/receive buffer to/from bc_x%beg/bc_x%beg
2879 call mpi_sendrecv( &
2880 dx(0), buff_size, &
2881 mpi_p, bc_x%beg, 1, &
2882 dx(-buff_size), buff_size, &
2883 mpi_p, bc_x%beg, 0, &
2884 mpi_comm_world, mpi_status_ignore, ierr)
2885
2886 end if
2887
2888 else ! PBC at the end
2889
2890 if (bc_x%beg >= 0) then ! PBC at the end and beginning
2891
2892 ! Send/receive buffer to/from bc_x%beg/bc_x%end
2893 call mpi_sendrecv( &
2894 dx(0), buff_size, &
2895 mpi_p, bc_x%beg, 1, &
2896 dx(m + 1), buff_size, &
2897 mpi_p, bc_x%end, 1, &
2898 mpi_comm_world, mpi_status_ignore, ierr)
2899
2900 else ! PBC at the end only
2901
2902 ! Send/receive buffer to/from bc_x%end/bc_x%end
2903 call mpi_sendrecv( &
2904 dx(m - buff_size + 1), buff_size, &
2905 mpi_p, bc_x%end, 0, &
2906 dx(m + 1), buff_size, &
2907 mpi_p, bc_x%end, 1, &
2908 mpi_comm_world, mpi_status_ignore, ierr)
2909
2910 end if
2911
2912 end if
2913 ! END: MPI Communication in x-direction
2914
2915 ! MPI Communication in y-direction
2916 elseif (mpi_dir == 2) then
2917
2918 if (pbc_loc == -1) then ! PBC at the beginning
2919
2920 if (bc_y%end >= 0) then ! PBC at the beginning and end
2921
2922 ! Send/receive buffer to/from bc_y%end/bc_y%beg
2923 call mpi_sendrecv( &
2924 dy(n - buff_size + 1), buff_size, &
2925 mpi_p, bc_y%end, 0, &
2926 dy(-buff_size), buff_size, &
2927 mpi_p, bc_y%beg, 0, &
2928 mpi_comm_world, mpi_status_ignore, ierr)
2929
2930 else ! PBC at the beginning only
2931
2932 ! Send/receive buffer to/from bc_y%beg/bc_y%beg
2933 call mpi_sendrecv( &
2934 dy(0), buff_size, &
2935 mpi_p, bc_y%beg, 1, &
2936 dy(-buff_size), buff_size, &
2937 mpi_p, bc_y%beg, 0, &
2938 mpi_comm_world, mpi_status_ignore, ierr)
2939
2940 end if
2941
2942 else ! PBC at the end
2943
2944 if (bc_y%beg >= 0) then ! PBC at the end and beginning
2945
2946 ! Send/receive buffer to/from bc_y%beg/bc_y%end
2947 call mpi_sendrecv( &
2948 dy(0), buff_size, &
2949 mpi_p, bc_y%beg, 1, &
2950 dy(n + 1), buff_size, &
2951 mpi_p, bc_y%end, 1, &
2952 mpi_comm_world, mpi_status_ignore, ierr)
2953
2954 else ! PBC at the end only
2955
2956 ! Send/receive buffer to/from bc_y%end/bc_y%end
2957 call mpi_sendrecv( &
2958 dy(n - buff_size + 1), buff_size, &
2959 mpi_p, bc_y%end, 0, &
2960 dy(n + 1), buff_size, &
2961 mpi_p, bc_y%end, 1, &
2962 mpi_comm_world, mpi_status_ignore, ierr)
2963
2964 end if
2965
2966 end if
2967 ! END: MPI Communication in y-direction
2968
2969 ! MPI Communication in z-direction
2970 else
2971
2972 if (pbc_loc == -1) then ! PBC at the beginning
2973
2974 if (bc_z%end >= 0) then ! PBC at the beginning and end
2975
2976 ! Send/receive buffer to/from bc_z%end/bc_z%beg
2977 call mpi_sendrecv( &
2978 dz(p - buff_size + 1), buff_size, &
2979 mpi_p, bc_z%end, 0, &
2980 dz(-buff_size), buff_size, &
2981 mpi_p, bc_z%beg, 0, &
2982 mpi_comm_world, mpi_status_ignore, ierr)
2983
2984 else ! PBC at the beginning only
2985
2986 ! Send/receive buffer to/from bc_z%beg/bc_z%beg
2987 call mpi_sendrecv( &
2988 dz(0), buff_size, &
2989 mpi_p, bc_z%beg, 1, &
2990 dz(-buff_size), buff_size, &
2991 mpi_p, bc_z%beg, 0, &
2992 mpi_comm_world, mpi_status_ignore, ierr)
2993
2994 end if
2995
2996 else ! PBC at the end
2997
2998 if (bc_z%beg >= 0) then ! PBC at the end and beginning
2999
3000 ! Send/receive buffer to/from bc_z%beg/bc_z%end
3001 call mpi_sendrecv( &
3002 dz(0), buff_size, &
3003 mpi_p, bc_z%beg, 1, &
3004 dz(p + 1), buff_size, &
3005 mpi_p, bc_z%end, 1, &
3006 mpi_comm_world, mpi_status_ignore, ierr)
3007
3008 else ! PBC at the end only
3009
3010 ! Send/receive buffer to/from bc_z%end/bc_z%end
3011 call mpi_sendrecv( &
3012 dz(p - buff_size + 1), buff_size, &
3013 mpi_p, bc_z%end, 0, &
3014 dz(p + 1), buff_size, &
3015 mpi_p, bc_z%end, 1, &
3016 mpi_comm_world, mpi_status_ignore, ierr)
3017
3018 end if
3019
3020 end if
3021
3022 end if
3023 ! END: MPI Communication in z-direction
3024#endif
3025
3026    end subroutine s_mpi_sendrecv_grid_variables_buffers
3027#endif
3028
3029 !> Module deallocation and/or disassociation procedures
3030    impure subroutine s_finalize_mpi_common_module
3031
3032#ifdef MFC_MPI
3033 deallocate (buff_send, buff_recv)
3034#endif
3035
3036 end subroutine s_finalize_mpi_common_module
3037
3038end module m_mpi_common