MFC
Exascale flow solver
m_mpi_common.fpp.f90
1# 1 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2!>
3!! @file
4!! @brief Contains module m_mpi_common
5
6# 1 "/home/runner/work/MFC/MFC/src/common/include/case.fpp" 1
7! This file exists so that Fypp can be run without generating case.fpp files for
8! each target. This is useful when generating documentation, for example. This
9! should also let MFC be built with CMake directly, without invoking mfc.sh.
10
11! For pre-process.
12# 9 "/home/runner/work/MFC/MFC/src/common/include/case.fpp"
13
14! For moving immersed boundaries in simulation
15# 14 "/home/runner/work/MFC/MFC/src/common/include/case.fpp"
16# 6 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp" 2
17# 1 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 1
18# 1 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 1
19# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
20# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
21# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
22# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
23# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
24# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
25
26# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
27# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
28# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
29
30# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
31
32# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
33
34# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
35
36# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
37
38# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
39
40# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
41
42# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
43! New line at end of file is required for FYPP
44# 2 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
45# 1 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 1
46# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
47# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
48# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
49# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
50# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
51# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
52
53# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
54# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
55# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
56
57# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
58
59# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
60
61# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
62
63# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
64
65# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
66
67# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
68
69# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
70! New line at end of file is required for FYPP
71# 2 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 2
72
73# 4 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
74# 5 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
75# 6 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
76# 7 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
77# 8 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
78
79# 20 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
80
81# 43 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
82
83# 48 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
84
85# 53 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
86
87# 58 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
88
89# 63 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
90
91# 68 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
92
93# 76 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
94
95# 81 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
96
97# 86 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
98
99# 91 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
100
101# 96 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
102
103# 101 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
104
105# 106 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
106
107# 111 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
108
109# 116 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
110
111# 121 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
112
113# 151 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
114
115# 192 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
116
117# 207 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
118
119# 232 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
120
121# 243 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
122
123# 245 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
124# 255 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
125
126# 283 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
127
128# 293 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
129
130# 303 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
131
132# 312 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
133
134# 329 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
135
136# 339 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
137
138# 346 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
139
140# 352 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
141
142# 358 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
143
144# 364 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
145
146# 370 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
147
148# 376 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
149! New line at end of file is required for FYPP
150# 3 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
151# 1 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 1
152# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
153# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
154# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
155# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
156# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
157# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
158
159# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
160# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
161# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
162
163# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
164
165# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
166
167# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
168
169# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
170
171# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
172
173# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
174
175# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
176! New line at end of file is required for FYPP
177# 2 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 2
178
179# 7 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
180
181# 17 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
182
183# 22 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
184
185# 27 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
186
187# 32 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
188
189# 37 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
190
191# 42 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
192
193# 47 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
194
195# 52 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
196
197# 57 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
198
199# 62 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
200
201# 73 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
202
203# 78 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
204
205# 83 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
206
207# 88 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
208
209# 103 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
210
211# 131 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
212
213# 160 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
214
215# 175 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
216
217# 192 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
218
219# 213 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
220
221# 241 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
222
223# 256 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
224
225# 266 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
226
227# 275 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
228
229# 291 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
230
231# 301 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
232
233# 308 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
234! New line at end of file is required for FYPP
235# 4 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
236
237# 21 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
238
239# 37 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
240
241# 50 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
242
243# 104 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
244
245# 119 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
246
247# 130 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
248
249# 143 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
250
251# 171 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
252
253# 182 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
254
255# 193 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
256
257# 204 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
258
259# 214 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
260
261# 225 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
262
263# 236 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
264
265# 246 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
266
267# 252 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
268
269# 258 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
270
271# 264 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
272
273# 270 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
274
275# 272 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
276# 273 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
277! New line at end of file is required for FYPP
278# 2 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 2
279
280# 14 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
281
282! Caution:
283! This macro requires the use of a binding script to set CUDA_VISIBLE_DEVICES, such that we have one GPU device per MPI rank.
284! That's because for both cudaMemAdvise (preferred location) and cudaMemPrefetchAsync we use location = device_id = 0.
285! For an example see misc/nvidia_uvm/bind.sh.
286# 63 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
287
288# 81 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
289
290# 88 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
291
292# 111 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
293
294# 127 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
295
296# 153 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
297
298# 159 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
299
300# 167 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
301! New line at end of file is required for FYPP
302# 7 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp" 2
303
304!> @brief MPI communication layer: domain decomposition, halo exchange, reductions, and parallel I/O setup
305module m_mpi_common
306
307#ifdef MFC_MPI
308 use mpi !< message passing interface (mpi) module
309#endif
310
311 use m_derived_types !< definitions of the derived types
312
313 use m_global_parameters !< definitions of the global parameters
314
315 use m_helper
316
317 use ieee_arithmetic
318
319 use m_nvtx
320
321 implicit none
322
323 integer, private :: v_size
324
325# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
326#if defined(MFC_OpenACC)
327# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
328!$acc declare create(v_size)
329# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
330#elif defined(MFC_OpenMP)
331# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
332!$omp declare target (v_size)
333# 28 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
334#endif
335 !! Generic flags used to identify and report MPI errors
336
337 real(wp), private, allocatable, dimension(:) :: buff_send !<
338 !! This variable is utilized to pack and send the buffer of the cell-average
339 !! primitive variables, for a single computational domain boundary at a
340 !! time, to the relevant neighboring processor.
341
342 real(wp), private, allocatable, dimension(:) :: buff_recv !<
343 !! buff_recv is utilized to receive and unpack the buffer of the cell-
344 !! average primitive variables, for a single computational domain boundary
345 !! at a time, from the relevant neighboring processor.
346
347#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
348
349# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
350#if defined(MFC_OpenACC)
351# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
352!$acc declare create(buff_send, buff_recv)
353# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
354#elif defined(MFC_OpenMP)
355# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
356!$omp declare target (buff_send, buff_recv)
357# 42 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
358#endif
359#endif
360
361 integer(kind=8) :: halo_size
362
363# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
364#if defined(MFC_OpenACC)
365# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
366!$acc declare create(halo_size)
367# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
368#elif defined(MFC_OpenMP)
369# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
370!$omp declare target (halo_size)
371# 46 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
372#endif
373
374contains
375
376 !> The computation of parameters, the allocation of memory,
377 !! the association of pointers and/or the execution of any
378 !! other procedures that are necessary to set up the module.
379 impure subroutine s_initialize_mpi_common_module
380
381#ifdef MFC_MPI
382 ! Allocating buff_send and buff_recv. Note that, for the sake of
383 ! simplicity, both variables are provided sufficient storage to hold
384 ! the largest buffer in the computational domain.
385
386 if (qbmm .and. .not. polytropic) then
387 v_size = sys_size + 2*nb*4
388 else
389 v_size = sys_size
390 end if
391
392 if (n > 0) then
393 if (p > 0) then
394 halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* &
395 & (m + 2*buff_size + 1)* &
396 & (n + 2*buff_size + 1)* &
397 & (p + 2*buff_size + 1)/ &
398 & (cells_bounds%mnp_min + 2*buff_size + 1))
399 else
400 halo_size = -1 + buff_size*(v_size)* &
401 & (cells_bounds%mn_max + 2*buff_size + 1)
402 end if
403 else
404 halo_size = -1 + buff_size*(v_size)
405 end if
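 ! Illustrative sizing check (hypothetical numbers): with buff_size = 2,
 ! v_size = 10 and m = n = p = 99 (so cells_bounds%mnp_min = 99), the 3D
 ! branch gives halo_size = 2*10*104*104*104/104 - 1 = 216319, i.e. one
 ! buff_size-deep slab of the halo-padded local block for all v_size
 ! fields (buffer indices run 0:halo_size).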
406
407
408# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
409#if defined(MFC_OpenACC)
410# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
411!$acc update device(halo_size, v_size)
412# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
413#elif defined(MFC_OpenMP)
414# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
415!$omp target update to(halo_size, v_size)
416# 81 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
417#endif
418
419#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
420#ifdef MFC_DEBUG
421# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
422 block
423# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
424 use iso_fortran_env, only: output_unit
425# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
426
427# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
428 print *, 'm_mpi_common.fpp:84: ', '@:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size))'
429# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
430
431# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
432 call flush (output_unit)
433# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
434 end block
435# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
436#endif
437# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
438 allocate (buff_send(0:halo_size), buff_recv(0:halo_size))
439# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
440
441# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
442
443# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
444
445# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
446#if defined(MFC_OpenACC)
447# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
448!$acc enter data create(buff_send, buff_recv)
449# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
450#elif defined(MFC_OpenMP)
451# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
452!$omp target enter data map(always,alloc:buff_send, buff_recv)
453# 84 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
454#endif
455#else
456 allocate (buff_send(0:halo_size), buff_recv(0:halo_size))
457
458# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
459#if defined(MFC_OpenACC)
460# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
461!$acc enter data create(capture:buff_send)
462# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
463#elif defined(MFC_OpenMP)
464# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
465!$omp target enter data map(always,alloc:capture:buff_send)
466# 87 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
467#endif
468
469# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
470#if defined(MFC_OpenACC)
471# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
472!$acc enter data create(capture:buff_recv)
473# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
474#elif defined(MFC_OpenMP)
475# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
476!$omp target enter data map(always,alloc:capture:buff_recv)
477# 88 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
478#endif
479#endif
480#endif
481
482 end subroutine s_initialize_mpi_common_module
483
484 !> The subroutine initializes the MPI execution environment
485 !! and queries both the number of processors which will be
486 !! available for the job and the local processor rank.
487 impure subroutine s_mpi_initialize
488
489#ifdef MFC_MPI
490 integer :: ierr !< Generic flag used to identify and report MPI errors
491
492 ! Initializing the MPI environment
493 call mpi_init(ierr)
494
495 ! Checking whether the MPI environment has been properly initialized
496 if (ierr /= mpi_success) then
497 print '(A)', 'Unable to initialize MPI environment. Exiting.'
498 call mpi_abort(mpi_comm_world, 1, ierr)
499 end if
500
501 ! Querying the number of processors available for the job
502 call mpi_comm_size(mpi_comm_world, num_procs, ierr)
503
504 ! Querying the rank of the local processor
505 call mpi_comm_rank(mpi_comm_world, proc_rank, ierr)
506#else
507 ! Serial run only has 1 processor
508 num_procs = 1
509 ! Local processor rank is 0
510 proc_rank = 0
511#endif
512
513 end subroutine s_mpi_initialize
514
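 !> @brief Associates the parallel I/O structures (mpi_io_data and, when
 !! immersed boundaries are used, mpi_io_ib_data) with the field data and
 !! commits the corresponding MPI subarray file views.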
515 !! @param q_cons_vf Conservative variables
516 !! @param ib_markers Tracks whether a cell lies within an immersed boundary
517 !! @param beta Eulerian void fraction from Lagrangian bubbles
518 impure subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, beta)
519
520 type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf
521 type(integer_field), optional, intent(in) :: ib_markers
522 type(scalar_field), intent(in), optional :: beta
523
524 integer, dimension(num_dims) :: sizes_glb, sizes_loc
525 integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start
526
527#ifdef MFC_MPI
528
529 ! Generic loop iterator
530 integer :: i, j
531 integer :: ierr !< Generic flag used to identify and report MPI errors
532
533 ! Altered system size for the Lagrangian subgrid bubble model
534 integer :: alt_sys
535
536 if (present(beta)) then
537 alt_sys = sys_size + 1
538 else
539 alt_sys = sys_size
540 end if
541
542 do i = 1, sys_size
543 mpi_io_data%var(i)%sf => q_cons_vf(i)%sf(0:m, 0:n, 0:p)
544 end do
545
546 if (present(beta)) then
547 mpi_io_data%var(alt_sys)%sf => beta%sf(0:m, 0:n, 0:p)
548 end if
549
550 !Additional variables pb and mv for non-polytropic qbmm
551 if (qbmm .and. .not. polytropic) then
552 do i = 1, nb
553 do j = 1, nnode
554#ifdef MFC_PRE_PROCESS
555 mpi_io_data%var(sys_size + (i - 1)*nnode + j)%sf => pb%sf(0:m, 0:n, 0:p, j, i)
556 mpi_io_data%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv%sf(0:m, 0:n, 0:p, j, i)
557#elif defined (MFC_SIMULATION)
558 mpi_io_data%var(sys_size + (i - 1)*nnode + j)%sf => pb_ts(1)%sf(0:m, 0:n, 0:p, j, i)
559 mpi_io_data%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv_ts(1)%sf(0:m, 0:n, 0:p, j, i)
560#endif
561 end do
562 end do
563 end if
564
565 ! Define global(g) and local(l) sizes for flow variables
566 sizes_glb(1) = m_glb + 1; sizes_loc(1) = m + 1
567 if (n > 0) then
568 sizes_glb(2) = n_glb + 1; sizes_loc(2) = n + 1
569 if (p > 0) then
570 sizes_glb(num_dims) = p_glb + 1; sizes_loc(num_dims) = p + 1
571 end if
572 end if
573
574 ! Define the view for each variable
575 do i = 1, alt_sys
576 call mpi_type_create_subarray(num_dims, sizes_glb, sizes_loc, start_idx, &
577 mpi_order_fortran, mpi_p, mpi_io_data%view(i), ierr)
578 call mpi_type_commit(mpi_io_data%view(i), ierr)
579 end do
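 ! Illustrative example (hypothetical decomposition): splitting a 200-cell
 ! x-domain over two ranks gives sizes_glb = (/200/), sizes_loc = (/100/)
 ! and start_idx = (/0/) or (/100/), so each committed view addresses that
 ! rank's contiguous slab of the global array in the output file.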
580
581#ifndef MFC_POST_PROCESS
582 if (qbmm .and. .not. polytropic) then
583 do i = sys_size + 1, sys_size + 2*nb*4
584 call mpi_type_create_subarray(num_dims, sizes_glb, sizes_loc, start_idx, &
585 mpi_order_fortran, mpi_p, mpi_io_data%view(i), ierr)
586 call mpi_type_commit(mpi_io_data%view(i), ierr)
587
588 end do
589 end if
590#endif
591
592#ifndef MFC_PRE_PROCESS
593 if (present(ib_markers)) then
594 mpi_io_ib_data%var%sf => ib_markers%sf(0:m, 0:n, 0:p)
595
596 call mpi_type_create_subarray(num_dims, sizes_glb, sizes_loc, start_idx, &
597 mpi_order_fortran, mpi_integer, mpi_io_ib_data%view, ierr)
598 call mpi_type_commit(mpi_io_ib_data%view, ierr)
599 end if
600#endif
601
602#endif
603
604 end subroutine s_initialize_mpi_data
605
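 !> @brief Counterpart of s_initialize_mpi_data for the down-sampled fields:
 !! builds and commits the MPI I/O views using the coarsened local extents.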
606 !! @param q_cons_vf Conservative variables
607 subroutine s_initialize_mpi_data_ds(q_cons_vf)
608
609 type(scalar_field), &
610 dimension(sys_size), &
611 intent(in) :: q_cons_vf
612
613 integer, dimension(num_dims) :: sizes_glb, sizes_loc
614 integer, dimension(3) :: sf_start_idx
615
616#ifdef MFC_MPI
617
618 ! Generic loop iterator
619 integer :: i, j, q, k, l, m_ds, n_ds, p_ds, ierr
620
621 sf_start_idx = (/0, 0, 0/)
622
623#ifndef MFC_POST_PROCESS
624 m_ds = int((m + 1)/3) - 1
625 n_ds = int((n + 1)/3) - 1
626 p_ds = int((p + 1)/3) - 1
627#else
628 m_ds = m
629 n_ds = n
630 p_ds = p
631#endif
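 ! Illustrative: with m = 299, m_ds = int(300/3) - 1 = 99, so outside of
 ! post-process roughly one third of the cells is kept in each direction.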
632
633#ifdef MFC_POST_PROCESS
634 do i = 1, sys_size
635 mpi_io_data%var(i)%sf => q_cons_vf(i)%sf(-1:m_ds + 1, -1:n_ds + 1, -1:p_ds + 1)
636 end do
637#endif
638 ! Define the local (down-sampled) sizes for the flow variables
639 sizes_loc(1) = m_ds + 3
640 if (n > 0) then
641 sizes_loc(2) = n_ds + 3
642 if (p > 0) then
643 sizes_loc(num_dims) = p_ds + 3
644 end if
645 end if
646
647 ! Define the view for each variable
648 do i = 1, sys_size
649 call mpi_type_create_subarray(num_dims, sizes_loc, sizes_loc, sf_start_idx, &
650 mpi_order_fortran, mpi_p, mpi_io_data%view(i), ierr)
651 call mpi_type_commit(mpi_io_data%view(i), ierr)
652 end do
653#endif
654
655 end subroutine s_initialize_mpi_data_ds
656
657 !> @brief Gathers variable-length real vectors from all MPI ranks onto the root process.
658 impure subroutine s_mpi_gather_data(my_vector, counts, gathered_vector, root)
659
660 integer, intent(in) :: counts ! Number of elements in this process's vector
661 real(wp), intent(in), dimension(counts) :: my_vector ! Input vector on each process
662 integer, intent(in) :: root ! Rank of the root process
663 real(wp), allocatable, intent(out) :: gathered_vector(:) ! Gathered vector on the root process
664
665 integer :: i
666 integer :: ierr !< Generic flag used to identify and report MPI errors
667 integer, allocatable :: recounts(:), displs(:)
668
669#ifdef MFC_MPI
670
671 allocate (recounts(num_procs))
672
673 call mpi_gather(counts, 1, mpi_integer, recounts, 1, mpi_integer, root, &
674 mpi_comm_world, ierr)
675
676 allocate (displs(size(recounts)))
677
678 displs(1) = 0
679
680 do i = 2, size(recounts)
681 displs(i) = displs(i - 1) + recounts(i - 1)
682 end do
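 ! Illustrative: recounts = (/3, 5, 2/) gives displs = (/0, 3, 8/), so each
 ! rank's vector is placed contiguously, in rank order, in gathered_vector.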
683
684 allocate (gathered_vector(sum(recounts)))
685 call mpi_gatherv(my_vector, counts, mpi_p, gathered_vector, recounts, displs, mpi_p, &
686 root, mpi_comm_world, ierr)
687#endif
688 end subroutine s_mpi_gather_data
689
690 !> @brief Gathers per-rank time step wall-clock times onto rank 0 for performance reporting.
691 impure subroutine mpi_bcast_time_step_values(proc_time, time_avg)
692
693 real(wp), dimension(0:num_procs - 1), intent(inout) :: proc_time
694 real(wp), intent(inout) :: time_avg
695
696#ifdef MFC_MPI
697 integer :: ierr !< Generic flag used to identify and report MPI errors
698
699 call mpi_gather(time_avg, 1, mpi_p, proc_time(0), 1, mpi_p, 0, mpi_comm_world, ierr)
700
701#endif
702
703 end subroutine mpi_bcast_time_step_values
704
705 !> @brief Prints a case file error with the prohibited condition and message, then aborts execution.
706 impure subroutine s_prohibit_abort(condition, message)
707 character(len=*), intent(in) :: condition, message
708
709 print *, ""
710 print *, "CASE FILE ERROR"
711 print *, " - Prohibited condition: ", trim(condition)
712 if (len_trim(message) > 0) then
713 print *, " - Note: ", trim(message)
714 end if
715 print *, ""
716 call s_mpi_abort(code=case_file_error_code)
717 end subroutine s_prohibit_abort
718
719 !> The goal of this subroutine is to determine the global
720 !! extrema of the stability criteria in the computational
721 !! domain. This is performed by sifting through the local
722 !! extrema of each stability criterion. Note that each of
723 !! the local extrema is from a single process, within its
724 !! assigned section of the computational domain. Finally,
725 !! note that the global extrema values are only bookkept
726 !! on the rank 0 processor.
727 !! @param icfl_max_loc Local maximum ICFL stability criterion
728 !! @param vcfl_max_loc Local maximum VCFL stability criterion
729 !! @param Rc_min_loc Local minimum Rc stability criterion
730 !! @param icfl_max_glb Global maximum ICFL stability criterion
731 !! @param vcfl_max_glb Global maximum VCFL stability criterion
732 !! @param Rc_min_glb Global minimum Rc stability criterion
733 impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, &
734 vcfl_max_loc, &
735 Rc_min_loc, &
736 icfl_max_glb, &
737 vcfl_max_glb, &
738 Rc_min_glb)
739
740 real(wp), intent(in) :: icfl_max_loc
741 real(wp), intent(in) :: vcfl_max_loc
742 real(wp), intent(in) :: rc_min_loc
743
744 real(wp), intent(out) :: icfl_max_glb
745 real(wp), intent(out) :: vcfl_max_glb
746 real(wp), intent(out) :: rc_min_glb
747
748#ifdef MFC_SIMULATION
749#ifdef MFC_MPI
750 integer :: ierr !< Generic flag used to identify and report MPI errors
751
752 ! Reducing local extrema of the ICFL, VCFL and Rc numbers to their
753 ! global extrema and bookkeeping the results on the rank 0 processor
754 call mpi_reduce(icfl_max_loc, icfl_max_glb, 1, &
755 mpi_p, mpi_max, 0, &
756 mpi_comm_world, ierr)
757
758 if (viscous) then
759 call mpi_reduce(vcfl_max_loc, vcfl_max_glb, 1, &
760 mpi_p, mpi_max, 0, &
761 mpi_comm_world, ierr)
762 call mpi_reduce(rc_min_loc, rc_min_glb, 1, &
763 mpi_p, mpi_min, 0, &
764 mpi_comm_world, ierr)
765 end if
766
767#else
768
769 icfl_max_glb = icfl_max_loc
770
771 if (viscous) then
772 vcfl_max_glb = vcfl_max_loc
773 rc_min_glb = rc_min_loc
774 end if
775
776#endif
777#endif
778
779 end subroutine s_mpi_reduce_stability_criteria_extrema
780
781 !> The following subroutine takes the input local variable
782 !! from all processors and reduces it to the sum of all
783 !! values. The globally reduced value is returned in var_glb
784 !! on every processor.
785 !! @param var_loc Some variable containing the local value which should be
786 !! reduced amongst all the processors in the communicator.
787 !! @param var_glb The globally reduced value
788 impure subroutine s_mpi_allreduce_sum(var_loc, var_glb)
789
790 real(wp), intent(in) :: var_loc
791 real(wp), intent(out) :: var_glb
792
793#ifdef MFC_MPI
794 integer :: ierr !< Generic flag used to identify and report MPI errors
795
796 ! Performing the reduction procedure
797 call mpi_allreduce(var_loc, var_glb, 1, mpi_p, &
798 mpi_sum, mpi_comm_world, ierr)
799
800#endif
801
802 end subroutine s_mpi_allreduce_sum
803
804 !> This subroutine follows the behavior of the s_mpi_allreduce_sum subroutine
805 !> with the additional feature that it reduces an array of vectors.
806 impure subroutine s_mpi_allreduce_vectors_sum(var_loc, var_glb, num_vectors, vector_length)
807
808 integer, intent(in) :: num_vectors, vector_length
809 real(wp), dimension(:, :), intent(in) :: var_loc
810 real(wp), dimension(:, :), intent(out) :: var_glb
811
812#ifdef MFC_MPI
813 integer :: ierr !< Generic flag used to identify and report MPI errors
814
815 ! Performing the reduction procedure
816 if (loc(var_loc) == loc(var_glb)) then
817 call mpi_allreduce(mpi_in_place, var_glb, num_vectors*vector_length, &
818 mpi_p, mpi_sum, mpi_comm_world, ierr)
819 else
820 call mpi_allreduce(var_loc, var_glb, num_vectors*vector_length, &
821 mpi_p, mpi_sum, mpi_comm_world, ierr)
822 end if
823
824#else
825 var_glb(1:num_vectors, 1:vector_length) = var_loc(1:num_vectors, 1:vector_length)
826#endif
827
828 end subroutine s_mpi_allreduce_vectors_sum
829
830 !> The following subroutine takes the input local variable
831 !! from all processors and reduces it to the sum of all
832 !! values. The globally reduced value is returned in var_glb
833 !! on every processor.
834 !! @param var_loc Some variable containing the local value which should be
835 !! reduced amongst all the processors in the communicator.
836 !! @param var_glb The globally reduced value
837 impure subroutine s_mpi_allreduce_integer_sum(var_loc, var_glb)
838
839 integer, intent(in) :: var_loc
840 integer, intent(out) :: var_glb
841
842#ifdef MFC_MPI
843 integer :: ierr !< Generic flag used to identify and report MPI errors
844
845 ! Performing the reduction procedure
846 call mpi_allreduce(var_loc, var_glb, 1, mpi_integer, &
847 mpi_sum, mpi_comm_world, ierr)
848#else
849 var_glb = var_loc
850#endif
851
852 end subroutine s_mpi_allreduce_integer_sum
853
854 !> The following subroutine takes the input local variable
855 !! from all processors and reduces it to the minimum of all
856 !! values. The globally reduced value is returned in var_glb
857 !! on every processor.
858 !! @param var_loc Some variable containing the local value which should be
859 !! reduced amongst all the processors in the communicator.
860 !! @param var_glb The globally reduced value
861 impure subroutine s_mpi_allreduce_min(var_loc, var_glb)
862
863 real(wp), intent(in) :: var_loc
864 real(wp), intent(out) :: var_glb
865
866#ifdef MFC_MPI
867 integer :: ierr !< Generic flag used to identify and report MPI errors
868
869 ! Performing the reduction procedure
870 call mpi_allreduce(var_loc, var_glb, 1, mpi_p, &
871 mpi_min, mpi_comm_world, ierr)
872
873#endif
874
875 end subroutine s_mpi_allreduce_min
876
877 !> The following subroutine takes the input local variable
878 !! from all processors and reduces it to the maximum of all
879 !! values. The globally reduced value is returned in var_glb
880 !! on every processor.
881 !! @param var_loc Some variable containing the local value which should be
882 !! reduced amongst all the processors in the communicator.
883 !! @param var_glb The globally reduced value
884 impure subroutine s_mpi_allreduce_max(var_loc, var_glb)
885
886 real(wp), intent(in) :: var_loc
887 real(wp), intent(out) :: var_glb
888
889#ifdef MFC_MPI
890 integer :: ierr !< Generic flag used to identify and report MPI errors
891
892 ! Performing the reduction procedure
893 call mpi_allreduce(var_loc, var_glb, 1, mpi_p, &
894 mpi_max, mpi_comm_world, ierr)
895
896#endif
897
898 end subroutine s_mpi_allreduce_max
899
900 !> The following subroutine takes the input variable and
901 !! determines its minimum value on the entire computational
902 !! domain. The result is stored back into the input variable.
903 !! @param var_loc holds the local value to be reduced among
904 !! all the processors in communicator. On output, the variable holds
905 !! the minimum value, reduced amongst all of the local values.
906 impure subroutine s_mpi_reduce_min(var_loc)
907
908 real(wp), intent(inout) :: var_loc
909
910#ifdef MFC_MPI
911 integer :: ierr !< Generic flag used to identify and report MPI errors
912
913 ! Temporary storage variable that holds the reduced minimum value
914 real(wp) :: var_glb
915
916 ! Performing reduction procedure and eventually storing its result
917 ! into the variable that was initially inputted into the subroutine
918 call mpi_reduce(var_loc, var_glb, 1, mpi_p, &
919 mpi_min, 0, mpi_comm_world, ierr)
920
921 call mpi_bcast(var_glb, 1, mpi_p, &
922 0, mpi_comm_world, ierr)
923
924 var_loc = var_glb
925
926#endif
927
928 end subroutine s_mpi_reduce_min
929
930 !> The following subroutine takes the first element of the
931 !! two-element input variable and determines its maximum
932 !! value on the entire computational domain. The result is
933 !! stored back into the first element of the variable while
934 !! the rank of the processor that is in charge of the sub-
935 !! domain containing the maximum is stored into the second
936 !! element of the variable.
937 !! @param var_loc On input, this variable holds the local value and processor rank,
938 !! which are to be reduced among all the processors in communicator.
939 !! On output, this variable holds the maximum value, reduced amongst
940 !! all of the local values, and the process rank to which the value
941 !! belongs.
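 !! A typical call (illustrative) sets var_loc(1) to the local value and
 !! var_loc(2) to real(proc_rank, wp) before invoking the subroutine; on
 !! return, var_loc(1) holds the global maximum and var_loc(2) the owning rank.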
942 impure subroutine s_mpi_reduce_maxloc(var_loc)
943
944 real(wp), dimension(2), intent(inout) :: var_loc
945
946#ifdef MFC_MPI
947 integer :: ierr !< Generic flag used to identify and report MPI errors
948
949 real(wp), dimension(2) :: var_glb !<
950 !! Temporary storage variable that holds the reduced maximum value
951 !! and the rank of the processor with which the value is associated
952
953 ! Performing reduction procedure and eventually storing its result
954 ! into the variable that was initially inputted into the subroutine
955 call mpi_reduce(var_loc, var_glb, 1, mpi_2p, &
956 mpi_maxloc, 0, mpi_comm_world, ierr)
957
958 call mpi_bcast(var_glb, 1, mpi_2p, &
959 0, mpi_comm_world, ierr)
960
961 var_loc = var_glb
962
963#endif
964
965 end subroutine s_mpi_reduce_maxloc
966
967 !> The subroutine terminates the MPI execution environment.
968 !! @param prnt error message to be printed
969 !! @param code optional exit code
970 impure subroutine s_mpi_abort(prnt, code)
971
972 character(len=*), intent(in), optional :: prnt
973 integer, intent(in), optional :: code
974
975#ifdef MFC_MPI
976 integer :: ierr !< Generic flag used to identify and report MPI errors
977#endif
978
979 if (present(prnt)) then
980 print *, prnt
981 call flush (6)
982
983 end if
984
985#ifndef MFC_MPI
986 if (present(code)) then
987 stop code
988 else
989 stop 1
990 end if
991#else
992 ! Terminating the MPI environment
993 if (present(code)) then
994 call mpi_abort(mpi_comm_world, code, ierr)
995 else
996 call mpi_abort(mpi_comm_world, 1, ierr)
997 end if
998#endif
999
1000 end subroutine s_mpi_abort
1001
1002 !> Halts all processes until all have reached the barrier.
1003 impure subroutine s_mpi_barrier
1004
1005#ifdef MFC_MPI
1006 integer :: ierr !< Generic flag used to identify and report MPI errors
1007
1008 ! Calling MPI_BARRIER
1009 call mpi_barrier(mpi_comm_world, ierr)
1010
1011#endif
1012
1013 end subroutine s_mpi_barrier
1014
1015 !> The subroutine finalizes the MPI execution environment.
1016 impure subroutine s_mpi_finalize
1017
1018#ifdef MFC_MPI
1019 integer :: ierr !< Generic flag used to identify and report MPI errors
1020
1021 ! Finalizing the MPI environment
1022 call mpi_finalize(ierr)
1023
1024#endif
1025
1026 end subroutine s_mpi_finalize
1027
1028 !> The goal of this procedure is to populate the buffers of
1029 !! the cell-average conservative variables by communicating
1030 !! with the neighboring processors.
1031 !! @param q_comm Cell-average conservative variables
1032 !! @param mpi_dir MPI communication coordinate direction
1033 !! @param pbc_loc Processor boundary condition (PBC) location
1034 !! @param nVar Number of variables to communicate
1035 !! @param pb_in Optional internal bubble pressure
1036 !! @param mv_in Optional bubble mass velocity
1038 mpi_dir, &
1039 pbc_loc, &
1040 nVar, &
1041 pb_in, mv_in)
1042
1043 type(scalar_field), dimension(1:), intent(inout) :: q_comm
1044 real(stp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb_in, mv_in
1045 integer, intent(in) :: mpi_dir, pbc_loc, nVar
1046
1047 integer :: i, j, k, l, r, q !< Generic loop iterators
1048
1049 integer :: buffer_counts(1:3), buffer_count
1050
1051 type(int_bounds_info) :: boundary_conditions(1:3)
1052 integer :: beg_end(1:2), grid_dims(1:3)
1053 integer :: dst_proc, src_proc, recv_tag, send_tag
1054
1055 logical :: beg_end_geq_0, qbmm_comm
1056
1057 integer :: pack_offset, unpack_offset
1058
1059#ifdef MFC_MPI
1060 integer :: ierr !< Generic flag used to identify and report MPI errors
1061
1062 call nvtxstartrange("RHS-COMM-PACKBUF")
1063
1064 qbmm_comm = .false.
1065
1066 if (present(pb_in) .and. present(mv_in) .and. qbmm .and. .not. polytropic) then
1067 qbmm_comm = .true.
1068 v_size = nvar + 2*nb*4
1069 buffer_counts = (/ &
1070 buff_size*v_size*(n + 1)*(p + 1), &
1071 buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), &
1072 buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
1073 /)
1074 else
1075 v_size = nvar
1076 buffer_counts = (/ &
1077 buff_size*v_size*(n + 1)*(p + 1), &
1078 buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), &
1079 buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
1080 /)
1081 end if
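 ! Illustrative: for mpi_dir = 1 each message carries buff_size x-layers of
 ! the (n + 1)*(p + 1) cross-section for every one of the v_size fields; the
 ! mpi_dir = 2 and 3 counts span the halo-padded extents of the directions
 ! communicated before them.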
1082
1083
1084# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1085#if defined(MFC_OpenACC)
1086# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1087!$acc update device(v_size)
1088# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1089#elif defined(MFC_OpenMP)
1090# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1091!$omp target update to(v_size)
1092# 693 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1093#endif
1094
1095 buffer_count = buffer_counts(mpi_dir)
1096 boundary_conditions = (/bc_x, bc_y, bc_z/)
1097 beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/)
1098 beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0
1099
1100 ! Implements:
1101 ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc
1102 ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg]
1103 ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg]
1104 ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end]
1105 ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end]
1106
1107 send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1))
1108 recv_tag = f_logical_to_int(pbc_loc == 1)
1109
1110 dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0)))
1111 src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1))
1112
1113 grid_dims = (/m, n, p/)
1114
1115 pack_offset = 0
1116 if (f_xor(pbc_loc == 1, beg_end_geq_0)) then
1117 pack_offset = grid_dims(mpi_dir) - buff_size + 1
1118 end if
1119
1120 unpack_offset = 0
1121 if (pbc_loc == 1) then
1122 unpack_offset = grid_dims(mpi_dir) + buff_size + 1
1123 end if
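 ! Worked example (illustrative): for mpi_dir = 1, pbc_loc = -1 and both
 ! x-neighbors present, beg_end_geq_0 is true, so send_tag = recv_tag = 0,
 ! dst_proc = bc_x%end, src_proc = bc_x%beg and pack_offset = m - buff_size + 1:
 ! the rank packs its last buff_size x-layers, sends them to the end
 ! neighbor, and receives the matching layers from the beg neighbor into
 ! its beginning halo (unpack_offset = 0).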
1124
1125 ! Pack Buffer to Send
1126# 727 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1127 if (mpi_dir == 1) then
1128# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1129
1130# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1131
1132# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1133#if defined(MFC_OpenACC)
1134# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1135!$acc parallel loop collapse(4) gang vector default(present) private(r)
1136# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1137#elif defined(MFC_OpenMP)
1138# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1139
1140# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1141
1142# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1143
1144# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1145!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1146# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1147#endif
1148# 729 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1149
1150 do l = 0, p
1151 do k = 0, n
1152 do j = 0, buff_size - 1
1153 do i = 1, nvar
1154 r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l))
1155 buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp)
1156 end do
1157 end do
1158 end do
1159 end do
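 ! Index check (hypothetical values): with v_size = 5, buff_size = 2 and
 ! n = 9, the entry for i = 3, j = 1, k = 4, l = 0 lands at
 ! r = 2 + 5*(1 + 2*(4 + 10*0)) = 47, so the fields of each halo cell
 ! occupy consecutive slots in buff_send.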
1160
1161# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1162
1163# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1164#if defined(MFC_OpenACC)
1165# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1166!$acc end parallel loop
1167# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1168#elif defined(MFC_OpenMP)
1169# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1170
1171# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1172
1173# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1174!$omp end target teams loop
1175# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1176#endif
1177# 740 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1178
1179
1180 if (qbmm_comm) then
1181
1182# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1183
1184# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1185#if defined(MFC_OpenACC)
1186# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1187!$acc parallel loop collapse(4) gang vector default(present) private(r)
1188# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1189#elif defined(MFC_OpenMP)
1190# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1191
1192# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1193
1194# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1195
1196# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1197!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1198# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1199#endif
1200# 743 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1201
1202 do l = 0, p
1203 do k = 0, n
1204 do j = 0, buff_size - 1
1205 do i = nvar + 1, nvar + 4
1206 do q = 1, nb
1207 r = (i - 1) + (q - 1)*4 + v_size* &
1208 (j + buff_size*(k + (n + 1)*l))
1209 buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nvar, q), kind=wp)
1210 end do
1211 end do
1212 end do
1213 end do
1214 end do
1215
1216# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1217
1218# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1219#if defined(MFC_OpenACC)
1220# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1221!$acc end parallel loop
1222# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1223#elif defined(MFC_OpenMP)
1224# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1225
1226# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1227
1228# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1229!$omp end target teams loop
1230# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1231#endif
1232# 757 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1233
1234
1235
1236# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1237
1238# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1239#if defined(MFC_OpenACC)
1240# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1241!$acc parallel loop collapse(5) gang vector default(present) private(r)
1242# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1243#elif defined(MFC_OpenMP)
1244# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1245
1246# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1247
1248# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1249
1250# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1251!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1252# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1253#endif
1254# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1255
1256 do l = 0, p
1257 do k = 0, n
1258 do j = 0, buff_size - 1
1259 do i = nvar + 1, nvar + 4
1260 do q = 1, nb
1261 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
1262 (j + buff_size*(k + (n + 1)*l))
1263 buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nvar, q), kind=wp)
1264 end do
1265 end do
1266 end do
1267 end do
1268 end do
1269
1270# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1271
1272# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1273#if defined(MFC_OpenACC)
1274# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1275!$acc end parallel loop
1276# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1277#elif defined(MFC_OpenMP)
1278# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1279
1280# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1281
1282# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1283!$omp end target teams loop
1284# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1285#endif
1286# 773 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1287
1288 end if
1289# 878 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1290 end if
1291# 727 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1292 if (mpi_dir == 2) then
1293# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1294
1295# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1296
1297# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1298#if defined(MFC_OpenACC)
1299# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1300!$acc parallel loop collapse(4) gang vector default(present) private(r)
1301# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1302#elif defined(MFC_OpenMP)
1303# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1304
1305# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1306
1307# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1308
1309# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1310!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1311# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1312#endif
1313# 776 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1314
1315 do i = 1, nvar
1316 do l = 0, p
1317 do k = 0, buff_size - 1
1318 do j = -buff_size, m + buff_size
1319 r = (i - 1) + v_size* &
1320 ((j + buff_size) + (m + 2*buff_size + 1)* &
1321 (k + buff_size*l))
1322 buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp)
1323 end do
1324 end do
1325 end do
1326 end do
1327
1328# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1329
1330# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1331#if defined(MFC_OpenACC)
1332# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1333!$acc end parallel loop
1334# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1335#elif defined(MFC_OpenMP)
1336# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1337
1338# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1339
1340# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1341!$omp end target teams loop
1342# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1343#endif
1344# 789 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1345
1346
1347 if (qbmm_comm) then
1348
1349# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1350
1351# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1352#if defined(MFC_OpenACC)
1353# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1354!$acc parallel loop collapse(5) gang vector default(present) private(r)
1355# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1356#elif defined(MFC_OpenMP)
1357# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1358
1359# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1360
1361# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1362
1363# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1364!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1365# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1366#endif
1367# 792 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1368
1369 do i = nvar + 1, nvar + 4
1370 do l = 0, p
1371 do k = 0, buff_size - 1
1372 do j = -buff_size, m + buff_size
1373 do q = 1, nb
1374 r = (i - 1) + (q - 1)*4 + v_size* &
1375 ((j + buff_size) + (m + 2*buff_size + 1)* &
1376 (k + buff_size*l))
1377 buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nvar, q), kind=wp)
1378 end do
1379 end do
1380 end do
1381 end do
1382 end do
1383
1384# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1385
1386# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1387#if defined(MFC_OpenACC)
1388# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1389!$acc end parallel loop
1390# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1391#elif defined(MFC_OpenMP)
1392# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1393
1394# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1395
1396# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1397!$omp end target teams loop
1398# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1399#endif
1400# 807 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1401
1402
1403
1404# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1405
1406# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1407#if defined(MFC_OpenACC)
1408# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1409!$acc parallel loop collapse(5) gang vector default(present) private(r)
1410# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1411#elif defined(MFC_OpenMP)
1412# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1413
1414# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1415
1416# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1417
1418# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1419!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1420# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1421#endif
1422# 809 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1423
1424 do i = nvar + 1, nvar + 4
1425 do l = 0, p
1426 do k = 0, buff_size - 1
1427 do j = -buff_size, m + buff_size
1428 do q = 1, nb
1429 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
1430 ((j + buff_size) + (m + 2*buff_size + 1)* &
1431 (k + buff_size*l))
1432 buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nvar, q), kind=wp)
1433 end do
1434 end do
1435 end do
1436 end do
1437 end do
1438
1439# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1440
1441# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1442#if defined(MFC_OpenACC)
1443# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1444!$acc end parallel loop
1445# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1446#elif defined(MFC_OpenMP)
1447# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1448
1449# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1450
1451# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1452!$omp end target teams loop
1453# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1454#endif
1455# 824 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1456
1457 end if
1458# 878 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1459 end if
1460# 727 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1461 if (mpi_dir == 3) then
1462# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1463
1464# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1465
1466# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1467#if defined(MFC_OpenACC)
1468# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1469!$acc parallel loop collapse(4) gang vector default(present) private(r)
1470# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1471#elif defined(MFC_OpenMP)
1472# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1473
1474# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1475
1476# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1477
1478# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1479!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1480# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1481#endif
1482# 827 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1483
1484 do i = 1, nvar
1485 do l = 0, buff_size - 1
1486 do k = -buff_size, n + buff_size
1487 do j = -buff_size, m + buff_size
1488 r = (i - 1) + v_size* &
1489 ((j + buff_size) + (m + 2*buff_size + 1)* &
1490 ((k + buff_size) + (n + 2*buff_size + 1)*l))
1491 buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp)
1492 end do
1493 end do
1494 end do
1495 end do
1496
1497# 840 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1498
1499# 840 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1500#if defined(MFC_OpenACC)
1501# 840 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1502!$acc end parallel loop
1503# 840 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1504#elif defined(MFC_OpenMP)
1505# 840 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1506
1507# 840 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1508
1509# 840 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1510!$omp end target teams loop
1511# 840 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1512#endif
1513# 840 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1514
1515
1516 if (qbmm_comm) then
1517
1518# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1519
1520# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1521#if defined(MFC_OpenACC)
1522# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1523!$acc parallel loop collapse(5) gang vector default(present) private(r)
1524# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1525#elif defined(MFC_OpenMP)
1526# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1527
1528# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1529
1530# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1531
1532# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1533!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1534# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1535#endif
1536# 843 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1537
1538 do i = nvar + 1, nvar + 4
1539 do l = 0, buff_size - 1
1540 do k = -buff_size, n + buff_size
1541 do j = -buff_size, m + buff_size
1542 do q = 1, nb
1543 r = (i - 1) + (q - 1)*4 + v_size* &
1544 ((j + buff_size) + (m + 2*buff_size + 1)* &
1545 ((k + buff_size) + (n + 2*buff_size + 1)*l))
1546 buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nvar, q), kind=wp)
1547 end do
1548 end do
1549 end do
1550 end do
1551 end do
1552
1553# 858 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1554
1555# 858 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1556#if defined(MFC_OpenACC)
1557# 858 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1558!$acc end parallel loop
1559# 858 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1560#elif defined(MFC_OpenMP)
1561# 858 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1562
1563# 858 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1564
1565# 858 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1566!$omp end target teams loop
1567# 858 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1568#endif
1569# 858 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1570
1571
1572
1573# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1574
1575# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1576#if defined(MFC_OpenACC)
1577# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1578!$acc parallel loop collapse(5) gang vector default(present) private(r)
1579# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1580#elif defined(MFC_OpenMP)
1581# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1582
1583# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1584
1585# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1586
1587# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1588!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1589# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1590#endif
1591# 860 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1592
1593 do i = nvar + 1, nvar + 4
1594 do l = 0, buff_size - 1
1595 do k = -buff_size, n + buff_size
1596 do j = -buff_size, m + buff_size
1597 do q = 1, nb
1598 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
1599 ((j + buff_size) + (m + 2*buff_size + 1)* &
1600 ((k + buff_size) + (n + 2*buff_size + 1)*l))
1601 buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nvar, q), kind=wp)
1602 end do
1603 end do
1604 end do
1605 end do
1606 end do
1607
1608# 875 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1609
1610# 875 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1611#if defined(MFC_OpenACC)
1612# 875 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1613!$acc end parallel loop
1614# 875 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1615#elif defined(MFC_OpenMP)
1616# 875 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1617
1618# 875 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1619
1620# 875 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1621!$omp end target teams loop
1622# 875 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1623#endif
1624# 875 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1625
1626 end if
1627# 878 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1628 end if
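        ! Illustrative note (added commentary): reading the offsets in the loops
        ! above, each cell's block of v_size buffer slots is laid out as relative
        ! offsets 0..nvar-1 for the q_comm fields, nvar..nvar+4*nb-1 for the pb_in
        ! values, and nvar+4*nb..nvar+8*nb-1 for the mv_in values; the extra nb*4
        ! term in the mv_in loops is what shifts them past the pb_in block.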
1629# 880 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1630 call nvtxendrange ! Packbuf
1631
1632 ! Send/Recv
1633#ifdef MFC_SIMULATION
1634# 885 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1635 if (rdma_mpi .eqv. .false.) then
1636# 900 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1637 call nvtxstartrange("RHS-COMM-DEV2HOST")
1638
1639# 901 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1640#if defined(MFC_OpenACC)
1641# 901 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1642!$acc update host(buff_send)
1643# 901 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1644#elif defined(MFC_OpenMP)
1645# 901 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1646!$omp target update from(buff_send)
1647# 901 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1648#endif
1649 call nvtxendrange
1650 call nvtxstartrange("RHS-COMM-SENDRECV-NO-RDMA")
1651
1652 call mpi_sendrecv( &
1653 buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1654 buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1655 mpi_comm_world, mpi_status_ignore, ierr)
1656
1657 call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1658
1659 call nvtxstartrange("RHS-COMM-HOST2DEV")
1660
1661# 913 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1662#if defined(MFC_OpenACC)
1663# 913 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1664!$acc update device(buff_recv)
1665# 913 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1666#elif defined(MFC_OpenMP)
1667# 913 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1668!$omp target update to(buff_recv)
1669# 913 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1670#endif
1671 call nvtxendrange
1672# 916 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1673 end if
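        ! Illustrative note (added commentary; pseudo-calls shown only as a sketch):
        ! when rdma_mpi is false the exchange is staged through the host, roughly
        !     !$acc update host(buff_send)        (device -> host)
        !     call mpi_sendrecv(buff_send, ..., buff_recv, ..., ierr)
        !     !$acc update device(buff_recv)      (host -> device)
        ! which works with any MPI library at the cost of two extra host-device
        ! copies per exchange.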
1674# 885 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1675 if (rdma_mpi .eqv. .true.) then
1676# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1677
1678# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1679
1680# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1681#if defined(MFC_OpenACC)
1682# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1683!$acc host_data use_device(buff_send, buff_recv)
1684# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1685 call nvtxstartrange("RHS-COMM-SENDRECV-RDMA")
1686# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1687
1688# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1689 call mpi_sendrecv( &
1690# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1691 buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1692# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1693 buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1694# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1695 mpi_comm_world, mpi_status_ignore, ierr)
1696# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1697
1698# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1699 call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1700# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1701
1702# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1703!$acc end host_data
1704# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1705#elif defined(MFC_OpenMP)
1706# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1707!$omp target data use_device_addr(buff_send, buff_recv)
1708# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1709 call nvtxstartrange("RHS-COMM-SENDRECV-RDMA")
1710# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1711
1712# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1713 call mpi_sendrecv( &
1714# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1715 buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1716# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1717 buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1718# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1719 mpi_comm_world, mpi_status_ignore, ierr)
1720# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1721
1722# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1723 call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1724# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1725
1726# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1727!$omp end target data
1728# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1729#else
1730# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1731 call nvtxstartrange("RHS-COMM-SENDRECV-RDMA")
1732# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1733
1734# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1735 call mpi_sendrecv( &
1736# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1737 buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1738# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1739 buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1740# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1741 mpi_comm_world, mpi_status_ignore, ierr)
1742# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1743
1744# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1745 call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1746# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1747
1748# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1749#endif
1750# 898 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1751
1752# 898 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1753#if defined(MFC_OpenACC)
1754# 898 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1755!$acc wait
1756# 898 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1757#elif defined(MFC_OpenMP)
1758# 898 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1759!$omp barrier
1760# 898 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1761#endif
1762# 916 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1763 end if
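        ! Note (added commentary): the rdma_mpi branch above assumes a GPU-aware MPI
        ! library. The host_data use_device (OpenACC) and target data use_device_addr
        ! (OpenMP) regions substitute device addresses for buff_send/buff_recv in the
        ! mpi_sendrecv call, so data can move directly between GPUs without the host
        ! staging used in the non-RDMA branch.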
1764# 918 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1765#else
1766 call mpi_sendrecv( &
1767 buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
1768 buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1769 mpi_comm_world, mpi_status_ignore, ierr)
1770#endif
1771
1772 ! Unpack Received Buffer
1773 call nvtxstartrange("RHS-COMM-UNPACKBUF")
1774# 928 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1775 if (mpi_dir == 1) then
1776# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1777
1778# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1779
1780# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1781#if defined(MFC_OpenACC)
1782# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1783!$acc parallel loop collapse(4) gang vector default(present) private(r)
1784# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1785#elif defined(MFC_OpenMP)
1786# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1787
1788# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1789
1790# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1791
1792# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1793!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1794# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1795#endif
1796# 930 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1797
1798 do l = 0, p
1799 do k = 0, n
1800 do j = -buff_size, -1
1801 do i = 1, nvar
1802 r = (i - 1) + v_size* &
1803 (j + buff_size*((k + 1) + (n + 1)*l))
1804 q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp)
1805#if defined(__INTEL_COMPILER)
1806 if (ieee_is_nan(q_comm(i)%sf(j + unpack_offset, k, l))) then
1807 print *, "Error", j, k, l, i
1808 call s_mpi_abort("NaN(s) in recv")
1809 end if
1810#endif
1811 end do
1812 end do
1813 end do
1814 end do
1815
1816# 948 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1817
1818# 948 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1819#if defined(MFC_OpenACC)
1820# 948 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1821!$acc end parallel loop
1822# 948 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1823#elif defined(MFC_OpenMP)
1824# 948 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1825
1826# 948 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1827
1828# 948 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1829!$omp end target teams loop
1830# 948 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1831#endif
1832# 948 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1833
1834
1835 if (qbmm_comm) then
1836
1837# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1838
1839# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1840#if defined(MFC_OpenACC)
1841# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1842!$acc parallel loop collapse(5) gang vector default(present) private(r)
1843# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1844#elif defined(MFC_OpenMP)
1845# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1846
1847# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1848
1849# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1850
1851# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1852!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1853# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1854#endif
1855# 951 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1856
1857 do l = 0, p
1858 do k = 0, n
1859 do j = -buff_size, -1
1860 do i = nvar + 1, nvar + 4
1861 do q = 1, nb
1862 r = (i - 1) + (q - 1)*4 + v_size* &
1863 (j + buff_size*((k + 1) + (n + 1)*l))
1864 pb_in(j + unpack_offset, k, l, i - nvar, q) = real(buff_recv(r), kind=stp)
1865 end do
1866 end do
1867 end do
1868 end do
1869 end do
1870
1871# 965 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1872
1873# 965 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1874#if defined(MFC_OpenACC)
1875# 965 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1876!$acc end parallel loop
1877# 965 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1878#elif defined(MFC_OpenMP)
1879# 965 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1880
1881# 965 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1882
1883# 965 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1884!$omp end target teams loop
1885# 965 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1886#endif
1887# 965 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1888
1889
1890
1891# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1892
1893# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1894#if defined(MFC_OpenACC)
1895# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1896!$acc parallel loop collapse(5) gang vector default(present) private(r)
1897# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1898#elif defined(MFC_OpenMP)
1899# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1900
1901# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1902
1903# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1904
1905# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1906!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1907# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1908#endif
1909# 967 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1910
1911 do l = 0, p
1912 do k = 0, n
1913 do j = -buff_size, -1
1914 do i = nvar + 1, nvar + 4
1915 do q = 1, nb
1916 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
1917 (j + buff_size*((k + 1) + (n + 1)*l))
1918 mv_in(j + unpack_offset, k, l, i - nvar, q) = real(buff_recv(r), kind=stp)
1919 end do
1920 end do
1921 end do
1922 end do
1923 end do
1924
1925# 981 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1926
1927# 981 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1928#if defined(MFC_OpenACC)
1929# 981 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1930!$acc end parallel loop
1931# 981 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1932#elif defined(MFC_OpenMP)
1933# 981 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1934
1935# 981 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1936
1937# 981 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1938!$omp end target teams loop
1939# 981 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1940#endif
1941# 981 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1942
1943 end if
1944# 1102 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1945 end if
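        ! Note (added commentary): on unpack, each received value is cast with
        ! real(..., kind=stp) back to the kind of the destination arrays, mirroring
        ! the real(..., kind=wp) cast applied on the packing side. The ieee_is_nan
        ! guard is compiled only under the Intel compiler and aborts the run if a
        ! halo value arrives as NaN, which helps localize corrupted exchanges.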
1946# 928 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1947 if (mpi_dir == 2) then
1948# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1949
1950# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1951
1952# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1953#if defined(MFC_OpenACC)
1954# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1955!$acc parallel loop collapse(4) gang vector default(present) private(r)
1956# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1957#elif defined(MFC_OpenMP)
1958# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1959
1960# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1961
1962# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1963
1964# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1965!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1966# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1967#endif
1968# 984 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1969
1970 do i = 1, nvar
1971 do l = 0, p
1972 do k = -buff_size, -1
1973 do j = -buff_size, m + buff_size
1974 r = (i - 1) + v_size* &
1975 ((j + buff_size) + (m + 2*buff_size + 1)* &
1976 ((k + buff_size) + buff_size*l))
1977 q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp)
1978#if defined(__INTEL_COMPILER)
1979 if (ieee_is_nan(q_comm(i)%sf(j, k + unpack_offset, l))) then
1980 print *, "Error", j, k, l, i
1981 call s_mpi_abort("NaN(s) in recv")
1982 end if
1983#endif
1984 end do
1985 end do
1986 end do
1987 end do
1988
1989# 1003 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1990
1991# 1003 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1992#if defined(MFC_OpenACC)
1993# 1003 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1994!$acc end parallel loop
1995# 1003 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1996#elif defined(MFC_OpenMP)
1997# 1003 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1998
1999# 1003 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2000
2001# 1003 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2002!$omp end target teams loop
2003# 1003 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2004#endif
2005# 1003 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2006
2007
2008 if (qbmm_comm) then
2009
2010# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2011
2012# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2013#if defined(MFC_OpenACC)
2014# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2015!$acc parallel loop collapse(5) gang vector default(present) private(r)
2016# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2017#elif defined(MFC_OpenMP)
2018# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2019
2020# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2021
2022# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2023
2024# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2025!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2026# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2027#endif
2028# 1006 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2029
2030 do i = nvar + 1, nvar + 4
2031 do l = 0, p
2032 do k = -buff_size, -1
2033 do j = -buff_size, m + buff_size
2034 do q = 1, nb
2035 r = (i - 1) + (q - 1)*4 + v_size* &
2036 ((j + buff_size) + (m + 2*buff_size + 1)* &
2037 ((k + buff_size) + buff_size*l))
2038 pb_in(j, k + unpack_offset, l, i - nvar, q) = real(buff_recv(r), kind=stp)
2039 end do
2040 end do
2041 end do
2042 end do
2043 end do
2044
2045# 1021 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2046
2047# 1021 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2048#if defined(MFC_OpenACC)
2049# 1021 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2050!$acc end parallel loop
2051# 1021 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2052#elif defined(MFC_OpenMP)
2053# 1021 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2054
2055# 1021 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2056
2057# 1021 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2058!$omp end target teams loop
2059# 1021 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2060#endif
2061# 1021 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2062
2063
2064
2065# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2066
2067# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2068#if defined(MFC_OpenACC)
2069# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2070!$acc parallel loop collapse(5) gang vector default(present) private(r)
2071# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2072#elif defined(MFC_OpenMP)
2073# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2074
2075# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2076
2077# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2078
2079# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2080!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2081# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2082#endif
2083# 1023 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2084
2085 do i = nvar + 1, nvar + 4
2086 do l = 0, p
2087 do k = -buff_size, -1
2088 do j = -buff_size, m + buff_size
2089 do q = 1, nb
2090 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
2091 ((j + buff_size) + (m + 2*buff_size + 1)* &
2092 ((k + buff_size) + buff_size*l))
2093 mv_in(j, k + unpack_offset, l, i - nvar, q) = real(buff_recv(r), kind=stp)
2094 end do
2095 end do
2096 end do
2097 end do
2098 end do
2099
2100# 1038 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2101
2102# 1038 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2103#if defined(MFC_OpenACC)
2104# 1038 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2105!$acc end parallel loop
2106# 1038 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2107#elif defined(MFC_OpenMP)
2108# 1038 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2109
2110# 1038 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2111
2112# 1038 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2113!$omp end target teams loop
2114# 1038 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2115#endif
2116# 1038 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2117
2118 end if
2119# 1102 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2120 end if
2121# 928 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2122 if (mpi_dir == 3) then
2123# 1041 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2124 ! Unpacking buffer from bc_z%beg
2125
2126# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2127
2128# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2129#if defined(MFC_OpenACC)
2130# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2131!$acc parallel loop collapse(4) gang vector default(present) private(r)
2132# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2133#elif defined(MFC_OpenMP)
2134# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2135
2136# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2137
2138# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2139
2140# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2141!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2142# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2143#endif
2144# 1042 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2145
2146 do i = 1, nvar
2147 do l = -buff_size, -1
2148 do k = -buff_size, n + buff_size
2149 do j = -buff_size, m + buff_size
2150 r = (i - 1) + v_size* &
2151 ((j + buff_size) + (m + 2*buff_size + 1)* &
2152 ((k + buff_size) + (n + 2*buff_size + 1)* &
2153 (l + buff_size)))
2154 q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp)
2155#if defined(__INTEL_COMPILER)
2156 if (ieee_is_nan(q_comm(i)%sf(j, k, l + unpack_offset))) then
2157 print *, "Error", j, k, l, i
2158 call s_mpi_abort("NaN(s) in recv")
2159 end if
2160#endif
2161 end do
2162 end do
2163 end do
2164 end do
2165
2166# 1062 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2167
2168# 1062 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2169#if defined(MFC_OpenACC)
2170# 1062 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2171!$acc end parallel loop
2172# 1062 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2173#elif defined(MFC_OpenMP)
2174# 1062 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2175
2176# 1062 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2177
2178# 1062 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2179!$omp end target teams loop
2180# 1062 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2181#endif
2182# 1062 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2183
2184
2185 if (qbmm_comm) then
2186
2187# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2188
2189# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2190#if defined(MFC_OpenACC)
2191# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2192!$acc parallel loop collapse(5) gang vector default(present) private(r)
2193# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2194#elif defined(MFC_OpenMP)
2195# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2196
2197# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2198
2199# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2200
2201# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2202!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2203# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2204#endif
2205# 1065 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2206
2207 do i = nvar + 1, nvar + 4
2208 do l = -buff_size, -1
2209 do k = -buff_size, n + buff_size
2210 do j = -buff_size, m + buff_size
2211 do q = 1, nb
2212 r = (i - 1) + (q - 1)*4 + v_size* &
2213 ((j + buff_size) + (m + 2*buff_size + 1)* &
2214 ((k + buff_size) + (n + 2*buff_size + 1)* &
2215 (l + buff_size)))
2216 pb_in(j, k, l + unpack_offset, i - nvar, q) = real(buff_recv(r), kind=stp)
2217 end do
2218 end do
2219 end do
2220 end do
2221 end do
2222
2223# 1081 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2224
2225# 1081 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2226#if defined(MFC_OpenACC)
2227# 1081 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2228!$acc end parallel loop
2229# 1081 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2230#elif defined(MFC_OpenMP)
2231# 1081 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2232
2233# 1081 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2234
2235# 1081 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2236!$omp end target teams loop
2237# 1081 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2238#endif
2239# 1081 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2240
2241
2242
2243# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2244
2245# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2246#if defined(MFC_OpenACC)
2247# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2248!$acc parallel loop collapse(5) gang vector default(present) private(r)
2249# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2250#elif defined(MFC_OpenMP)
2251# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2252
2253# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2254
2255# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2256
2257# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2258!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2259# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2260#endif
2261# 1083 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2262
2263 do i = nvar + 1, nvar + 4
2264 do l = -buff_size, -1
2265 do k = -buff_size, n + buff_size
2266 do j = -buff_size, m + buff_size
2267 do q = 1, nb
2268 r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
2269 ((j + buff_size) + (m + 2*buff_size + 1)* &
2270 ((k + buff_size) + (n + 2*buff_size + 1)* &
2271 (l + buff_size)))
2272 mv_in(j, k, l + unpack_offset, i - nvar, q) = real(buff_recv(r), kind=stp)
2273 end do
2274 end do
2275 end do
2276 end do
2277 end do
2278
2279# 1099 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2280
2281# 1099 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2282#if defined(MFC_OpenACC)
2283# 1099 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2284!$acc end parallel loop
2285# 1099 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2286#elif defined(MFC_OpenMP)
2287# 1099 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2288
2289# 1099 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2290
2291# 1099 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2292!$omp end target teams loop
2293# 1099 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2294#endif
2295# 1099 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2296
2297 end if
2298# 1102 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2299 end if
2300# 1104 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2301 call nvtxendrange
2302#endif
2303
2304 end subroutine s_mpi_sendrecv_variables_buffers
2305
2306 !> The purpose of this procedure is to optimally decompose
2307 !! the computational domain among the available processors.
2308 !! This is performed by attempting to award each processor,
2309 !! in each of the coordinate directions, approximately the
2310 !! same number of cells, and then recomputing the affected
2311 !! global parameters.
2312 subroutine s_mpi_decompose_computational_domain
2313
2314#ifdef MFC_MPI
2315
2316 integer :: num_procs_x, num_procs_y, num_procs_z !<
2317 !! Optimal number of processors in the x-, y- and z-directions
2318
2319 real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !<
2320 !! Candidate (trial) numbers of processors in the x-, y- and z-directions used while searching for the optimal topology
2321
2322 real(wp) :: fct_min !<
2323 !! Processor factorization (fct) minimization parameter
2324
2325 integer :: MPI_COMM_CART !<
2326 !! Cartesian processor topology communicator
2327
2328 integer :: rem_cells !<
2329 !! Remaining number of cells, in a particular coordinate direction,
2330 !! after the majority is divided up among the available processors
2331
2332 integer :: recon_order !<
2333 !! WENO or MUSCL reconstruction order
2334
2335 integer :: i, j !< Generic loop iterators
2336 integer :: ierr !< Generic flag used to identify and report MPI errors
2337
2338 if (recon_type == weno_type) then
2339 recon_order = weno_order
2340 else
2341 recon_order = muscl_order
2342 end if
2343
2344 if (num_procs == 1 .and. parallel_io) then
2345 do i = 1, num_dims
2346 start_idx(i) = 0
2347 end do
2348 return
2349 end if
2350
2351 if (igr) then
2352 recon_order = igr_order
2353 end if
2354
2355 ! 3D Cartesian Processor Topology
2356 if (n > 0) then
2357
2358 if (p > 0) then
2359 if (fft_wrt) then
2360
2361 ! Initial estimate of optimal processor topology
2362 num_procs_x = 1
2363 num_procs_y = 1
2364 num_procs_z = num_procs
2365 ierr = -1
2366
2367 ! Benchmarking the quality of this initial guess
2368 tmp_num_procs_y = num_procs_y
2369 tmp_num_procs_z = num_procs_z
2370 fct_min = 10._wp*abs((n + 1)/tmp_num_procs_y &
2371 - (p + 1)/tmp_num_procs_z)
2372
2373 ! Optimization of the initial processor topology
2374 do i = 1, num_procs
2375
2376 if (mod(num_procs, i) == 0 &
2377 .and. &
2378 (n + 1)/i >= num_stcls_min*recon_order) then
2379
2380 tmp_num_procs_y = i
2381 tmp_num_procs_z = num_procs/i
2382
2383 if (fct_min >= abs((n + 1)/tmp_num_procs_y &
2384 - (p + 1)/tmp_num_procs_z) &
2385 .and. &
2386 (p + 1)/tmp_num_procs_z &
2387 >= &
2388 num_stcls_min*recon_order) then
2389
2390 num_procs_y = i
2391 num_procs_z = num_procs/i
2392 fct_min = abs((n + 1)/tmp_num_procs_y &
2393 - (p + 1)/tmp_num_procs_z)
2394 ierr = 0
2395
2396 end if
2397
2398 end if
2399
2400 end do
2401 else
2402
2403 if (cyl_coord .and. p > 0) then
2404 ! Implement pencil processor blocking if using cylindrical coordinates so
2405 ! that all cells in the azimuthal direction are stored on a single processor.
2406 ! This is necessary for efficient application of the Fourier filter near the axis.
2407
2408 ! Initial values of the processor factorization optimization
2409 num_procs_x = 1
2410 num_procs_y = num_procs
2411 num_procs_z = 1
2412 ierr = -1
2413
2414 ! Computing minimization variable for these initial values
2415 tmp_num_procs_x = num_procs_x
2416 tmp_num_procs_y = num_procs_y
2417 tmp_num_procs_z = num_procs_z
2418 fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
2419 - (n + 1)/tmp_num_procs_y)
2420
2421 ! Searching for optimal computational domain distribution
2422 do i = 1, num_procs
2423
2424 if (mod(num_procs, i) == 0 &
2425 .and. &
2426 (m + 1)/i >= num_stcls_min*recon_order) then
2427
2428 tmp_num_procs_x = i
2429 tmp_num_procs_y = num_procs/i
2430
2431 if (fct_min >= abs((m + 1)/tmp_num_procs_x &
2432 - (n + 1)/tmp_num_procs_y) &
2433 .and. &
2434 (n + 1)/tmp_num_procs_y &
2435 >= &
2436 num_stcls_min*recon_order) then
2437
2438 num_procs_x = i
2439 num_procs_y = num_procs/i
2440 fct_min = abs((m + 1)/tmp_num_procs_x &
2441 - (n + 1)/tmp_num_procs_y)
2442 ierr = 0
2443
2444 end if
2445
2446 end if
2447
2448 end do
2449
2450 else
2451
2452 ! Initial estimate of optimal processor topology
2453 num_procs_x = 1
2454 num_procs_y = 1
2455 num_procs_z = num_procs
2456 ierr = -1
2457
2458 ! Benchmarking the quality of this initial guess
2459 tmp_num_procs_x = num_procs_x
2460 tmp_num_procs_y = num_procs_y
2461 tmp_num_procs_z = num_procs_z
2462 fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
2463 - (n + 1)/tmp_num_procs_y) &
2464 + 10._wp*abs((n + 1)/tmp_num_procs_y &
2465 - (p + 1)/tmp_num_procs_z)
2466
2467 ! Optimization of the initial processor topology
2468 do i = 1, num_procs
2469
2470 if (mod(num_procs, i) == 0 &
2471 .and. &
2472 (m + 1)/i >= num_stcls_min*recon_order) then
2473
2474 do j = 1, num_procs/i
2475
2476 if (mod(num_procs/i, j) == 0 &
2477 .and. &
2478 (n + 1)/j >= num_stcls_min*recon_order) then
2479
2480 tmp_num_procs_x = i
2481 tmp_num_procs_y = j
2482 tmp_num_procs_z = num_procs/(i*j)
2483
2484 if (fct_min >= abs((m + 1)/tmp_num_procs_x &
2485 - (n + 1)/tmp_num_procs_y) &
2486 + abs((n + 1)/tmp_num_procs_y &
2487 - (p + 1)/tmp_num_procs_z) &
2488 .and. &
2489 (p + 1)/tmp_num_procs_z &
2490 >= &
2491 num_stcls_min*recon_order) &
2492 then
2493
2494 num_procs_x = i
2495 num_procs_y = j
2496 num_procs_z = num_procs/(i*j)
2497 fct_min = abs((m + 1)/tmp_num_procs_x &
2498 - (n + 1)/tmp_num_procs_y) &
2499 + abs((n + 1)/tmp_num_procs_y &
2500 - (p + 1)/tmp_num_procs_z)
2501 ierr = 0
2502
2503 end if
2504
2505 end if
2506
2507 end do
2508
2509 end if
2510
2511 end do
2512
2513 end if
2514 end if
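            ! Illustrative note (added commentary; example values assumed): the
            ! general branch above scans factorizations num_procs = i*j*k and keeps
            ! the one minimizing |(m+1)/i - (n+1)/j| + |(n+1)/j - (p+1)/k|, subject
            ! to every direction keeping at least num_stcls_min*recon_order cells.
            ! For instance, with num_procs = 64 and m = n = p = 199 (200 cells per
            ! direction) the minimum is attained by a 4 x 4 x 4 topology, i.e. 50
            ! cells per rank in each direction, provided 50 satisfies the stencil
            ! constraint.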
2515
2516 ! Verifying that a valid decomposition of the computational
2517 ! domain has been established. If not, the simulation exits.
2518 if (proc_rank == 0 .and. ierr == -1) then
2519 call s_mpi_abort('Unsupported combination of values '// &
2520 'of num_procs, m, n, p and '// &
2521 'weno/muscl/igr_order. Exiting.')
2522 end if
2523
2524 ! Creating new communicator using the Cartesian topology
2525 call mpi_cart_create(mpi_comm_world, 3, (/num_procs_x, &
2526 num_procs_y, num_procs_z/), &
2527 (/.true., .true., .true./), &
2528 .false., mpi_comm_cart, ierr)
2529
2530 ! Finding the Cartesian coordinates of the local process
2531 call mpi_cart_coords(mpi_comm_cart, proc_rank, 3, &
2532 proc_coords, ierr)
2533 ! END: 3D Cartesian Processor Topology
2534
2535 ! Global Parameters for z-direction
2536
2537 ! Number of remaining cells
2538 rem_cells = mod(p + 1, num_procs_z)
2539
2540 ! Optimal number of cells per processor
2541 p = (p + 1)/num_procs_z - 1
2542
2543 ! Distributing the remaining cells
2544 do i = 1, rem_cells
2545 if (proc_coords(3) == i - 1) then
2546 p = p + 1; exit
2547 end if
2548 end do
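            ! Illustrative note (added commentary; example values assumed): with
            ! p_glb + 1 = 10 cells in z and num_procs_z = 3, the division above gives
            ! p = (10/3) - 1 = 2 (3 cells per rank) with rem_cells = 1, so the rank
            ! at proc_coords(3) = 0 is promoted to 4 cells and the remaining two
            ! ranks keep 3.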
2549
2550 ! Boundary condition at the beginning
2551 if (proc_coords(3) > 0 .or. (bc_z%beg == bc_periodic .and. num_procs_z > 1)) then
2552 proc_coords(3) = proc_coords(3) - 1
2553 call mpi_cart_rank(mpi_comm_cart, proc_coords, &
2554 bc_z%beg, ierr)
2555 proc_coords(3) = proc_coords(3) + 1
2556 end if
2557
2558 ! Boundary condition at the end
2559 if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == bc_periodic .and. num_procs_z > 1)) then
2560 proc_coords(3) = proc_coords(3) + 1
2561 call mpi_cart_rank(mpi_comm_cart, proc_coords, &
2562 bc_z%end, ierr)
2563 proc_coords(3) = proc_coords(3) - 1
2564 end if
2565
2566#ifdef MFC_POST_PROCESS
2567 ! Ghost zone at the beginning
2568 if (proc_coords(3) > 0 .and. format == 1) then
2569 offset_z%beg = 2
2570 else
2571 offset_z%beg = 0
2572 end if
2573
2574 ! Ghost zone at the end
2575 if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then
2576 offset_z%end = 2
2577 else
2578 offset_z%end = 0
2579 end if
2580#endif
2581
2582 ! Beginning and end sub-domain boundary locations
2583 if (parallel_io) then
2584 if (proc_coords(3) < rem_cells) then
2585 start_idx(3) = (p + 1)*proc_coords(3)
2586 else
2587 start_idx(3) = (p + 1)*proc_coords(3) + rem_cells
2588 end if
2589 else
2590#ifdef MFC_PRE_PROCESS
2591 if (old_grid .neqv. .true.) then
2592 dz = (z_domain%end - z_domain%beg)/real(p_glb + 1, wp)
2593
2594 if (proc_coords(3) < rem_cells) then
2595 z_domain%beg = z_domain%beg + dz*real((p + 1)* &
2596 proc_coords(3))
2597 z_domain%end = z_domain%end - dz*real((p + 1)* &
2598 (num_procs_z - proc_coords(3) - 1) &
2599 - (num_procs_z - rem_cells))
2600 else
2601 z_domain%beg = z_domain%beg + dz*real((p + 1)* &
2602 proc_coords(3) + rem_cells)
2603 z_domain%end = z_domain%end - dz*real((p + 1)* &
2604 (num_procs_z - proc_coords(3) - 1))
2605 end if
2606 end if
2607#endif
2608 end if
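            ! Illustrative note (added commentary; example values assumed): in the
            ! pre-process branch above the global z extent is re-cut per rank. With
            ! z_domain = [0, 1], p_glb + 1 = 10 and num_procs_z = 3 (rem_cells = 1),
            ! dz = 0.1 and the three ranks obtain the sub-domains [0, 0.4],
            ! [0.4, 0.7] and [0.7, 1.0], matching their 4, 3 and 3 cells.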
2609
2610 ! 2D Cartesian Processor Topology
2611 else
2612
2613 ! Initial estimate of optimal processor topology
2614 num_procs_x = 1
2615 num_procs_y = num_procs
2616 ierr = -1
2617
2618 ! Benchmarking the quality of this initial guess
2619 tmp_num_procs_x = num_procs_x
2620 tmp_num_procs_y = num_procs_y
2621 fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
2622 - (n + 1)/tmp_num_procs_y)
2623
2624 ! Optimization of the initial processor topology
2625 do i = 1, num_procs
2626
2627 if (mod(num_procs, i) == 0 &
2628 .and. &
2629 (m + 1)/i >= num_stcls_min*recon_order) then
2630
2631 tmp_num_procs_x = i
2632 tmp_num_procs_y = num_procs/i
2633
2634 if (fct_min >= abs((m + 1)/tmp_num_procs_x &
2635 - (n + 1)/tmp_num_procs_y) &
2636 .and. &
2637 (n + 1)/tmp_num_procs_y &
2638 >= &
2639 num_stcls_min*recon_order) then
2640
2641 num_procs_x = i
2642 num_procs_y = num_procs/i
2643 fct_min = abs((m + 1)/tmp_num_procs_x &
2644 - (n + 1)/tmp_num_procs_y)
2645 ierr = 0
2646
2647 end if
2648
2649 end if
2650
2651 end do
2652
2653 ! Verifying that a valid decomposition of the computational
2654 ! domain has been established. If not, the simulation exits.
2655 if (proc_rank == 0 .and. ierr == -1) then
2656 call s_mpi_abort('Unsupported combination of values '// &
2657 'of num_procs, m, n and '// &
2658 'weno/muscl/igr_order. Exiting.')
2659 end if
2660
2661 ! Creating new communicator using the Cartesian topology
2662 call mpi_cart_create(mpi_comm_world, 2, (/num_procs_x, &
2663 num_procs_y/), (/.true., &
2664 .true./), .false., mpi_comm_cart, &
2665 ierr)
2666
2667 ! Finding the Cartesian coordinates of the local process
2668 call mpi_cart_coords(mpi_comm_cart, proc_rank, 2, &
2669 proc_coords, ierr)
2670
2671 end if
2672 ! END: 2D Cartesian Processor Topology
2673
2674 ! Global Parameters for y-direction
2675
2676 ! Number of remaining cells
2677 rem_cells = mod(n + 1, num_procs_y)
2678
2679 ! Optimal number of cells per processor
2680 n = (n + 1)/num_procs_y - 1
2681
2682 ! Distributing the remaining cells
2683 do i = 1, rem_cells
2684 if (proc_coords(2) == i - 1) then
2685 n = n + 1; exit
2686 end if
2687 end do
2688
2689 ! Boundary condition at the beginning
2690 if (proc_coords(2) > 0 .or. (bc_y%beg == bc_periodic .and. num_procs_y > 1)) then
2691 proc_coords(2) = proc_coords(2) - 1
2692 call mpi_cart_rank(mpi_comm_cart, proc_coords, &
2693 bc_y%beg, ierr)
2694 proc_coords(2) = proc_coords(2) + 1
2695 end if
2696
2697 ! Boundary condition at the end
2698 if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == bc_periodic .and. num_procs_y > 1)) then
2699 proc_coords(2) = proc_coords(2) + 1
2700 call mpi_cart_rank(mpi_comm_cart, proc_coords, &
2701 bc_y%end, ierr)
2702 proc_coords(2) = proc_coords(2) - 1
2703 end if
2704
2705#ifdef MFC_POST_PROCESS
2706 ! Ghost zone at the beginning
2707 if (proc_coords(2) > 0 .and. format == 1) then
2708 offset_y%beg = 2
2709 else
2710 offset_y%beg = 0
2711 end if
2712
2713 ! Ghost zone at the end
2714 if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then
2715 offset_y%end = 2
2716 else
2717 offset_y%end = 0
2718 end if
2719#endif
2720
2721 ! Beginning and end sub-domain boundary locations
2722 if (parallel_io) then
2723 if (proc_coords(2) < rem_cells) then
2724 start_idx(2) = (n + 1)*proc_coords(2)
2725 else
2726 start_idx(2) = (n + 1)*proc_coords(2) + rem_cells
2727 end if
2728 else
2729#ifdef MFC_PRE_PROCESS
2730 if (old_grid .neqv. .true.) then
2731 dy = (y_domain%end - y_domain%beg)/real(n_glb + 1, wp)
2732
2733 if (proc_coords(2) < rem_cells) then
2734 y_domain%beg = y_domain%beg + dy*real((n + 1)* &
2735 proc_coords(2))
2736 y_domain%end = y_domain%end - dy*real((n + 1)* &
2737 (num_procs_y - proc_coords(2) - 1) &
2738 - (num_procs_y - rem_cells))
2739 else
2740 y_domain%beg = y_domain%beg + dy*real((n + 1)* &
2741 proc_coords(2) + rem_cells)
2742 y_domain%end = y_domain%end - dy*real((n + 1)* &
2743 (num_procs_y - proc_coords(2) - 1))
2744 end if
2745 end if
2746#endif
2747 end if
2748
2749 ! 1D Cartesian Processor Topology
2750 else
2751
2752 ! Optimal processor topology
2753 num_procs_x = num_procs
2754
2755 ! Creating new communicator using the Cartesian topology
2756 call mpi_cart_create(mpi_comm_world, 1, (/num_procs_x/), &
2757 (/.true./), .false., mpi_comm_cart, &
2758 ierr)
2759
2760 ! Finding the Cartesian coordinates of the local process
2761 call mpi_cart_coords(mpi_comm_cart, proc_rank, 1, &
2762 proc_coords, ierr)
2763
2764 end if
2765
2766 ! Global Parameters for x-direction
2767
2768 ! Number of remaining cells
2769 rem_cells = mod(m + 1, num_procs_x)
2770
2771 ! Optimal number of cells per processor
2772 m = (m + 1)/num_procs_x - 1
2773
2774 ! Distributing the remaining cells
2775 do i = 1, rem_cells
2776 if (proc_coords(1) == i - 1) then
2777 m = m + 1; exit
2778 end if
2779 end do
2780
2781 call s_update_cell_bounds(cells_bounds, m, n, p)
2782
2783 ! Boundary condition at the beginning
2784 if (proc_coords(1) > 0 .or. (bc_x%beg == bc_periodic .and. num_procs_x > 1)) then
2785 proc_coords(1) = proc_coords(1) - 1
2786 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_x%beg, ierr)
2787 proc_coords(1) = proc_coords(1) + 1
2788 end if
2789
2790 ! Boundary condition at the end
2791 if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == bc_periodic .and. num_procs_x > 1)) then
2792 proc_coords(1) = proc_coords(1) + 1
2793 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_x%end, ierr)
2794 proc_coords(1) = proc_coords(1) - 1
2795 end if
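        ! Note (added commentary): after the two blocks above, bc_x%beg and bc_x%end
        ! hold the MPI ranks of the neighbouring sub-domains (values >= 0) wherever a
        ! neighbour exists or the boundary is periodic, and otherwise keep their
        ! negative physical boundary-condition codes. The halo-exchange routines in
        ! this module key off that sign (see the bc_x%end >= 0 tests below).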
2796
2797#ifdef MFC_POST_PROCESS
2798 ! Ghost zone at the beginning
2799 if (proc_coords(1) > 0 .and. format == 1) then
2800 offset_x%beg = 2
2801 else
2802 offset_x%beg = 0
2803 end if
2804
2805 ! Ghost zone at the end
2806 if (proc_coords(1) < num_procs_x - 1 .and. format == 1) then
2807 offset_x%end = 2
2808 else
2809 offset_x%end = 0
2810 end if
2811#endif
2812
2813 ! Beginning and end sub-domain boundary locations
2814 if (parallel_io) then
2815 if (proc_coords(1) < rem_cells) then
2816 start_idx(1) = (m + 1)*proc_coords(1)
2817 else
2818 start_idx(1) = (m + 1)*proc_coords(1) + rem_cells
2819 end if
2820 else
2821#ifdef MFC_PRE_PROCESS
2822 if (old_grid .neqv. .true.) then
2823 dx = (x_domain%end - x_domain%beg)/real(m_glb + 1, wp)
2824
2825 if (proc_coords(1) < rem_cells) then
2826 x_domain%beg = x_domain%beg + dx*real((m + 1)* &
2827 proc_coords(1))
2828 x_domain%end = x_domain%end - dx*real((m + 1)* &
2829 (num_procs_x - proc_coords(1) - 1) &
2830 - (num_procs_x - rem_cells))
2831 else
2832 x_domain%beg = x_domain%beg + dx*real((m + 1)* &
2833 proc_coords(1) + rem_cells)
2834 x_domain%end = x_domain%end - dx*real((m + 1)* &
2835 (num_procs_x - proc_coords(1) - 1))
2836 end if
2837 end if
2838#endif
2839 end if
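        ! Illustrative note (added commentary; example values assumed): with
        ! m_glb + 1 = 10 global cells in x and num_procs_x = 3 (rem_cells = 1), the
        ! ranks at proc_coords(1) = 0, 1, 2 obtain local sizes 4, 3 and 3 and global
        ! offsets start_idx(1) = 0, 4 and 7, which is how parallel I/O locates each
        ! sub-domain inside the global arrays.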
2840#endif
2841
2842 end subroutine s_mpi_decompose_computational_domain
2843
2844 !> The goal of this procedure is to populate the buffers of
2845 !! the grid variables by communicating with the neighboring
2846 !! processors. Note that only the buffers of the cell-width
2847 !! distributions are handled in such a way. This is because
2848 !! the buffers of cell-boundary locations may be calculated
2849 !! directly from those of the cell-width distributions.
2850 !! @param mpi_dir MPI communication coordinate direction
2851 !! @param pbc_loc Processor boundary condition (PBC) location
2852#ifndef MFC_PRE_PROCESS
2853 subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc)
2854
2855 integer, intent(in) :: mpi_dir
2856 integer, intent(in) :: pbc_loc
2857
2858#ifdef MFC_MPI
2859 integer :: ierr !< Generic flag used to identify and report MPI errors
2860
2861 ! MPI Communication in x-direction
2862 if (mpi_dir == 1) then
2863
2864 if (pbc_loc == -1) then ! PBC at the beginning
2865
2866 if (bc_x%end >= 0) then ! PBC at the beginning and end
2867
2868 ! Send/receive buffer to/from bc_x%end/bc_x%beg
2869 call mpi_sendrecv( &
2870 dx(m - buff_size + 1), buff_size, &
2871 mpi_p, bc_x%end, 0, &
2872 dx(-buff_size), buff_size, &
2873 mpi_p, bc_x%beg, 0, &
2874 mpi_comm_world, mpi_status_ignore, ierr)
2875
2876 else ! PBC at the beginning only
2877
2878 ! Send/receive buffer to/from bc_x%beg/bc_x%beg
2879 call mpi_sendrecv( &
2880 dx(0), buff_size, &
2881 mpi_p, bc_x%beg, 1, &
2882 dx(-buff_size), buff_size, &
2883 mpi_p, bc_x%beg, 0, &
2884 mpi_comm_world, mpi_status_ignore, ierr)
2885
2886 end if
2887
2888 else ! PBC at the end
2889
2890 if (bc_x%beg >= 0) then ! PBC at the end and beginning
2891
2892 ! Send/receive buffer to/from bc_x%beg/bc_x%end
2893 call mpi_sendrecv( &
2894 dx(0), buff_size, &
2895 mpi_p, bc_x%beg, 1, &
2896 dx(m + 1), buff_size, &
2897 mpi_p, bc_x%end, 1, &
2898 mpi_comm_world, mpi_status_ignore, ierr)
2899
2900 else ! PBC at the end only
2901
2902 ! Send/receive buffer to/from bc_x%end/bc_x%end
2903 call mpi_sendrecv( &
2904 dx(m - buff_size + 1), buff_size, &
2905 mpi_p, bc_x%end, 0, &
2906 dx(m + 1), buff_size, &
2907 mpi_p, bc_x%end, 1, &
2908 mpi_comm_world, mpi_status_ignore, ierr)
2909
2910 end if
2911
2912 end if
2913 ! END: MPI Communication in x-direction
2914
2915 ! MPI Communication in y-direction
2916 elseif (mpi_dir == 2) then
2917
2918 if (pbc_loc == -1) then ! PBC at the beginning
2919
2920 if (bc_y%end >= 0) then ! PBC at the beginning and end
2921
2922 ! Send/receive buffer to/from bc_y%end/bc_y%beg
2923 call mpi_sendrecv( &
2924 dy(n - buff_size + 1), buff_size, &
2925 mpi_p, bc_y%end, 0, &
2926 dy(-buff_size), buff_size, &
2927 mpi_p, bc_y%beg, 0, &
2928 mpi_comm_world, mpi_status_ignore, ierr)
2929
2930 else ! PBC at the beginning only
2931
2932 ! Send/receive buffer to/from bc_y%beg/bc_y%beg
2933 call mpi_sendrecv( &
2934 dy(0), buff_size, &
2935 mpi_p, bc_y%beg, 1, &
2936 dy(-buff_size), buff_size, &
2937 mpi_p, bc_y%beg, 0, &
2938 mpi_comm_world, mpi_status_ignore, ierr)
2939
2940 end if
2941
2942 else ! PBC at the end
2943
2944 if (bc_y%beg >= 0) then ! PBC at the end and beginning
2945
2946 ! Send/receive buffer to/from bc_y%beg/bc_y%end
2947 call mpi_sendrecv( &
2948 dy(0), buff_size, &
2949 mpi_p, bc_y%beg, 1, &
2950 dy(n + 1), buff_size, &
2951 mpi_p, bc_y%end, 1, &
2952 mpi_comm_world, mpi_status_ignore, ierr)
2953
2954 else ! PBC at the end only
2955
2956 ! Send/receive buffer to/from bc_y%end/bc_y%end
2957 call mpi_sendrecv( &
2958 dy(n - buff_size + 1), buff_size, &
2959 mpi_p, bc_y%end, 0, &
2960 dy(n + 1), buff_size, &
2961 mpi_p, bc_y%end, 1, &
2962 mpi_comm_world, mpi_status_ignore, ierr)
2963
2964 end if
2965
2966 end if
2967 ! END: MPI Communication in y-direction
2968
2969 ! MPI Communication in z-direction
2970 else
2971
2972 if (pbc_loc == -1) then ! PBC at the beginning
2973
2974 if (bc_z%end >= 0) then ! PBC at the beginning and end
2975
2976 ! Send/receive buffer to/from bc_z%end/bc_z%beg
2977 call mpi_sendrecv( &
2978 dz(p - buff_size + 1), buff_size, &
2979 mpi_p, bc_z%end, 0, &
2980 dz(-buff_size), buff_size, &
2981 mpi_p, bc_z%beg, 0, &
2982 mpi_comm_world, mpi_status_ignore, ierr)
2983
2984 else ! PBC at the beginning only
2985
2986 ! Send/receive buffer to/from bc_z%beg/bc_z%beg
2987 call mpi_sendrecv( &
2988 dz(0), buff_size, &
2989 mpi_p, bc_z%beg, 1, &
2990 dz(-buff_size), buff_size, &
2991 mpi_p, bc_z%beg, 0, &
2992 mpi_comm_world, mpi_status_ignore, ierr)
2993
2994 end if
2995
2996 else ! PBC at the end
2997
2998 if (bc_z%beg >= 0) then ! PBC at the end and beginning
2999
3000 ! Send/receive buffer to/from bc_z%beg/bc_z%end
3001 call mpi_sendrecv( &
3002 dz(0), buff_size, &
3003 mpi_p, bc_z%beg, 1, &
3004 dz(p + 1), buff_size, &
3005 mpi_p, bc_z%end, 1, &
3006 mpi_comm_world, mpi_status_ignore, ierr)
3007
3008 else ! PBC at the end only
3009
3010 ! Send/receive buffer to/from bc_z%end/bc_z%end
3011 call mpi_sendrecv( &
3012 dz(p - buff_size + 1), buff_size, &
3013 mpi_p, bc_z%end, 0, &
3014 dz(p + 1), buff_size, &
3015 mpi_p, bc_z%end, 1, &
3016 mpi_comm_world, mpi_status_ignore, ierr)
3017
3018 end if
3019
3020 end if
3021
3022 end if
3023 ! END: MPI Communication in z-direction
3024#endif
3025
3026 end subroutine s_mpi_sendrecv_grid_variables_buffers
3027#endif
3028
3029 !> Module deallocation and/or disassociation procedures
3030 impure subroutine s_finalize_mpi_common_module
3031
3032#ifdef MFC_MPI
3033 deallocate (buff_send, buff_recv)
3034#endif
3035
3036 end subroutine s_finalize_mpi_common_module
3037
3038end module m_mpi_common