MFC
Exascale flow solver
Loading...
Searching...
No Matches
m_mpi_common.fpp.f90
Go to the documentation of this file.
1# 1 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2!>
3!! @file
4!! @brief Contains module m_mpi_common
5
6# 1 "/home/runner/work/MFC/MFC/src/common/include/case.fpp" 1
7! This file exists so that Fypp can be run without generating case.fpp files for
8! each target. This is useful when generating documentation, for example. This
9! should also let MFC be built with CMake directly, without invoking mfc.sh.
10
11! For pre-process.
12# 8 "/home/runner/work/MFC/MFC/src/common/include/case.fpp"
13
14! For moving immersed boundaries in simulation
15# 12 "/home/runner/work/MFC/MFC/src/common/include/case.fpp"
16# 6 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp" 2
17# 1 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 1
18# 1 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 1
19# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
20# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
21# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
22# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
23# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
24# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
25
26# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
27# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
28# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
29
30# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
31
32# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
33
34# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
35
36# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
37
38# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
39
40# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
41
42# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
43
44# 145 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
45! New line at end of file is required for FYPP
46# 2 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
47# 1 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 1
48# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
49# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
50# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
51# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
52# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
53# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
54
55# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
56# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
57# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
58
59# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
60
61# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
62
63# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
64
65# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
66
67# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
68
69# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
70
71# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
72
73# 145 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
74! New line at end of file is required for FYPP
75# 2 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 2
76
77# 4 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
78# 5 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
79# 6 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
80# 7 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
81# 8 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
82
83# 20 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
84
85# 43 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
86
87# 48 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
88
89# 53 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
90
91# 58 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
92
93# 63 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
94
95# 68 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
96
97# 76 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
98
99# 81 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
100
101# 86 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
102
103# 91 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
104
105# 96 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
106
107# 101 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
108
109# 106 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
110
111# 111 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
112
113# 116 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
114
115# 121 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
116
117# 151 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
118
119# 192 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
120
121# 206 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
122
123# 231 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
124
125# 242 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
126
127# 244 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
128# 255 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
129
130# 284 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
131
132# 294 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
133
134# 304 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
135
136# 313 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
137
138# 330 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
139
140# 340 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
141
142# 347 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
143
144# 353 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
145
146# 359 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
147
148# 365 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
149
150# 371 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
151
152# 377 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
153! New line at end of file is required for FYPP
154# 3 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
155# 1 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 1
156# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
157# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
158# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
159# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
160# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
161# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
162
163# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
164# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
165# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
166
167# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
168
169# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
170
171# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
172
173# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
174
175# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
176
177# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
178
179# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
180
181# 145 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
182! New line at end of file is required for FYPP
183# 2 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 2
184
185# 7 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
186
187# 17 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
188
189# 22 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
190
191# 27 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
192
193# 32 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
194
195# 37 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
196
197# 42 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
198
199# 47 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
200
201# 52 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
202
203# 57 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
204
205# 62 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
206
207# 73 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
208
209# 78 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
210
211# 83 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
212
213# 88 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
214
215# 103 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
216
217# 131 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
218
219# 160 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
220
221# 175 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
222
223# 193 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
224
225# 215 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
226
227# 244 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
228
229# 259 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
230
231# 269 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
232
233# 278 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
234
235# 294 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
236
237# 304 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
238
239# 311 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
240! New line at end of file is required for FYPP
241# 4 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
242
243! GPU parallel region (scalar reductions, maxval/minval)
244# 23 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
245
246! GPU parallel loop over threads (most common GPU macro)
247# 43 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
248
249! Required closing for GPU_PARALLEL_LOOP
250# 55 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
251
252! Mark routine for device compilation
253# 112 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
254
255! Declare device-resident data
256# 130 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
257
258! Inner loop within a GPU parallel region
259# 145 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
260
261! Scoped GPU data region
262# 164 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
263
264! Host code with device pointers (for MPI with GPU buffers)
265# 193 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
266
267! Allocate device memory (unscoped)
268# 207 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
269
270! Free device memory
271# 219 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
272
273! Atomic operation on device
274# 231 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
275
276! End atomic capture block
277# 242 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
278
279! Copy data between host and device
280# 254 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
281
282! Synchronization barrier
283# 266 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
284
285! Import GPU library module (openacc or omp_lib)
286# 275 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
287
288! Emit code only for AMD compiler
289# 282 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
290
291! Emit code for non-Cray compilers
292# 289 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
293
294! Emit code only for Cray compiler
295# 296 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
296
297! Emit code for non-NVIDIA compilers
298# 303 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
299
300# 305 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
301# 306 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
302! New line at end of file is required for FYPP
303# 2 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 2
304
305# 14 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
306
307! Caution: This macro requires the use of a binding script to set CUDA_VISIBLE_DEVICES, such that we have one GPU device per MPI
308! rank. That's because for both cudaMemAdvise (preferred location) and cudaMemPrefetchAsync we use location = device_id = 0. For an
309! example see misc/nvidia_uvm/bind.sh. NVIDIA unified memory page placement hint
310# 57 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
311
312! Allocate and create GPU device memory
313# 77 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
314
315! Free GPU device memory and deallocate
316# 85 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
317
318! Cray-specific GPU pointer setup for vector fields
319# 109 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
320
321! Cray-specific GPU pointer setup for scalar fields
322# 125 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
323
324! Cray-specific GPU pointer setup for acoustic source spatials
325# 150 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
326
327# 156 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
328
329# 163 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
330! New line at end of file is required for FYPP
331# 7 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp" 2
332
333!> @brief MPI communication layer: domain decomposition, halo exchange, reductions, and parallel I/O setup
335
336#ifdef MFC_MPI
337 use mpi !< message passing interface (mpi) module
338#endif
339
342 use m_helper
343 use ieee_arithmetic
344 use m_nvtx
346
347 implicit none
348
349 integer, private :: v_size
350
351# 25 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
352#if defined(MFC_OpenACC)
353# 25 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
354!$acc declare create(v_size)
355# 25 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
356#elif defined(MFC_OpenMP)
357# 25 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
358!$omp declare target (v_size)
359# 25 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
360#endif
361
362 real(wp), private, allocatable, dimension(:) :: buff_send !< Primitive variable send buffer for halo exchange
363 real(wp), private, allocatable, dimension(:) :: buff_recv !< Primitive variable receive buffer for halo exchange
364#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
365
366# 30 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
367#if defined(MFC_OpenACC)
368# 30 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
369!$acc declare create(buff_send, buff_recv)
370# 30 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
371#elif defined(MFC_OpenMP)
372# 30 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
373!$omp declare target (buff_send, buff_recv)
374# 30 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
375#endif
376#endif
377
378 integer(kind=8) :: halo_size
379
380# 34 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
381#if defined(MFC_OpenACC)
382# 34 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
383!$acc declare create(halo_size)
384# 34 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
385#elif defined(MFC_OpenMP)
386# 34 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
387!$omp declare target (halo_size)
388# 34 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
389#endif
390
391contains
392
393 !> Initialize the module.
395
396#ifdef MFC_MPI
397 ! Allocating buff_send/recv and. Please note that for the sake of simplicity, both variables are provided sufficient storage
398 ! to hold the largest buffer in the computational domain.
399
400 if (qbmm .and. .not. polytropic) then
401 v_size = sys_size + 2*nb*nnode
402 else if (chemistry .and. chem_params%diffusion) then
403 v_size = sys_size + 1
404 else
405 v_size = sys_size
406 end if
407
408 if (n > 0) then
409 if (p > 0) then
410 halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)*(m + 2*buff_size + 1)*(n + 2*buff_size + 1)*(p + 2*buff_size &
411 & + 1)/(cells_bounds%mnp_min + 2*buff_size + 1))
412 else
413 halo_size = -1 + buff_size*(v_size)*(cells_bounds%mn_max + 2*buff_size + 1)
414 end if
415 else
416 halo_size = -1 + buff_size*(v_size)
417 end if
418
419
420# 64 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
421#if defined(MFC_OpenACC)
422# 64 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
423!$acc update device(halo_size, v_size)
424# 64 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
425#elif defined(MFC_OpenMP)
426# 64 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
427!$omp target update to(halo_size, v_size)
428# 64 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
429#endif
430
431#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
432#ifdef MFC_DEBUG
433# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
434 block
435# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
436 use iso_fortran_env, only: output_unit
437# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
438
439# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
440 print *, 'm_mpi_common.fpp:67: ', '@:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size))'
441# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
442
443# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
444 call flush (output_unit)
445# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
446 end block
447# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
448#endif
449# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
450 allocate (buff_send(0:halo_size), buff_recv(0:halo_size))
451# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
452
453# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
454
455# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
456
457# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
458#if defined(MFC_OpenACC)
459# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
460!$acc enter data create(buff_send, buff_recv)
461# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
462#elif defined(MFC_OpenMP)
463# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
464!$omp target enter data map(always,alloc:buff_send, buff_recv)
465# 67 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
466#endif
467#else
468 allocate (buff_send(0:halo_size), buff_recv(0:halo_size))
469
470# 70 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
471#if defined(MFC_OpenACC)
472# 70 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
473!$acc enter data create(capture:buff_send)
474# 70 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
475#elif defined(MFC_OpenMP)
476# 70 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
477!$omp target enter data map(always,alloc:capture:buff_send)
478# 70 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
479#endif
480
481# 71 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
482#if defined(MFC_OpenACC)
483# 71 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
484!$acc enter data create(capture:buff_recv)
485# 71 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
486#elif defined(MFC_OpenMP)
487# 71 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
488!$omp target enter data map(always,alloc:capture:buff_recv)
489# 71 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
490#endif
491#endif
492#endif
493
494 end subroutine s_initialize_mpi_common_module
495
496 !> Initialize the MPI execution environment and query the number of processors and local rank.
497 impure subroutine s_mpi_initialize
498
499#ifdef MFC_MPI
500 integer :: ierr !< Generic flag used to identify and report MPI errors
501
502 call mpi_init(ierr)
503
504 if (ierr /= mpi_success) then
505 print '(A)', 'Unable to initialize MPI environment. Exiting.'
506 call mpi_abort(mpi_comm_world, 1, ierr)
507 end if
508
509 call mpi_comm_size(mpi_comm_world, num_procs, ierr)
510
511 call mpi_comm_rank(mpi_comm_world, proc_rank, ierr)
512#else
513 num_procs = 1
514 proc_rank = 0
515#endif
516
517
518# 98 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
519#if defined(MFC_OpenACC)
520# 98 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
521!$acc update device(num_procs, proc_rank)
522# 98 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
523#elif defined(MFC_OpenMP)
524# 98 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
525!$omp target update to(num_procs, proc_rank)
526# 98 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
527#endif
528
529 end subroutine s_mpi_initialize
530
531 !> Set up MPI I/O data views and variable pointers for parallel file output.
532 impure subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, beta)
533
534 type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf
535 type(integer_field), optional, intent(in) :: ib_markers
536 type(scalar_field), intent(in), optional :: beta
537 integer, dimension(num_dims) :: sizes_glb, sizes_loc
538
539#ifdef MFC_MPI
540 integer :: i, j
541 integer :: ierr !< Generic flag used to identify and report MPI errors
542 integer :: alt_sys
543
544 if (present(beta)) then
545 alt_sys = sys_size + 1
546 else
547 alt_sys = sys_size
548 end if
549
550 do i = 1, sys_size
551 mpi_io_data%var(i)%sf => q_cons_vf(i)%sf(0:m,0:n,0:p)
552 end do
553
554 if (present(beta)) then
555 mpi_io_data%var(alt_sys)%sf => beta%sf(0:m,0:n,0:p)
556 end if
557
558 ! Additional variables pb and mv for non-polytropic qbmm
559 if (qbmm .and. .not. polytropic) then
560 do i = 1, nb
561 do j = 1, nnode
562#ifdef MFC_PRE_PROCESS
563 mpi_io_data%var(sys_size + (i - 1)*nnode + j)%sf => pb%sf(0:m,0:n,0:p,j, i)
564 mpi_io_data%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv%sf(0:m,0:n,0:p,j, i)
565#elif defined (MFC_SIMULATION)
566 mpi_io_data%var(sys_size + (i - 1)*nnode + j)%sf => pb_ts(1)%sf(0:m,0:n,0:p,j, i)
567 mpi_io_data%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv_ts(1)%sf(0:m,0:n,0:p,j, i)
568#endif
569 end do
570 end do
571 end if
572
573 ! Define global(g) and local(l) sizes for flow variables
574 sizes_glb(1) = m_glb + 1; sizes_loc(1) = m + 1
575 if (n > 0) then
576 sizes_glb(2) = n_glb + 1; sizes_loc(2) = n + 1
577 if (p > 0) then
578 sizes_glb(num_dims) = p_glb + 1; sizes_loc(num_dims) = p + 1
579 end if
580 end if
581
582 ! Define the view for each variable
583 do i = 1, alt_sys
584 call mpi_type_create_subarray(num_dims, sizes_glb, sizes_loc, start_idx, mpi_order_fortran, mpi_p, &
585 & mpi_io_data%view(i), ierr)
586 call mpi_type_commit(mpi_io_data%view(i), ierr)
587 end do
588
589#ifndef MFC_POST_PROCESS
590 if (qbmm .and. .not. polytropic) then
591 do i = sys_size + 1, sys_size + 2*nb*nnode
592 call mpi_type_create_subarray(num_dims, sizes_glb, sizes_loc, start_idx, mpi_order_fortran, mpi_p, &
593 & mpi_io_data%view(i), ierr)
594 call mpi_type_commit(mpi_io_data%view(i), ierr)
595 end do
596 end if
597#endif
598
599#ifndef MFC_PRE_PROCESS
600 if (present(ib_markers)) then
601 mpi_io_ib_data%var%sf => ib_markers%sf(0:m,0:n,0:p)
602
603 call mpi_type_create_subarray(num_dims, sizes_glb, sizes_loc, start_idx, mpi_order_fortran, mpi_integer, &
604 & mpi_io_ib_data%view, ierr)
605 call mpi_type_commit(mpi_io_ib_data%view, ierr)
606 end if
607#endif
608#endif
609
610 end subroutine s_initialize_mpi_data
611
612 !> Set up MPI I/O data views for downsampled (coarsened) parallel file output.
613 subroutine s_initialize_mpi_data_ds(q_cons_vf)
614
615 type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf
616 integer, dimension(num_dims) :: sizes_loc
617 integer, dimension(3) :: sf_start_idx
618
619#ifdef MFC_MPI
620 integer :: i, m_ds, n_ds, p_ds, ierr
621
622 sf_start_idx = (/0, 0, 0/)
623
624#ifndef MFC_POST_PROCESS
625 m_ds = int((m + 1)/3) - 1
626 n_ds = int((n + 1)/3) - 1
627 p_ds = int((p + 1)/3) - 1
628#else
629 m_ds = m
630 n_ds = n
631 p_ds = p
632#endif
633
634#ifdef MFC_POST_PROCESS
635 do i = 1, sys_size
636 mpi_io_data%var(i)%sf => q_cons_vf(i)%sf(-1:m_ds + 1,-1:n_ds + 1,-1:p_ds + 1)
637 end do
638#endif
639 ! Define global(g) and local(l) sizes for flow variables
640 sizes_loc(1) = m_ds + 3
641 if (n > 0) then
642 sizes_loc(2) = n_ds + 3
643 if (p > 0) then
644 sizes_loc(num_dims) = p_ds + 3
645 end if
646 end if
647
648 ! Define the view for each variable
649 do i = 1, sys_size
650 call mpi_type_create_subarray(num_dims, sizes_loc, sizes_loc, sf_start_idx, mpi_order_fortran, mpi_p, &
651 & mpi_io_data%view(i), ierr)
652 call mpi_type_commit(mpi_io_data%view(i), ierr)
653 end do
654#endif
655
656 end subroutine s_initialize_mpi_data_ds
657
658 !> Gather variable-length real vectors from all MPI ranks onto the root process.
659 impure subroutine s_mpi_gather_data(my_vector, counts, gathered_vector, root)
660
661 integer, intent(in) :: counts !< Array of vector lengths for each process
662 real(wp), intent(in), dimension(counts) :: my_vector !< Input vector on each process
663 integer, intent(in) :: root !< Rank of the root process
664 real(wp), allocatable, intent(out) :: gathered_vector(:) !< Gathered vector on the root process
665 integer :: i
666 integer :: ierr !< Generic flag used to identify and report MPI errors
667 integer, allocatable :: recounts(:), displs(:)
668
669#ifdef MFC_MPI
670 allocate (recounts(num_procs))
671
672 call mpi_gather(counts, 1, mpi_integer, recounts, 1, mpi_integer, root, mpi_comm_world, ierr)
673
674 allocate (displs(size(recounts)))
675
676 displs(1) = 0
677
678 do i = 2, size(recounts)
679 displs(i) = displs(i - 1) + recounts(i - 1)
680 end do
681
682 allocate (gathered_vector(sum(recounts)))
683 call mpi_gatherv(my_vector, counts, mpi_p, gathered_vector, recounts, displs, mpi_p, root, mpi_comm_world, ierr)
684#endif
685
686 end subroutine s_mpi_gather_data
687
688 !> Gather per-rank time step wall-clock times onto rank 0 for performance reporting.
689 impure subroutine mpi_bcast_time_step_values(proc_time, time_avg)
690
691 real(wp), dimension(0:num_procs - 1), intent(inout) :: proc_time
692 real(wp), intent(inout) :: time_avg
693
694#ifdef MFC_MPI
695 integer :: ierr !< Generic flag used to identify and report MPI errors
696
697 call mpi_gather(time_avg, 1, mpi_p, proc_time(0), 1, mpi_p, 0, mpi_comm_world, ierr)
698#endif
699
700 end subroutine mpi_bcast_time_step_values
701
702 !> Print a case file error with the prohibited condition and message, then abort execution.
703 impure subroutine s_prohibit_abort(condition, message)
704
705 character(len=*), intent(in) :: condition, message
706
707 print *, ""
708 print *, "CASE FILE ERROR"
709 print *, " - Prohibited condition: ", trim(condition)
710 if (len_trim(message) > 0) then
711 print *, " - Note: ", trim(message)
712 end if
713 print *, ""
714 call s_mpi_abort(code=case_file_error_code)
715
716 end subroutine s_prohibit_abort
717
718 !> The goal of this subroutine is to determine the global extrema of the stability criteria in the computational domain. This is
719 !! performed by sifting through the local extrema of each stability criterion. Note that each of the local extrema is from a
720 !! single process, within its assigned section of the computational domain. Finally, note that the global extrema values are
721 !! only bookkeept on the rank 0 processor.
722 impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, vcfl_max_loc, Rc_min_loc, icfl_max_glb, vcfl_max_glb, &
723 & Rc_min_glb)
724
725 real(wp), intent(in) :: icfl_max_loc
726 real(wp), intent(in) :: vcfl_max_loc
727 real(wp), intent(in) :: rc_min_loc
728 real(wp), intent(out) :: icfl_max_glb
729 real(wp), intent(out) :: vcfl_max_glb
730 real(wp), intent(out) :: rc_min_glb
731
732 icfl_max_glb = icfl_max_loc
733 vcfl_max_glb = vcfl_max_loc
734 rc_min_glb = rc_min_loc
735
736#ifdef MFC_SIMULATION
737#ifdef MFC_MPI
738 block
739 integer :: ierr
740
741 call mpi_reduce(icfl_max_loc, icfl_max_glb, 1, mpi_p, mpi_max, 0, mpi_comm_world, ierr)
742
743 if (viscous) then
744 call mpi_reduce(vcfl_max_loc, vcfl_max_glb, 1, mpi_p, mpi_max, 0, mpi_comm_world, ierr)
745 call mpi_reduce(rc_min_loc, rc_min_glb, 1, mpi_p, mpi_min, 0, mpi_comm_world, ierr)
746 end if
747 end block
748#else
749 icfl_max_glb = icfl_max_loc
750
751 if (viscous) then
752 vcfl_max_glb = vcfl_max_loc
753 rc_min_glb = rc_min_loc
754 end if
755#endif
756#endif
757
759
760 !> Reduce a local real value to its global sum across all MPI ranks.
761 impure subroutine s_mpi_allreduce_sum(var_loc, var_glb)
762
763 real(wp), intent(in) :: var_loc
764 real(wp), intent(out) :: var_glb
765
766#ifdef MFC_MPI
767 integer :: ierr !< Generic flag used to identify and report MPI errors
768
769 call mpi_allreduce(var_loc, var_glb, 1, mpi_p, mpi_sum, mpi_comm_world, ierr)
770#endif
771
772 end subroutine s_mpi_allreduce_sum
773
774 !> Reduce an array of vectors to their global sums across all MPI ranks.
775 impure subroutine s_mpi_allreduce_vectors_sum(var_loc, var_glb, num_vectors, vector_length)
776
777 integer, intent(in) :: num_vectors, vector_length
778 real(wp), dimension(:,:), intent(in) :: var_loc
779 real(wp), dimension(:,:), intent(inout) :: var_glb
780
781#ifdef MFC_MPI
782 integer :: ierr !< Generic flag used to identify and report MPI errors
783
784 if (loc(var_loc) == loc(var_glb)) then
785 call mpi_allreduce(mpi_in_place, var_glb, num_vectors*vector_length, mpi_p, mpi_sum, mpi_comm_world, ierr)
786 else
787 call mpi_allreduce(var_loc, var_glb, num_vectors*vector_length, mpi_p, mpi_sum, mpi_comm_world, ierr)
788 end if
789#else
790 var_glb(1:num_vectors,1:vector_length) = var_loc(1:num_vectors,1:vector_length)
791#endif
792
793 end subroutine s_mpi_allreduce_vectors_sum
794
795 !> Reduce a local integer value to its global sum across all MPI ranks.
796 impure subroutine s_mpi_allreduce_integer_sum(var_loc, var_glb)
797
798 integer(kind=8), intent(in) :: var_loc
799 integer(kind=8), intent(out) :: var_glb
800
801#ifdef MFC_MPI
802 integer :: ierr !< Generic flag used to identify and report MPI errors
803
804 call mpi_allreduce(var_loc, var_glb, 1, mpi_integer8, mpi_sum, mpi_comm_world, ierr)
805#else
806 var_glb = var_loc
807#endif
808
809 end subroutine s_mpi_allreduce_integer_sum
810
811 !> Reduce a local real value to its global minimum across all MPI ranks.
812 impure subroutine s_mpi_allreduce_min(var_loc, var_glb)
813
814 real(wp), intent(in) :: var_loc
815 real(wp), intent(out) :: var_glb
816
817#ifdef MFC_MPI
818 integer :: ierr !< Generic flag used to identify and report MPI errors
819
820 call mpi_allreduce(var_loc, var_glb, 1, mpi_p, mpi_min, mpi_comm_world, ierr)
821#endif
822
823 end subroutine s_mpi_allreduce_min
824
825 !> Reduce a local real value to its global maximum across all MPI ranks.
826 impure subroutine s_mpi_allreduce_max(var_loc, var_glb)
827
828 real(wp), intent(in) :: var_loc
829 real(wp), intent(out) :: var_glb
830
831#ifdef MFC_MPI
832 integer :: ierr !< Generic flag used to identify and report MPI errors
833
834 call mpi_allreduce(var_loc, var_glb, 1, mpi_p, mpi_max, mpi_comm_world, ierr)
835#endif
836
837 end subroutine s_mpi_allreduce_max
838
839 !> Reduce a local real value to its global minimum across all ranks
840 impure subroutine s_mpi_reduce_min(var_loc)
841
842 real(wp), intent(inout) :: var_loc
843
844#ifdef MFC_MPI
845 integer :: ierr !< Generic flag used to identify and report MPI errors
846 real(wp) :: var_glb
847
848 call mpi_reduce(var_loc, var_glb, 1, mpi_p, mpi_min, 0, mpi_comm_world, ierr)
849
850 call mpi_bcast(var_glb, 1, mpi_p, 0, mpi_comm_world, ierr)
851
852 var_loc = var_glb
853#endif
854
855 end subroutine s_mpi_reduce_min
856
857 !> Reduce a 2-element variable to its global maximum value with the owning processor rank (MPI_MAXLOC).
858 !> Reduce a local value to its global maximum with location (rank) across all ranks
859 impure subroutine s_mpi_reduce_maxloc(var_loc)
860
861 real(wp), dimension(2), intent(inout) :: var_loc
862
863#ifdef MFC_MPI
864 integer :: ierr !< Generic flag used to identify and report MPI errors
865 real(wp), dimension(2) :: var_glb !< Reduced (max value, rank) pair
866 call mpi_reduce(var_loc, var_glb, 1, mpi_2p, mpi_maxloc, 0, mpi_comm_world, ierr)
867
868 call mpi_bcast(var_glb, 1, mpi_2p, 0, mpi_comm_world, ierr)
869
870 var_loc = var_glb
871#endif
872
873 end subroutine s_mpi_reduce_maxloc
874
875 !> The subroutine terminates the MPI execution environment.
876 impure subroutine s_mpi_abort(prnt, code)
877
878 character(len=*), intent(in), optional :: prnt
879 integer, intent(in), optional :: code
880
881#ifdef MFC_MPI
882 integer :: ierr !< Generic flag used to identify and report MPI errors
883#endif
884
885 if (present(prnt)) then
886 print *, prnt
887 call flush (6)
888 end if
889
890#ifndef MFC_MPI
891 if (present(code)) then
892 stop code
893 else
894 stop 1
895 end if
896#else
897 if (present(code)) then
898 call mpi_abort(mpi_comm_world, code, ierr)
899 else
900 call mpi_abort(mpi_comm_world, 1, ierr)
901 end if
902#endif
903
904 end subroutine s_mpi_abort
905
906 !> Halts all processes until all have reached barrier.
907 impure subroutine s_mpi_barrier
908
909#ifdef MFC_MPI
910 integer :: ierr !< Generic flag used to identify and report MPI errors
911
912 call mpi_barrier(mpi_comm_world, ierr)
913#endif
914
915 end subroutine s_mpi_barrier
916
917 !> The subroutine finalizes the MPI execution environment.
918 impure subroutine s_mpi_finalize
919
920#ifdef MFC_MPI
921 integer :: ierr !< Generic flag used to identify and report MPI errors
922
923 call mpi_finalize(ierr)
924#endif
925
926 end subroutine s_mpi_finalize
927
928 !> The goal of this procedure is to populate the buffers of the cell-average conservative variables by communicating with the
929 !! neighboring processors.
930 subroutine s_mpi_sendrecv_variables_buffers(q_comm, mpi_dir, pbc_loc, nVar, pb_in, mv_in, q_T_sf)
931
932 type(scalar_field), dimension(1:), intent(inout) :: q_comm
933 real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(inout) :: pb_in, mv_in
934 integer, intent(in) :: mpi_dir, pbc_loc, nVar
935 integer :: i, j, k, l, r, q !< Generic loop iterators
936 integer :: buffer_counts(1:3), buffer_count
937 type(int_bounds_info) :: boundary_conditions(1:3)
938 integer :: beg_end(1:2), grid_dims(1:3)
939 integer :: dst_proc, src_proc, recv_tag, send_tag
940 logical :: beg_end_geq_0, qbmm_comm, chem_diff_comm
941 integer :: pack_offset, unpack_offset
942 type(scalar_field), optional, intent(inout) :: q_T_sf
943
944#ifdef MFC_MPI
945 integer :: ierr !< Generic flag used to identify and report MPI errors
946
947 call nvtxstartrange("RHS-COMM-PACKBUF")
948
949 qbmm_comm = .false.
950 chem_diff_comm = .false.
951
952 if (present(pb_in) .and. present(mv_in) .and. qbmm .and. .not. polytropic) then
953 qbmm_comm = .true.
954 v_size = nvar + 2*nb*nnode
955 buffer_counts = (/buff_size*v_size*(n + 1)*(p + 1), buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), &
956 & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1)/)
957 else if (present(q_t_sf) .and. chemistry .and. chem_params%diffusion) then
958 chem_diff_comm = .true.
959 v_size = nvar + 1
960 buffer_counts = (/buff_size*v_size*(n + 1)*(p + 1), buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), &
961 & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1)/)
962 else
963 v_size = nvar
964 buffer_counts = (/buff_size*v_size*(n + 1)*(p + 1), buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), &
965 & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1)/)
966 end if
967
968
969# 539 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
970#if defined(MFC_OpenACC)
971# 539 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
972!$acc update device(v_size)
973# 539 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
974#elif defined(MFC_OpenMP)
975# 539 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
976!$omp target update to(v_size)
977# 539 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
978#endif
979
980 buffer_count = buffer_counts(mpi_dir)
981 boundary_conditions = (/bc_x, bc_y, bc_z/)
982 beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/)
983 beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0
984
985 ! Implements: pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] -1 (=0) 1
986 ! -> [0,0] [1,0] | 0 1 [0,0] [end,beg] +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1]
987 ! [beg,end]
988
989 send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1))
990 recv_tag = f_logical_to_int(pbc_loc == 1)
991
992 dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0)))
993 src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1))
994
995 grid_dims = (/m, n, p/)
996
997 pack_offset = 0
998 if (f_xor(pbc_loc == 1, beg_end_geq_0)) then
999 pack_offset = grid_dims(mpi_dir) - buff_size + 1
1000 end if
1001
1002 unpack_offset = 0
1003 if (pbc_loc == 1) then
1004 unpack_offset = grid_dims(mpi_dir) + buff_size + 1
1005 end if
1006
1007 ! Pack Buffer to Send
1008# 570 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1009 if (mpi_dir == 1) then
1010# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1011
1012# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1013
1014# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1015#if defined(MFC_OpenACC)
1016# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1017!$acc parallel loop collapse(4) gang vector default(present) private(r)
1018# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1019#elif defined(MFC_OpenMP)
1020# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1021
1022# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1023
1024# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1025
1026# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1027!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1028# 572 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1029#endif
1030 do l = 0, p
1031 do k = 0, n
1032 do j = 0, buff_size - 1
1033 do i = 1, nvar
1034 r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l))
1035 buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp)
1036 end do
1037 end do
1038 end do
1039 end do
1040
1041# 583 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1042#if defined(MFC_OpenACC)
1043# 583 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1044!$acc end parallel loop
1045# 583 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1046#elif defined(MFC_OpenMP)
1047# 583 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1048
1049# 583 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1050!$omp end target teams loop
1051# 583 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1052#endif
1053
1054 if (chem_diff_comm) then
1055
1056# 586 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1057
1058# 586 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1059#if defined(MFC_OpenACC)
1060# 586 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1061!$acc parallel loop collapse(3) gang vector default(present) private(r)
1062# 586 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1063#elif defined(MFC_OpenMP)
1064# 586 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1065
1066# 586 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1067
1068# 586 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1069
1070# 586 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1071!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1072# 586 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1073#endif
1074 do l = 0, p
1075 do k = 0, n
1076 do j = 0, buff_size - 1
1077 r = nvar + v_size*(j + buff_size*(k + (n + 1)*l))
1078 buff_send(r) = real(q_t_sf%sf(j + pack_offset, k, l), kind=wp)
1079 end do
1080 end do
1081 end do
1082
1083# 595 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1084#if defined(MFC_OpenACC)
1085# 595 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1086!$acc end parallel loop
1087# 595 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1088#elif defined(MFC_OpenMP)
1089# 595 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1090
1091# 595 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1092!$omp end target teams loop
1093# 595 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1094#endif
1095 end if
1096
1097 if (qbmm_comm) then
1098
1099# 599 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1100
1101# 599 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1102#if defined(MFC_OpenACC)
1103# 599 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1104!$acc parallel loop collapse(4) gang vector default(present) private(r)
1105# 599 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1106#elif defined(MFC_OpenMP)
1107# 599 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1108
1109# 599 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1110
1111# 599 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1112
1113# 599 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1114!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1115# 599 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1116#endif
1117 do l = 0, p
1118 do k = 0, n
1119 do j = 0, buff_size - 1
1120 do i = nvar + 1, nvar + nnode
1121 do q = 1, nb
1122 r = (i - 1) + (q - 1)*nnode + v_size*(j + buff_size*(k + (n + 1)*l))
1123 buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nvar, q), kind=wp)
1124 end do
1125 end do
1126 end do
1127 end do
1128 end do
1129
1130# 612 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1131#if defined(MFC_OpenACC)
1132# 612 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1133!$acc end parallel loop
1134# 612 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1135#elif defined(MFC_OpenMP)
1136# 612 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1137
1138# 612 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1139!$omp end target teams loop
1140# 612 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1141#endif
1142
1143
1144# 614 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1145
1146# 614 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1147#if defined(MFC_OpenACC)
1148# 614 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1149!$acc parallel loop collapse(5) gang vector default(present) private(r)
1150# 614 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1151#elif defined(MFC_OpenMP)
1152# 614 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1153
1154# 614 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1155
1156# 614 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1157
1158# 614 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1159!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1160# 614 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1161#endif
1162 do l = 0, p
1163 do k = 0, n
1164 do j = 0, buff_size - 1
1165 do i = nvar + 1, nvar + nnode
1166 do q = 1, nb
1167 r = (i - 1) + (q - 1)*nnode + nb*nnode + v_size*(j + buff_size*(k + (n + 1)*l))
1168 buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nvar, q), kind=wp)
1169 end do
1170 end do
1171 end do
1172 end do
1173 end do
1174
1175# 627 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1176#if defined(MFC_OpenACC)
1177# 627 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1178!$acc end parallel loop
1179# 627 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1180#elif defined(MFC_OpenMP)
1181# 627 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1182
1183# 627 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1184!$omp end target teams loop
1185# 627 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1186#endif
1187 end if
1188# 752 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1189 end if
1190# 570 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1191 if (mpi_dir == 2) then
1192# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1193
1194# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1195
1196# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1197#if defined(MFC_OpenACC)
1198# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1199!$acc parallel loop collapse(4) gang vector default(present) private(r)
1200# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1201#elif defined(MFC_OpenMP)
1202# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1203
1204# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1205
1206# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1207
1208# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1209!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1210# 630 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1211#endif
1212 do i = 1, nvar
1213 do l = 0, p
1214 do k = 0, buff_size - 1
1215 do j = -buff_size, m + buff_size
1216 r = (i - 1) + v_size*((j + buff_size) + (m + 2*buff_size + 1)*(k + buff_size*l))
1217 buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp)
1218 end do
1219 end do
1220 end do
1221 end do
1222
1223# 641 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1224#if defined(MFC_OpenACC)
1225# 641 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1226!$acc end parallel loop
1227# 641 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1228#elif defined(MFC_OpenMP)
1229# 641 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1230
1231# 641 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1232!$omp end target teams loop
1233# 641 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1234#endif
1235
1236 if (chem_diff_comm) then
1237
1238# 644 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1239
1240# 644 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1241#if defined(MFC_OpenACC)
1242# 644 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1243!$acc parallel loop collapse(3) gang vector default(present) private(r)
1244# 644 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1245#elif defined(MFC_OpenMP)
1246# 644 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1247
1248# 644 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1249
1250# 644 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1251
1252# 644 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1253!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1254# 644 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1255#endif
1256 do l = 0, p
1257 do k = 0, buff_size - 1
1258 do j = -buff_size, m + buff_size
1259 r = nvar + v_size*((j + buff_size) + (m + 2*buff_size + 1)*(k + buff_size*l))
1260 buff_send(r) = real(q_t_sf%sf(j, k + pack_offset, l), kind=wp)
1261 end do
1262 end do
1263 end do
1264
1265# 653 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1266#if defined(MFC_OpenACC)
1267# 653 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1268!$acc end parallel loop
1269# 653 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1270#elif defined(MFC_OpenMP)
1271# 653 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1272
1273# 653 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1274!$omp end target teams loop
1275# 653 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1276#endif
1277 end if
1278
1279 if (qbmm_comm) then
1280
1281# 657 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1282
1283# 657 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1284#if defined(MFC_OpenACC)
1285# 657 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1286!$acc parallel loop collapse(5) gang vector default(present) private(r)
1287# 657 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1288#elif defined(MFC_OpenMP)
1289# 657 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1290
1291# 657 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1292
1293# 657 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1294
1295# 657 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1296!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1297# 657 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1298#endif
1299 do i = nvar + 1, nvar + nnode
1300 do l = 0, p
1301 do k = 0, buff_size - 1
1302 do j = -buff_size, m + buff_size
1303 do q = 1, nb
1304 r = (i - 1) + (q - 1)*nnode + v_size*((j + buff_size) + (m + 2*buff_size + 1)*(k &
1305 & + buff_size*l))
1306 buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nvar, q), kind=wp)
1307 end do
1308 end do
1309 end do
1310 end do
1311 end do
1312
1313# 671 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1314#if defined(MFC_OpenACC)
1315# 671 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1316!$acc end parallel loop
1317# 671 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1318#elif defined(MFC_OpenMP)
1319# 671 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1320
1321# 671 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1322!$omp end target teams loop
1323# 671 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1324#endif
1325
1326
1327# 673 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1328
1329# 673 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1330#if defined(MFC_OpenACC)
1331# 673 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1332!$acc parallel loop collapse(5) gang vector default(present) private(r)
1333# 673 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1334#elif defined(MFC_OpenMP)
1335# 673 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1336
1337# 673 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1338
1339# 673 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1340
1341# 673 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1342!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1343# 673 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1344#endif
1345 do i = nvar + 1, nvar + nnode
1346 do l = 0, p
1347 do k = 0, buff_size - 1
1348 do j = -buff_size, m + buff_size
1349 do q = 1, nb
1350 r = (i - 1) + (q - 1)*nnode + nb*nnode + v_size*((j + buff_size) + (m + 2*buff_size &
1351 & + 1)*(k + buff_size*l))
1352 buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nvar, q), kind=wp)
1353 end do
1354 end do
1355 end do
1356 end do
1357 end do
1358
1359# 687 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1360#if defined(MFC_OpenACC)
1361# 687 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1362!$acc end parallel loop
1363# 687 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1364#elif defined(MFC_OpenMP)
1365# 687 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1366
1367# 687 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1368!$omp end target teams loop
1369# 687 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1370#endif
1371 end if
1372# 752 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1373 end if
1374# 570 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1375 if (mpi_dir == 3) then
1376# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1377
1378# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1379
1380# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1381#if defined(MFC_OpenACC)
1382# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1383!$acc parallel loop collapse(4) gang vector default(present) private(r)
1384# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1385#elif defined(MFC_OpenMP)
1386# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1387
1388# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1389
1390# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1391
1392# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1393!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1394# 690 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1395#endif
1396 do i = 1, nvar
1397 do l = 0, buff_size - 1
1398 do k = -buff_size, n + buff_size
1399 do j = -buff_size, m + buff_size
1400 r = (i - 1) + v_size*((j + buff_size) + (m + 2*buff_size + 1)*((k + buff_size) + (n &
1401 & + 2*buff_size + 1)*l))
1402 buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp)
1403 end do
1404 end do
1405 end do
1406 end do
1407
1408# 702 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1409#if defined(MFC_OpenACC)
1410# 702 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1411!$acc end parallel loop
1412# 702 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1413#elif defined(MFC_OpenMP)
1414# 702 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1415
1416# 702 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1417!$omp end target teams loop
1418# 702 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1419#endif
1420
1421 if (chem_diff_comm) then
1422
1423# 705 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1424
1425# 705 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1426#if defined(MFC_OpenACC)
1427# 705 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1428!$acc parallel loop collapse(3) gang vector default(present) private(r)
1429# 705 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1430#elif defined(MFC_OpenMP)
1431# 705 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1432
1433# 705 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1434
1435# 705 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1436
1437# 705 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1438!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1439# 705 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1440#endif
1441 do l = 0, buff_size - 1
1442 do k = -buff_size, n + buff_size
1443 do j = -buff_size, m + buff_size
1444 r = nvar + v_size*((j + buff_size) + (m + 2*buff_size + 1)*((k + buff_size) + (n &
1445 & + 2*buff_size + 1)*l))
1446 buff_send(r) = real(q_t_sf%sf(j, k, l + pack_offset), kind=wp)
1447 end do
1448 end do
1449 end do
1450
1451# 715 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1452#if defined(MFC_OpenACC)
1453# 715 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1454!$acc end parallel loop
1455# 715 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1456#elif defined(MFC_OpenMP)
1457# 715 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1458
1459# 715 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1460!$omp end target teams loop
1461# 715 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1462#endif
1463 end if
1464
1465 if (qbmm_comm) then
1466
1467# 719 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1468
1469# 719 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1470#if defined(MFC_OpenACC)
1471# 719 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1472!$acc parallel loop collapse(5) gang vector default(present) private(r)
1473# 719 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1474#elif defined(MFC_OpenMP)
1475# 719 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1476
1477# 719 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1478
1479# 719 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1480
1481# 719 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1482!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1483# 719 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1484#endif
1485 do i = nvar + 1, nvar + nnode
1486 do l = 0, buff_size - 1
1487 do k = -buff_size, n + buff_size
1488 do j = -buff_size, m + buff_size
1489 do q = 1, nb
1490 r = (i - 1) + (q - 1)*nnode + v_size*((j + buff_size) + (m + 2*buff_size + 1)*((k &
1491 & + buff_size) + (n + 2*buff_size + 1)*l))
1492 buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nvar, q), kind=wp)
1493 end do
1494 end do
1495 end do
1496 end do
1497 end do
1498
1499# 733 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1500#if defined(MFC_OpenACC)
1501# 733 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1502!$acc end parallel loop
1503# 733 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1504#elif defined(MFC_OpenMP)
1505# 733 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1506
1507# 733 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1508!$omp end target teams loop
1509# 733 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1510#endif
1511
1512
1513# 735 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1514
1515# 735 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1516#if defined(MFC_OpenACC)
1517# 735 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1518!$acc parallel loop collapse(5) gang vector default(present) private(r)
1519# 735 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1520#elif defined(MFC_OpenMP)
1521# 735 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1522
1523# 735 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1524
1525# 735 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1526
1527# 735 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1528!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1529# 735 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1530#endif
1531 do i = nvar + 1, nvar + nnode
1532 do l = 0, buff_size - 1
1533 do k = -buff_size, n + buff_size
1534 do j = -buff_size, m + buff_size
1535 do q = 1, nb
1536 r = (i - 1) + (q - 1)*nnode + nb*nnode + v_size*((j + buff_size) + (m + 2*buff_size &
1537 & + 1)*((k + buff_size) + (n + 2*buff_size + 1)*l))
1538 buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nvar, q), kind=wp)
1539 end do
1540 end do
1541 end do
1542 end do
1543 end do
1544
1545# 749 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1546#if defined(MFC_OpenACC)
1547# 749 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1548!$acc end parallel loop
1549# 749 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1550#elif defined(MFC_OpenMP)
1551# 749 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1552
1553# 749 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1554!$omp end target teams loop
1555# 749 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1556#endif
1557 end if
1558# 752 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1559 end if
1560# 754 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1561 call nvtxendrange ! Packbuf
1562
1563 ! Send/Recv
1564#ifdef MFC_SIMULATION
1565# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1566 if (rdma_mpi .eqv. .false.) then
1567# 771 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1568 call nvtxstartrange("RHS-COMM-DEV2HOST")
1569
1570# 772 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1571#if defined(MFC_OpenACC)
1572# 772 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1573!$acc update host(buff_send)
1574# 772 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1575#elif defined(MFC_OpenMP)
1576# 772 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1577!$omp target update from(buff_send)
1578# 772 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1579#endif
1580 call nvtxendrange
1581 call nvtxstartrange("RHS-COMM-SENDRECV-NO-RMDA")
1582
1583 call mpi_sendrecv(buff_send, buffer_count, mpi_p, dst_proc, send_tag, buff_recv, buffer_count, mpi_p, &
1584 & src_proc, recv_tag, mpi_comm_world, mpi_status_ignore, ierr)
1585
1586 call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1587
1588 call nvtxstartrange("RHS-COMM-HOST2DEV")
1589
1590# 782 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1591#if defined(MFC_OpenACC)
1592# 782 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1593!$acc update device(buff_recv)
1594# 782 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1595#elif defined(MFC_OpenMP)
1596# 782 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1597!$omp target update to(buff_recv)
1598# 782 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1599#endif
1600 call nvtxendrange
1601# 785 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1602 end if
1603# 759 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1604 if (rdma_mpi .eqv. .true.) then
1605# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1606
1607# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1608#if defined(MFC_OpenACC)
1609# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1610!$acc host_data use_device(buff_send, buff_recv)
1611# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1612 call nvtxstartrange("RHS-COMM-SENDRECV-RDMA")
1613# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1614
1615# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1616 call mpi_sendrecv(buff_send, buffer_count, mpi_p, dst_proc, send_tag, buff_recv, buffer_count, mpi_p, &
1617# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1618 & src_proc, recv_tag, mpi_comm_world, mpi_status_ignore, ierr)
1619# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1620
1621# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1622 call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1623# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1624!$acc end host_data
1625# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1626#elif defined(MFC_OpenMP)
1627# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1628!$omp target data use_device_addr(buff_send, buff_recv)
1629# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1630 call nvtxstartrange("RHS-COMM-SENDRECV-RDMA")
1631# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1632
1633# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1634 call mpi_sendrecv(buff_send, buffer_count, mpi_p, dst_proc, send_tag, buff_recv, buffer_count, mpi_p, &
1635# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1636 & src_proc, recv_tag, mpi_comm_world, mpi_status_ignore, ierr)
1637# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1638
1639# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1640 call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1641# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1642!$omp end target data
1643# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1644#else
1645# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1646 call nvtxstartrange("RHS-COMM-SENDRECV-RDMA")
1647# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1648
1649# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1650 call mpi_sendrecv(buff_send, buffer_count, mpi_p, dst_proc, send_tag, buff_recv, buffer_count, mpi_p, &
1651# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1652 & src_proc, recv_tag, mpi_comm_world, mpi_status_ignore, ierr)
1653# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1654
1655# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1656 call nvtxendrange ! RHS-MPI-SENDRECV-(NO)-RDMA
1657# 761 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1658#endif
1659# 769 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1660
1661# 769 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1662#if defined(MFC_OpenACC)
1663# 769 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1664!$acc wait
1665# 769 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1666#elif defined(MFC_OpenMP)
1667# 769 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1668!$omp barrier
1669# 769 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1670#endif
1671# 785 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1672 end if
1673# 787 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1674#else
1675 call mpi_sendrecv(buff_send, buffer_count, mpi_p, dst_proc, send_tag, buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
1676 & mpi_comm_world, mpi_status_ignore, ierr)
1677#endif
1678
1679 ! Unpack Received Buffer
1680 call nvtxstartrange("RHS-COMM-UNPACKBUF")
1681# 795 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1682 if (mpi_dir == 1) then
1683# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1684
1685# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1686
1687# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1688#if defined(MFC_OpenACC)
1689# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1690!$acc parallel loop collapse(4) gang vector default(present) private(r)
1691# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1692#elif defined(MFC_OpenMP)
1693# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1694
1695# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1696
1697# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1698
1699# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1700!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1701# 797 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1702#endif
1703 do l = 0, p
1704 do k = 0, n
1705 do j = -buff_size, -1
1706 do i = 1, nvar
1707 r = (i - 1) + v_size*(j + buff_size*((k + 1) + (n + 1)*l))
1708 q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp)
1709#if defined(__INTEL_COMPILER)
1710 if (ieee_is_nan(q_comm(i)%sf(j + unpack_offset, k, l))) then
1711 print *, "Error", j, k, l, i
1712 call s_mpi_abort("NaN(s) in recv")
1713 end if
1714#endif
1715 end do
1716 end do
1717 end do
1718 end do
1719
1720# 814 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1721#if defined(MFC_OpenACC)
1722# 814 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1723!$acc end parallel loop
1724# 814 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1725#elif defined(MFC_OpenMP)
1726# 814 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1727
1728# 814 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1729!$omp end target teams loop
1730# 814 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1731#endif
1732
1733 if (chem_diff_comm) then
1734
1735# 817 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1736
1737# 817 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1738#if defined(MFC_OpenACC)
1739# 817 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1740!$acc parallel loop collapse(3) gang vector default(present) private(r)
1741# 817 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1742#elif defined(MFC_OpenMP)
1743# 817 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1744
1745# 817 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1746
1747# 817 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1748
1749# 817 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1750!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1751# 817 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1752#endif
1753 do l = 0, p
1754 do k = 0, n
1755 do j = -buff_size, -1
1756 r = nvar + v_size*(j + buff_size*((k + 1) + (n + 1)*l))
1757 q_t_sf%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp)
1758#if defined(__INTEL_COMPILER)
1759 if (ieee_is_nan(q_t_sf%sf(j + unpack_offset, k, l))) then
1760 print *, "Error", j, k, l
1761 call s_mpi_abort("NaN(s) in recv")
1762 end if
1763#endif
1764 end do
1765 end do
1766 end do
1767
1768# 832 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1769#if defined(MFC_OpenACC)
1770# 832 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1771!$acc end parallel loop
1772# 832 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1773#elif defined(MFC_OpenMP)
1774# 832 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1775
1776# 832 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1777!$omp end target teams loop
1778# 832 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1779#endif
1780 end if
1781
1782 if (qbmm_comm) then
1783
1784# 836 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1785
1786# 836 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1787#if defined(MFC_OpenACC)
1788# 836 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1789!$acc parallel loop collapse(5) gang vector default(present) private(r)
1790# 836 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1791#elif defined(MFC_OpenMP)
1792# 836 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1793
1794# 836 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1795
1796# 836 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1797
1798# 836 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1799!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1800# 836 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1801#endif
1802 do l = 0, p
1803 do k = 0, n
1804 do j = -buff_size, -1
1805 do i = nvar + 1, nvar + nnode
1806 do q = 1, nb
1807 r = (i - 1) + (q - 1)*nnode + v_size*(j + buff_size*((k + 1) + (n + 1)*l))
1808 pb_in(j + unpack_offset, k, l, i - nvar, q) = real(buff_recv(r), kind=stp)
1809 end do
1810 end do
1811 end do
1812 end do
1813 end do
1814
1815# 849 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1816#if defined(MFC_OpenACC)
1817# 849 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1818!$acc end parallel loop
1819# 849 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1820#elif defined(MFC_OpenMP)
1821# 849 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1822
1823# 849 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1824!$omp end target teams loop
1825# 849 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1826#endif
1827
1828
1829# 851 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1830
1831# 851 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1832#if defined(MFC_OpenACC)
1833# 851 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1834!$acc parallel loop collapse(5) gang vector default(present) private(r)
1835# 851 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1836#elif defined(MFC_OpenMP)
1837# 851 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1838
1839# 851 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1840
1841# 851 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1842
1843# 851 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1844!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1845# 851 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1846#endif
1847 do l = 0, p
1848 do k = 0, n
1849 do j = -buff_size, -1
1850 do i = nvar + 1, nvar + nnode
1851 do q = 1, nb
1852 r = (i - 1) + (q - 1)*nnode + nb*nnode + v_size*(j + buff_size*((k + 1) + (n + 1)*l))
1853 mv_in(j + unpack_offset, k, l, i - nvar, q) = real(buff_recv(r), kind=stp)
1854 end do
1855 end do
1856 end do
1857 end do
1858 end do
1859
1860# 864 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1861#if defined(MFC_OpenACC)
1862# 864 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1863!$acc end parallel loop
1864# 864 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1865#elif defined(MFC_OpenMP)
1866# 864 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1867
1868# 864 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1869!$omp end target teams loop
1870# 864 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1871#endif
1872 end if
1873# 1013 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1874 end if
1875# 795 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1876 if (mpi_dir == 2) then
1877# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1878
1879# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1880
1881# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1882#if defined(MFC_OpenACC)
1883# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1884!$acc parallel loop collapse(4) gang vector default(present) private(r)
1885# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1886#elif defined(MFC_OpenMP)
1887# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1888
1889# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1890
1891# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1892
1893# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1894!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1895# 867 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1896#endif
1897 do i = 1, nvar
1898 do l = 0, p
1899 do k = -buff_size, -1
1900 do j = -buff_size, m + buff_size
1901 r = (i - 1) + v_size*((j + buff_size) + (m + 2*buff_size + 1)*((k + buff_size) + buff_size*l))
1902 q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp)
1903#if defined(__INTEL_COMPILER)
1904 if (ieee_is_nan(q_comm(i)%sf(j, k + unpack_offset, l))) then
1905 print *, "Error", j, k, l, i
1906 call s_mpi_abort("NaN(s) in recv")
1907 end if
1908#endif
1909 end do
1910 end do
1911 end do
1912 end do
1913
1914# 884 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1915#if defined(MFC_OpenACC)
1916# 884 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1917!$acc end parallel loop
1918# 884 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1919#elif defined(MFC_OpenMP)
1920# 884 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1921
1922# 884 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1923!$omp end target teams loop
1924# 884 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1925#endif
1926
1927 if (chem_diff_comm) then
1928
1929# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1930
1931# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1932#if defined(MFC_OpenACC)
1933# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1934!$acc parallel loop collapse(3) gang vector default(present) private(r)
1935# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1936#elif defined(MFC_OpenMP)
1937# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1938
1939# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1940
1941# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1942
1943# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1944!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1945# 887 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1946#endif
1947 do l = 0, p
1948 do k = -buff_size, -1
1949 do j = -buff_size, m + buff_size
1950 r = nvar + v_size*((j + buff_size) + (m + 2*buff_size + 1)*((k + buff_size) + buff_size*l))
1951 q_t_sf%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp)
1952#if defined(__INTEL_COMPILER)
1953 if (ieee_is_nan(q_t_sf%sf(j, k + unpack_offset, l))) then
1954 print *, "Error", j, k, l
1955 call s_mpi_abort("NaN(s) in recv")
1956 end if
1957#endif
1958 end do
1959 end do
1960 end do
1961
1962# 902 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1963#if defined(MFC_OpenACC)
1964# 902 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1965!$acc end parallel loop
1966# 902 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1967#elif defined(MFC_OpenMP)
1968# 902 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1969
1970# 902 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1971!$omp end target teams loop
1972# 902 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1973#endif
1974 end if
1975
1976 if (qbmm_comm) then
1977
1978# 906 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1979
1980# 906 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1981#if defined(MFC_OpenACC)
1982# 906 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1983!$acc parallel loop collapse(5) gang vector default(present) private(r)
1984# 906 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1985#elif defined(MFC_OpenMP)
1986# 906 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1987
1988# 906 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1989
1990# 906 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1991
1992# 906 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1993!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
1994# 906 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
1995#endif
1996 do i = nvar + 1, nvar + nnode
1997 do l = 0, p
1998 do k = -buff_size, -1
1999 do j = -buff_size, m + buff_size
2000 do q = 1, nb
2001 r = (i - 1) + (q - 1)*nnode + v_size*((j + buff_size) + (m + 2*buff_size + 1)*((k &
2002 & + buff_size) + buff_size*l))
2003 pb_in(j, k + unpack_offset, l, i - nvar, q) = real(buff_recv(r), kind=stp)
2004 end do
2005 end do
2006 end do
2007 end do
2008 end do
2009
2010# 920 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2011#if defined(MFC_OpenACC)
2012# 920 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2013!$acc end parallel loop
2014# 920 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2015#elif defined(MFC_OpenMP)
2016# 920 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2017
2018# 920 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2019!$omp end target teams loop
2020# 920 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2021#endif
2022
2023
2024# 922 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2025
2026# 922 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2027#if defined(MFC_OpenACC)
2028# 922 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2029!$acc parallel loop collapse(5) gang vector default(present) private(r)
2030# 922 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2031#elif defined(MFC_OpenMP)
2032# 922 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2033
2034# 922 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2035
2036# 922 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2037
2038# 922 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2039!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2040# 922 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2041#endif
2042 do i = nvar + 1, nvar + nnode
2043 do l = 0, p
2044 do k = -buff_size, -1
2045 do j = -buff_size, m + buff_size
2046 do q = 1, nb
2047 r = (i - 1) + (q - 1)*nnode + nb*nnode + v_size*((j + buff_size) + (m + 2*buff_size &
2048 & + 1)*((k + buff_size) + buff_size*l))
2049 mv_in(j, k + unpack_offset, l, i - nvar, q) = real(buff_recv(r), kind=stp)
2050 end do
2051 end do
2052 end do
2053 end do
2054 end do
2055
2056# 936 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2057#if defined(MFC_OpenACC)
2058# 936 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2059!$acc end parallel loop
2060# 936 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2061#elif defined(MFC_OpenMP)
2062# 936 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2063
2064# 936 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2065!$omp end target teams loop
2066# 936 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2067#endif
2068 end if
2069# 1013 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2070 end if
2071# 795 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2072 if (mpi_dir == 3) then
2073# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2074
2075# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2076
2077# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2078#if defined(MFC_OpenACC)
2079# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2080!$acc parallel loop collapse(4) gang vector default(present) private(r)
2081# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2082#elif defined(MFC_OpenMP)
2083# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2084
2085# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2086
2087# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2088
2089# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2090!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(4) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2091# 939 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2092#endif
2093 do i = 1, nvar
2094 do l = -buff_size, -1
2095 do k = -buff_size, n + buff_size
2096 do j = -buff_size, m + buff_size
2097 r = (i - 1) + v_size*((j + buff_size) + (m + 2*buff_size + 1)*((k + buff_size) + (n &
2098 & + 2*buff_size + 1)*(l + buff_size)))
2099 q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp)
2100#if defined(__INTEL_COMPILER)
2101 if (ieee_is_nan(q_comm(i)%sf(j, k, l + unpack_offset))) then
2102 print *, "Error", j, k, l, i
2103 call s_mpi_abort("NaN(s) in recv")
2104 end if
2105#endif
2106 end do
2107 end do
2108 end do
2109 end do
2110
2111# 957 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2112#if defined(MFC_OpenACC)
2113# 957 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2114!$acc end parallel loop
2115# 957 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2116#elif defined(MFC_OpenMP)
2117# 957 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2118
2119# 957 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2120!$omp end target teams loop
2121# 957 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2122#endif
2123
2124 if (chem_diff_comm) then
2125
2126# 960 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2127
2128# 960 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2129#if defined(MFC_OpenACC)
2130# 960 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2131!$acc parallel loop collapse(3) gang vector default(present) private(r)
2132# 960 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2133#elif defined(MFC_OpenMP)
2134# 960 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2135
2136# 960 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2137
2138# 960 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2139
2140# 960 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2141!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2142# 960 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2143#endif
2144 do l = -buff_size, -1
2145 do k = -buff_size, n + buff_size
2146 do j = -buff_size, m + buff_size
2147 r = nvar + v_size*((j + buff_size) + (m + 2*buff_size + 1)*((k + buff_size) + (n &
2148 & + 2*buff_size + 1)*(l + buff_size)))
2149 q_t_sf%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp)
2150#if defined(__INTEL_COMPILER)
2151 if (ieee_is_nan(q_t_sf%sf(j, k, l + unpack_offset))) then
2152 print *, "Error", j, k, l
2153 call s_mpi_abort("NaN(s) in recv")
2154 end if
2155#endif
2156 end do
2157 end do
2158 end do
2159
2160# 976 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2161#if defined(MFC_OpenACC)
2162# 976 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2163!$acc end parallel loop
2164# 976 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2165#elif defined(MFC_OpenMP)
2166# 976 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2167
2168# 976 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2169!$omp end target teams loop
2170# 976 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2171#endif
2172 end if
2173
2174 if (qbmm_comm) then
2175
2176# 980 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2177
2178# 980 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2179#if defined(MFC_OpenACC)
2180# 980 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2181!$acc parallel loop collapse(5) gang vector default(present) private(r)
2182# 980 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2183#elif defined(MFC_OpenMP)
2184# 980 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2185
2186# 980 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2187
2188# 980 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2189
2190# 980 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2191!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2192# 980 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2193#endif
2194 do i = nvar + 1, nvar + nnode
2195 do l = -buff_size, -1
2196 do k = -buff_size, n + buff_size
2197 do j = -buff_size, m + buff_size
2198 do q = 1, nb
2199 r = (i - 1) + (q - 1)*nnode + v_size*((j + buff_size) + (m + 2*buff_size + 1)*((k &
2200 & + buff_size) + (n + 2*buff_size + 1)*(l + buff_size)))
2201 pb_in(j, k, l + unpack_offset, i - nvar, q) = real(buff_recv(r), kind=stp)
2202 end do
2203 end do
2204 end do
2205 end do
2206 end do
2207
2208# 994 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2209#if defined(MFC_OpenACC)
2210# 994 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2211!$acc end parallel loop
2212# 994 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2213#elif defined(MFC_OpenMP)
2214# 994 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2215
2216# 994 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2217!$omp end target teams loop
2218# 994 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2219#endif
2220
2221
2222# 996 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2223
2224# 996 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2225#if defined(MFC_OpenACC)
2226# 996 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2227!$acc parallel loop collapse(5) gang vector default(present) private(r)
2228# 996 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2229#elif defined(MFC_OpenMP)
2230# 996 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2231
2232# 996 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2233
2234# 996 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2235
2236# 996 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2237!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(5) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) private(r)
2238# 996 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2239#endif
2240 do i = nvar + 1, nvar + nnode
2241 do l = -buff_size, -1
2242 do k = -buff_size, n + buff_size
2243 do j = -buff_size, m + buff_size
2244 do q = 1, nb
2245 r = (i - 1) + (q - 1)*nnode + nb*nnode + v_size*((j + buff_size) + (m + 2*buff_size &
2246 & + 1)*((k + buff_size) + (n + 2*buff_size + 1)*(l + buff_size)))
2247 mv_in(j, k, l + unpack_offset, i - nvar, q) = real(buff_recv(r), kind=stp)
2248 end do
2249 end do
2250 end do
2251 end do
2252 end do
2253
2254# 1010 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2255#if defined(MFC_OpenACC)
2256# 1010 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2257!$acc end parallel loop
2258# 1010 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2259#elif defined(MFC_OpenMP)
2260# 1010 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2261
2262# 1010 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2263!$omp end target teams loop
2264# 1010 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2265#endif
2266 end if
2267# 1013 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2268 end if
2269# 1015 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2270 call nvtxendrange
2271#endif
2272
2274
2275 !> Decompose the computational domain among processors by balancing cells per rank in each coordinate direction.
2277
2278#ifdef MFC_MPI
2279 integer :: num_procs_x, num_procs_y, num_procs_z !< Optimal number of processors in the x-, y- and z-directions
2280 !> Non-optimal number of processors in the x-, y- and z-directions
2281 real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z
2282 real(wp) :: fct_min !< Processor factorization (fct) minimization parameter
2283 integer :: MPI_COMM_CART !< Cartesian processor topology communicator
2284 integer :: rem_cells !< Remaining cells after distribution among processors
2285 integer :: recon_order !< WENO or MUSCL reconstruction order
2286 integer :: i, j !< Generic loop iterators
2287 integer :: ierr !< Generic flag used to identify and report MPI errors
2288
2289 if (recon_type == recon_type_weno) then
2290 recon_order = weno_order
2291 else
2292 recon_order = muscl_order
2293 end if
2294
2295 if (num_procs == 1 .and. parallel_io) then
2296 do i = 1, num_dims
2297 start_idx(i) = 0
2298 end do
2299 return
2300 end if
2301
2302 if (igr) then
2303 recon_order = igr_order
2304 end if
2305
2306 ! 3D Cartesian Processor Topology
2307 if (n > 0) then
2308 if (p > 0) then
2309 if (fft_wrt) then
2310 ! Initial estimate of optimal processor topology
2311 num_procs_x = 1
2312 num_procs_y = 1
2313 num_procs_z = num_procs
2314 ierr = -1
2315
2316 ! Benchmarking the quality of this initial guess
2317 tmp_num_procs_y = num_procs_y
2318 tmp_num_procs_z = num_procs_z
2319 fct_min = 10._wp*abs((n + 1)/tmp_num_procs_y - (p + 1)/tmp_num_procs_z)
2320
2321 ! Optimization of the initial processor topology
2322 do i = 1, num_procs
2323 if (mod(num_procs, i) == 0 .and. (n + 1)/i >= num_stcls_min*recon_order) then
2324 tmp_num_procs_y = i
2325 tmp_num_procs_z = num_procs/i
2326
2327 if (fct_min >= abs((n + 1)/tmp_num_procs_y - (p + 1)/tmp_num_procs_z) .and. (p + 1) &
2328 & /tmp_num_procs_z >= num_stcls_min*recon_order) then
2329 num_procs_y = i
2330 num_procs_z = num_procs/i
2331 fct_min = abs((n + 1)/tmp_num_procs_y - (p + 1)/tmp_num_procs_z)
2332 ierr = 0
2333 end if
2334 end if
2335 end do
2336 else
2337 if (cyl_coord .and. p > 0) then
2338 ! Pencil blocking for cylindrical coordinates (Fourier filter near axis)
2339
2340 ! Initial values of the processor factorization optimization
2341 num_procs_x = 1
2342 num_procs_y = num_procs
2343 num_procs_z = 1
2344 ierr = -1
2345
2346 ! Computing minimization variable for these initial values
2347 tmp_num_procs_x = num_procs_x
2348 tmp_num_procs_y = num_procs_y
2349 tmp_num_procs_z = num_procs_z
2350 fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x - (n + 1)/tmp_num_procs_y)
2351
2352 ! Searching for optimal computational domain distribution
2353 do i = 1, num_procs
2354 if (mod(num_procs, i) == 0 .and. (m + 1)/i >= num_stcls_min*recon_order) then
2355 tmp_num_procs_x = i
2356 tmp_num_procs_y = num_procs/i
2357
2358 if (fct_min >= abs((m + 1)/tmp_num_procs_x - (n + 1)/tmp_num_procs_y) .and. (n + 1) &
2359 & /tmp_num_procs_y >= num_stcls_min*recon_order) then
2360 num_procs_x = i
2361 num_procs_y = num_procs/i
2362 fct_min = abs((m + 1)/tmp_num_procs_x - (n + 1)/tmp_num_procs_y)
2363 ierr = 0
2364 end if
2365 end if
2366 end do
2367 else
2368 ! Initial estimate of optimal processor topology
2369 num_procs_x = 1
2370 num_procs_y = 1
2371 num_procs_z = num_procs
2372 ierr = -1
2373
2374 ! Benchmarking the quality of this initial guess
2375 tmp_num_procs_x = num_procs_x
2376 tmp_num_procs_y = num_procs_y
2377 tmp_num_procs_z = num_procs_z
2378 fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x - (n + 1)/tmp_num_procs_y) + 10._wp*abs((n + 1) &
2379 & /tmp_num_procs_y - (p + 1)/tmp_num_procs_z)
2380
2381 ! Optimization of the initial processor topology
2382 do i = 1, num_procs
2383 if (mod(num_procs, i) == 0 .and. (m + 1)/i >= num_stcls_min*recon_order) then
2384 do j = 1, num_procs/i
2385 if (mod(num_procs/i, j) == 0 .and. (n + 1)/j >= num_stcls_min*recon_order) then
2386 tmp_num_procs_x = i
2387 tmp_num_procs_y = j
2388 tmp_num_procs_z = num_procs/(i*j)
2389
2390 if (fct_min >= abs((m + 1)/tmp_num_procs_x - (n + 1)/tmp_num_procs_y) + abs((n + 1) &
2391 & /tmp_num_procs_y - (p + 1)/tmp_num_procs_z) .and. (p + 1) &
2392 & /tmp_num_procs_z >= num_stcls_min*recon_order) then
2393 num_procs_x = i
2394 num_procs_y = j
2395 num_procs_z = num_procs/(i*j)
2396 fct_min = abs((m + 1)/tmp_num_procs_x - (n + 1)/tmp_num_procs_y) + abs((n + 1) &
2397 & /tmp_num_procs_y - (p + 1)/tmp_num_procs_z)
2398 ierr = 0
2399 end if
2400 end if
2401 end do
2402 end if
2403 end do
2404 end if
2405 end if
2406
2407 ! Verifying that a valid decomposition of the computational domain has been established. If not, the simulation
2408 ! exits.
2409 if (proc_rank == 0 .and. ierr == -1) then
2410 call s_mpi_abort('Unsupported combination of values ' // 'of num_procs, m, n, p and ' &
2411 & // 'weno/muscl/igr_order. Exiting.')
2412 end if
2413
2414 ! Creating new communicator using the Cartesian topology
2415 call mpi_cart_create(mpi_comm_world, 3, (/num_procs_x, num_procs_y, num_procs_z/), (/.true., .true., .true./), &
2416 & .false., mpi_comm_cart, ierr)
2417
2418 ! Finding the Cartesian coordinates of the local process
2419 call mpi_cart_coords(mpi_comm_cart, proc_rank, 3, proc_coords, ierr)
2420
2421 ! Global Parameters for z-direction
2422
2423 ! Number of remaining cells
2424 rem_cells = mod(p + 1, num_procs_z)
2425
2426 ! Optimal number of cells per processor
2427 p = (p + 1)/num_procs_z - 1
2428
2429 ! Distributing the remaining cells
2430 do i = 1, rem_cells
2431 if (proc_coords(3) == i - 1) then
2432 p = p + 1; exit
2433 end if
2434 end do
2435
2436 ! Boundary condition at the beginning
2437 if (proc_coords(3) > 0 .or. (bc_z%beg == bc_periodic .and. num_procs_z > 1)) then
2438 proc_coords(3) = proc_coords(3) - 1
2439 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_z%beg, ierr)
2440 proc_coords(3) = proc_coords(3) + 1
2441 end if
2442
2443 ! Boundary condition at the end
2444 if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == bc_periodic .and. num_procs_z > 1)) then
2445 proc_coords(3) = proc_coords(3) + 1
2446 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_z%end, ierr)
2447 proc_coords(3) = proc_coords(3) - 1
2448 end if
2449
2450#ifdef MFC_POST_PROCESS
2451 ! Ghost zone at the beginning
2452 if (proc_coords(3) > 0 .and. format == format_silo) then
2453 offset_z%beg = 2
2454 else
2455 offset_z%beg = 0
2456 end if
2457
2458 ! Ghost zone at the end
2459 if (proc_coords(3) < num_procs_z - 1 .and. format == format_silo) then
2460 offset_z%end = 2
2461 else
2462 offset_z%end = 0
2463 end if
2464#endif
2465
2466 ! Beginning and end sub-domain boundary locations
2467 if (parallel_io) then
2468 if (proc_coords(3) < rem_cells) then
2469 start_idx(3) = (p + 1)*proc_coords(3)
2470 else
2471 start_idx(3) = (p + 1)*proc_coords(3) + rem_cells
2472 end if
2473 else
2474#ifdef MFC_PRE_PROCESS
2475 if (old_grid .neqv. .true.) then
2476 dz = (z_domain%end - z_domain%beg)/real(p_glb + 1, wp)
2477
2478 if (proc_coords(3) < rem_cells) then
2479 z_domain%beg = z_domain%beg + dz*real((p + 1)*proc_coords(3))
2480 z_domain%end = z_domain%end - dz*real((p + 1)*(num_procs_z - proc_coords(3) - 1) - (num_procs_z &
2481 & - rem_cells))
2482 else
2483 z_domain%beg = z_domain%beg + dz*real((p + 1)*proc_coords(3) + rem_cells)
2484 z_domain%end = z_domain%end - dz*real((p + 1)*(num_procs_z - proc_coords(3) - 1))
2485 end if
2486 end if
2487#endif
2488 end if
2489
2490 ! 2D Cartesian Processor Topology
2491 else
2492 ! Initial estimate of optimal processor topology
2493 num_procs_x = 1
2494 num_procs_y = num_procs
2495 ierr = -1
2496
2497 ! Benchmarking the quality of this initial guess
2498 tmp_num_procs_x = num_procs_x
2499 tmp_num_procs_y = num_procs_y
2500 fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x - (n + 1)/tmp_num_procs_y)
2501
2502 ! Optimization of the initial processor topology
2503 do i = 1, num_procs
2504 if (mod(num_procs, i) == 0 .and. (m + 1)/i >= num_stcls_min*recon_order) then
2505 tmp_num_procs_x = i
2506 tmp_num_procs_y = num_procs/i
2507
2508 if (fct_min >= abs((m + 1)/tmp_num_procs_x - (n + 1)/tmp_num_procs_y) .and. (n + 1) &
2509 & /tmp_num_procs_y >= num_stcls_min*recon_order) then
2510 num_procs_x = i
2511 num_procs_y = num_procs/i
2512 fct_min = abs((m + 1)/tmp_num_procs_x - (n + 1)/tmp_num_procs_y)
2513 ierr = 0
2514 end if
2515 end if
2516 end do
2517
2518 ! Verifying that a valid decomposition of the computational domain has been established. If not, the simulation
2519 ! exits.
2520 if (proc_rank == 0 .and. ierr == -1) then
2521 call s_mpi_abort('Unsupported combination of values ' // 'of num_procs, m, n and ' &
2522 & // 'weno/muscl/igr_order. Exiting.')
2523 end if
2524
2525 ! Creating new communicator using the Cartesian topology
2526 call mpi_cart_create(mpi_comm_world, 2, (/num_procs_x, num_procs_y/), (/.true., .true./), .false., mpi_comm_cart, &
2527 & ierr)
2528
2529 ! Finding the Cartesian coordinates of the local process
2530 call mpi_cart_coords(mpi_comm_cart, proc_rank, 2, proc_coords, ierr)
2531 end if
2532
2533 ! Global Parameters for y-direction
2534
2535 ! Number of remaining cells
2536 rem_cells = mod(n + 1, num_procs_y)
2537
2538 ! Optimal number of cells per processor
2539 n = (n + 1)/num_procs_y - 1
2540
2541 ! Distributing the remaining cells
2542 do i = 1, rem_cells
2543 if (proc_coords(2) == i - 1) then
2544 n = n + 1; exit
2545 end if
2546 end do
2547
2548 ! Boundary condition at the beginning
2549 if (proc_coords(2) > 0 .or. (bc_y%beg == bc_periodic .and. num_procs_y > 1)) then
2550 proc_coords(2) = proc_coords(2) - 1
2551 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_y%beg, ierr)
2552 proc_coords(2) = proc_coords(2) + 1
2553 end if
2554
2555 ! Boundary condition at the end
2556 if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == bc_periodic .and. num_procs_y > 1)) then
2557 proc_coords(2) = proc_coords(2) + 1
2558 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_y%end, ierr)
2559 proc_coords(2) = proc_coords(2) - 1
2560 end if
2561
2562#ifdef MFC_POST_PROCESS
2563 ! Ghost zone at the beginning
2564 if (proc_coords(2) > 0 .and. format == format_silo) then
2565 offset_y%beg = 2
2566 else
2567 offset_y%beg = 0
2568 end if
2569
2570 ! Ghost zone at the end
2571 if (proc_coords(2) < num_procs_y - 1 .and. format == format_silo) then
2572 offset_y%end = 2
2573 else
2574 offset_y%end = 0
2575 end if
2576#endif
2577
2578 ! Beginning and end sub-domain boundary locations
2579 if (parallel_io) then
2580 if (proc_coords(2) < rem_cells) then
2581 start_idx(2) = (n + 1)*proc_coords(2)
2582 else
2583 start_idx(2) = (n + 1)*proc_coords(2) + rem_cells
2584 end if
2585 else
2586#ifdef MFC_PRE_PROCESS
2587 if (old_grid .neqv. .true.) then
2588 dy = (y_domain%end - y_domain%beg)/real(n_glb + 1, wp)
2589
2590 if (proc_coords(2) < rem_cells) then
2591 y_domain%beg = y_domain%beg + dy*real((n + 1)*proc_coords(2))
2592 y_domain%end = y_domain%end - dy*real((n + 1)*(num_procs_y - proc_coords(2) - 1) - (num_procs_y &
2593 & - rem_cells))
2594 else
2595 y_domain%beg = y_domain%beg + dy*real((n + 1)*proc_coords(2) + rem_cells)
2596 y_domain%end = y_domain%end - dy*real((n + 1)*(num_procs_y - proc_coords(2) - 1))
2597 end if
2598 end if
2599#endif
2600 end if
2601
2602 ! 1D Cartesian Processor Topology
2603 else
2604 ! Optimal processor topology
2605 num_procs_x = num_procs
2606
2607 ! Creating new communicator using the Cartesian topology
2608 call mpi_cart_create(mpi_comm_world, 1, (/num_procs_x/), (/.true./), .false., mpi_comm_cart, ierr)
2609
2610 ! Finding the Cartesian coordinates of the local process
2611 call mpi_cart_coords(mpi_comm_cart, proc_rank, 1, proc_coords, ierr)
2612 end if
2613
2614 ! Global Parameters for x-direction
2615
2616 ! Number of remaining cells
2617 rem_cells = mod(m + 1, num_procs_x)
2618
2619 ! Optimal number of cells per processor
2620 m = (m + 1)/num_procs_x - 1
2621
2622 ! Distributing the remaining cells
2623 do i = 1, rem_cells
2624 if (proc_coords(1) == i - 1) then
2625 m = m + 1; exit
2626 end if
2627 end do
2628
2629 call s_update_cell_bounds(cells_bounds, m, n, p)
2630
2631 ! Boundary condition at the beginning
2632 if (proc_coords(1) > 0 .or. (bc_x%beg == bc_periodic .and. num_procs_x > 1)) then
2633 proc_coords(1) = proc_coords(1) - 1
2634 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_x%beg, ierr)
2635 proc_coords(1) = proc_coords(1) + 1
2636 end if
2637
2638 ! Boundary condition at the end
2639 if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == bc_periodic .and. num_procs_x > 1)) then
2640 proc_coords(1) = proc_coords(1) + 1
2641 call mpi_cart_rank(mpi_comm_cart, proc_coords, bc_x%end, ierr)
2642 proc_coords(1) = proc_coords(1) - 1
2643 end if
2644
2645#ifdef MFC_POST_PROCESS
2646 ! Ghost zone at the beginning
2647 if (proc_coords(1) > 0 .and. format == format_silo) then
2648 offset_x%beg = 2
2649 else
2650 offset_x%beg = 0
2651 end if
2652
2653 ! Ghost zone at the end
2654 if (proc_coords(1) < num_procs_x - 1 .and. format == format_silo) then
2655 offset_x%end = 2
2656 else
2657 offset_x%end = 0
2658 end if
2659#endif
2660
2661 ! Beginning and end sub-domain boundary locations
2662 if (parallel_io) then
2663 if (proc_coords(1) < rem_cells) then
2664 start_idx(1) = (m + 1)*proc_coords(1)
2665 else
2666 start_idx(1) = (m + 1)*proc_coords(1) + rem_cells
2667 end if
2668 else
2669#ifdef MFC_PRE_PROCESS
2670 if (old_grid .neqv. .true.) then
2671 dx = (x_domain%end - x_domain%beg)/real(m_glb + 1, wp)
2672
2673 if (proc_coords(1) < rem_cells) then
2674 x_domain%beg = x_domain%beg + dx*real((m + 1)*proc_coords(1))
2675 x_domain%end = x_domain%end - dx*real((m + 1)*(num_procs_x - proc_coords(1) - 1) - (num_procs_x - rem_cells))
2676 else
2677 x_domain%beg = x_domain%beg + dx*real((m + 1)*proc_coords(1) + rem_cells)
2678 x_domain%end = x_domain%end - dx*real((m + 1)*(num_procs_x - proc_coords(1) - 1))
2679 end if
2680 end if
2681#endif
2682 end if
2683#endif
2684
2686
2687 !> The goal of this procedure is to populate the buffers of the grid variables by communicating with the neighboring processors.
2688 !! Note that only the buffers of the cell-width distributions are handled in such a way. This is because the buffers of
2689 !! cell-boundary locations may be calculated directly from those of the cell-width distributions.
2690#ifndef MFC_PRE_PROCESS
2691 subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc)
2692
2693 integer, intent(in) :: mpi_dir
2694 integer, intent(in) :: pbc_loc
2695
2696#ifdef MFC_MPI
2697 integer :: ierr !< Generic flag used to identify and report MPI errors
2698
2699 if (mpi_dir == 1) then
2700 if (pbc_loc == -1) then ! PBC at the beginning
2701 if (bc_x%end >= 0) then ! PBC at the beginning and end
2702 call mpi_sendrecv(dx(m - buff_size + 1), buff_size, mpi_p, bc_x%end, 0, dx(-buff_size), buff_size, mpi_p, &
2703 & bc_x%beg, 0, mpi_comm_world, mpi_status_ignore, ierr)
2704 else ! PBC at the beginning only
2705 call mpi_sendrecv(dx(0), buff_size, mpi_p, bc_x%beg, 1, dx(-buff_size), buff_size, mpi_p, bc_x%beg, 0, &
2706 & mpi_comm_world, mpi_status_ignore, ierr)
2707 end if
2708 else ! PBC at the end
2709 if (bc_x%beg >= 0) then ! PBC at the end and beginning
2710 call mpi_sendrecv(dx(0), buff_size, mpi_p, bc_x%beg, 1, dx(m + 1), buff_size, mpi_p, bc_x%end, 1, &
2711 & mpi_comm_world, mpi_status_ignore, ierr)
2712 else ! PBC at the end only
2713 call mpi_sendrecv(dx(m - buff_size + 1), buff_size, mpi_p, bc_x%end, 0, dx(m + 1), buff_size, mpi_p, &
2714 & bc_x%end, 1, mpi_comm_world, mpi_status_ignore, ierr)
2715 end if
2716 end if
2717 else if (mpi_dir == 2) then
2718 if (pbc_loc == -1) then ! PBC at the beginning
2719 if (bc_y%end >= 0) then ! PBC at the beginning and end
2720 call mpi_sendrecv(dy(n - buff_size + 1), buff_size, mpi_p, bc_y%end, 0, dy(-buff_size), buff_size, mpi_p, &
2721 & bc_y%beg, 0, mpi_comm_world, mpi_status_ignore, ierr)
2722 else ! PBC at the beginning only
2723 call mpi_sendrecv(dy(0), buff_size, mpi_p, bc_y%beg, 1, dy(-buff_size), buff_size, mpi_p, bc_y%beg, 0, &
2724 & mpi_comm_world, mpi_status_ignore, ierr)
2725 end if
2726 else ! PBC at the end
2727 if (bc_y%beg >= 0) then ! PBC at the end and beginning
2728 call mpi_sendrecv(dy(0), buff_size, mpi_p, bc_y%beg, 1, dy(n + 1), buff_size, mpi_p, bc_y%end, 1, &
2729 & mpi_comm_world, mpi_status_ignore, ierr)
2730 else ! PBC at the end only
2731 call mpi_sendrecv(dy(n - buff_size + 1), buff_size, mpi_p, bc_y%end, 0, dy(n + 1), buff_size, mpi_p, &
2732 & bc_y%end, 1, mpi_comm_world, mpi_status_ignore, ierr)
2733 end if
2734 end if
2735 else
2736 if (pbc_loc == -1) then ! PBC at the beginning
2737 if (bc_z%end >= 0) then ! PBC at the beginning and end
2738 call mpi_sendrecv(dz(p - buff_size + 1), buff_size, mpi_p, bc_z%end, 0, dz(-buff_size), buff_size, mpi_p, &
2739 & bc_z%beg, 0, mpi_comm_world, mpi_status_ignore, ierr)
2740 else ! PBC at the beginning only
2741 call mpi_sendrecv(dz(0), buff_size, mpi_p, bc_z%beg, 1, dz(-buff_size), buff_size, mpi_p, bc_z%beg, 0, &
2742 & mpi_comm_world, mpi_status_ignore, ierr)
2743 end if
2744 else ! PBC at the end
2745 if (bc_z%beg >= 0) then ! PBC at the end and beginning
2746 call mpi_sendrecv(dz(0), buff_size, mpi_p, bc_z%beg, 1, dz(p + 1), buff_size, mpi_p, bc_z%end, 1, &
2747 & mpi_comm_world, mpi_status_ignore, ierr)
2748 else ! PBC at the end only
2749 call mpi_sendrecv(dz(p - buff_size + 1), buff_size, mpi_p, bc_z%end, 0, dz(p + 1), buff_size, mpi_p, &
2750 & bc_z%end, 1, mpi_comm_world, mpi_status_ignore, ierr)
2751 end if
2752 end if
2753 end if
2754#endif
2755
2757#endif
2758
2759 !> Module deallocation and/or disassociation procedures
2761
2762#ifdef MFC_MPI
2763#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
2764#ifdef MFC_DEBUG
2765# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2766 block
2767# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2768 use iso_fortran_env, only: output_unit
2769# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2770
2771# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2772 print *, 'm_mpi_common.fpp:1509: ', '@:DEALLOCATE(buff_send, buff_recv)'
2773# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2774
2775# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2776 call flush (output_unit)
2777# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2778 end block
2779# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2780#endif
2781# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2782
2783# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2784#if defined(MFC_OpenACC)
2785# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2786!$acc exit data delete(buff_send, buff_recv)
2787# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2788#elif defined(MFC_OpenMP)
2789# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2790!$omp target exit data map(release:buff_send, buff_recv)
2791# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2792#endif
2793# 1509 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2794 deallocate (buff_send, buff_recv)
2795#else
2796
2797# 1511 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2798#if defined(MFC_OpenACC)
2799# 1511 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2800!$acc exit data delete(buff_send, buff_recv)
2801# 1511 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2802#elif defined(MFC_OpenMP)
2803# 1511 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2804!$omp target exit data map(release:buff_send, buff_recv)
2805# 1511 "/home/runner/work/MFC/MFC/src/common/m_mpi_common.fpp"
2806#endif
2807 deallocate (buff_send, buff_recv)
2808#endif
2809#endif
2810
2811 end subroutine s_finalize_mpi_common_module
2812
2813end module m_mpi_common
type(scalar_field), dimension(sys_size), intent(inout) q_cons_vf
integer, intent(in) j
Compile-time constant parameters: default values, tolerances, and physical constants.
integer, parameter format_silo
integer, parameter nnode
Number of QBMM nodes.
integer, parameter recon_type_weno
Shared derived types for field data, patch geometry, bubble dynamics, and MPI I/O structures.
Global parameters for the computational domain, fluid properties, and simulation algorithm configurat...
integer buff_size
Number of ghost cells for boundary condition storage.
type(cell_num_bounds) cells_bounds
Utility routines for bubble model setup, coordinate transforms, array sampling, and special functions...
MPI communication layer: domain decomposition, halo exchange, reductions, and parallel I/O setup.
impure subroutine s_mpi_abort(prnt, code)
The subroutine terminates the MPI execution environment.
impure subroutine s_initialize_mpi_common_module
Initialize the module.
impure subroutine s_mpi_gather_data(my_vector, counts, gathered_vector, root)
Gather variable-length real vectors from all MPI ranks onto the root process.
impure subroutine s_mpi_barrier
Halts all processes until all have reached barrier.
impure subroutine s_mpi_initialize
Initialize the MPI execution environment and query the number of processors and local rank.
impure subroutine s_mpi_allreduce_vectors_sum(var_loc, var_glb, num_vectors, vector_length)
Reduce an array of vectors to their global sums across all MPI ranks.
real(wp), dimension(:), allocatable, private buff_recv
Primitive variable receive buffer for halo exchange.
impure subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, beta)
Set up MPI I/O data views and variable pointers for parallel file output.
impure subroutine s_mpi_reduce_maxloc(var_loc)
Reduce a 2-element variable to its global maximum value with the owning processor rank (MPI_MAXLOC)....
subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc)
The goal of this procedure is to populate the buffers of the grid variables by communicating with the...
impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, vcfl_max_loc, rc_min_loc, icfl_max_glb, vcfl_max_glb, rc_min_glb)
The goal of this subroutine is to determine the global extrema of the stability criteria in the compu...
impure subroutine s_mpi_allreduce_sum(var_loc, var_glb)
Reduce a local real value to its global sum across all MPI ranks.
real(wp), dimension(:), allocatable, private buff_send
Primitive variable send buffer for halo exchange.
impure subroutine s_mpi_allreduce_min(var_loc, var_glb)
Reduce a local real value to its global minimum across all MPI ranks.
impure subroutine s_prohibit_abort(condition, message)
Print a case file error with the prohibited condition and message, then abort execution.
impure subroutine s_mpi_finalize
The subroutine finalizes the MPI execution environment.
subroutine s_initialize_mpi_data_ds(q_cons_vf)
Set up MPI I/O data views for downsampled (coarsened) parallel file output.
impure subroutine s_mpi_allreduce_max(var_loc, var_glb)
Reduce a local real value to its global maximum across all MPI ranks.
impure subroutine s_mpi_allreduce_integer_sum(var_loc, var_glb)
Reduce a local integer value to its global sum across all MPI ranks.
subroutine s_mpi_sendrecv_variables_buffers(q_comm, mpi_dir, pbc_loc, nvar, pb_in, mv_in, q_t_sf)
The goal of this procedure is to populate the buffers of the cell-average conservative variables by c...
integer, private v_size
impure subroutine mpi_bcast_time_step_values(proc_time, time_avg)
Gather per-rank time step wall-clock times onto rank 0 for performance reporting.
impure subroutine s_mpi_reduce_min(var_loc)
Reduce a local real value to its global minimum across all ranks.
impure subroutine s_finalize_mpi_common_module
Module deallocation and/or disassociation procedures.
integer(kind=8) halo_size
subroutine s_mpi_decompose_computational_domain
Decompose the computational domain among processors by balancing cells per rank in each coordinate di...
NVIDIA NVTX profiling API bindings for GPU performance instrumentation.
Definition m_nvtx.f90:6