MFC
Exascale flow solver
Loading...
Searching...
No Matches
m_fftw.fpp.f90
Go to the documentation of this file.
1# 1 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
2!>
3!! @file
4!! @brief Contains module m_fftw
5
6# 1 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 1
7# 1 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 1
8# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
9# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
10# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
11# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
12# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
13# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
14
15# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
16# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
17# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
18
19# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
20
21# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
22
23# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
24
25# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
26
27# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
28
29# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
30
31# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
32! New line at end of file is required for FYPP
33# 2 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
34# 1 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 1
35# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
36# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
37# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
38# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
39# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
40# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
41
42# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
43# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
44# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
45
46# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
47
48# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
49
50# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
51
52# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
53
54# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
55
56# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
57
58# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
59! New line at end of file is required for FYPP
60# 2 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 2
61
62# 4 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
63# 5 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
64# 6 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
65# 7 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
66# 8 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
67
68# 20 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
69
70# 43 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
71
72# 48 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
73
74# 53 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
75
76# 58 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
77
78# 63 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
79
80# 68 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
81
82# 76 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
83
84# 81 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
85
86# 86 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
87
88# 91 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
89
90# 96 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
91
92# 101 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
93
94# 106 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
95
96# 111 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
97
98# 116 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
99
100# 121 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
101
102# 151 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
103
104# 192 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
105
106# 206 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
107
108# 231 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
109
110# 242 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
111
112# 244 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
113# 255 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
114
115# 284 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
116
117# 294 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
118
119# 304 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
120
121# 313 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
122
123# 330 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
124
125# 340 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
126
127# 347 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
128
129# 353 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
130
131# 359 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
132
133# 365 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
134
135# 371 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
136
137# 377 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
138! New line at end of file is required for FYPP
139# 3 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
140# 1 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 1
141# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
142# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
143# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
144# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
145# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
146# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
147
148# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
149# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
150# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
151
152# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
153
154# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
155
156# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
157
158# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
159
160# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
161
162# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
163
164# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
165! New line at end of file is required for FYPP
166# 2 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 2
167
168# 7 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
169
170# 17 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
171
172# 22 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
173
174# 27 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
175
176# 32 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
177
178# 37 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
179
180# 42 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
181
182# 47 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
183
184# 52 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
185
186# 57 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
187
188# 62 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
189
190# 73 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
191
192# 78 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
193
194# 83 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
195
196# 88 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
197
198# 103 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
199
200# 131 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
201
202# 160 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
203
204# 175 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
205
206# 193 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
207
208# 215 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
209
210# 244 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
211
212# 259 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
213
214# 269 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
215
216# 278 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
217
218# 294 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
219
220# 304 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
221
222# 311 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
223! New line at end of file is required for FYPP
224# 4 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
225
226! GPU parallel region (scalar reductions, maxval/minval)
227# 23 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
228
229! GPU parallel loop over threads (most common GPU macro)
230# 43 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
231
232! Required closing for GPU_PARALLEL_LOOP
233# 55 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
234
235! Mark routine for device compilation
236# 112 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
237
238! Declare device-resident data
239# 130 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
240
241! Inner loop within a GPU parallel region
242# 145 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
243
244! Scoped GPU data region
245# 164 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
246
247! Host code with device pointers (for MPI with GPU buffers)
248# 193 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
249
250! Allocate device memory (unscoped)
251# 207 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
252
253! Free device memory
254# 219 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
255
256! Atomic operation on device
257# 231 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
258
259! End atomic capture block
260# 242 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
261
262! Copy data between host and device
263# 254 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
264
265! Synchronization barrier
266# 266 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
267
268! Import GPU library module (openacc or omp_lib)
269# 275 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
270
271! Emit code only for AMD compiler
272# 282 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
273
274! Emit code for non-Cray compilers
275# 289 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
276
277! Emit code only for Cray compiler
278# 296 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
279
280! Emit code for non-NVIDIA compilers
281# 303 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
282
283# 305 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
284# 306 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
285! New line at end of file is required for FYPP
286# 2 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 2
287
288# 14 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
289
290! Caution: This macro requires the use of a binding script to set CUDA_VISIBLE_DEVICES, such that we have one GPU device per MPI
291! rank. That's because for both cudaMemAdvise (preferred location) and cudaMemPrefetchAsync we use location = device_id = 0. For an
292! example see misc/nvidia_uvm/bind.sh. NVIDIA unified memory page placement hint
293# 57 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
294
295! Allocate and create GPU device memory
296# 77 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
297
298! Free GPU device memory and deallocate
299# 85 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
300
301! Cray-specific GPU pointer setup for vector fields
302# 109 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
303
304! Cray-specific GPU pointer setup for scalar fields
305# 125 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
306
307! Cray-specific GPU pointer setup for acoustic source spatials
308# 150 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
309
310# 156 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
311
312# 163 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
313! New line at end of file is required for FYPP
314# 6 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp" 2
315
316!> @brief Forward and inverse FFT wrappers (FFTW/cuFFT/hipFFT) for azimuthal Fourier filtering in cylindrical geometries
317module m_fftw
318
319 use, intrinsic :: iso_c_binding
320
323 use m_mpi_proxy
324#if defined(MFC_GPU) && defined(__PGI)
325 use cufft
326#elif defined(MFC_GPU)
327 use hipfort
328 use hipfort_check
329 use hipfort_hipfft
330#endif
331
332 implicit none
333
335
336#if !defined(MFC_GPU)
337 include 'fftw3.f03'
338#endif
339
340 type(c_ptr) :: fwd_plan, bwd_plan
343 real(c_double), pointer :: data_real(:) !< Real data
344 complex(c_double_complex), pointer :: data_cmplx(:) !< Complex data in Fourier space
345 complex(c_double_complex), pointer :: data_fltr_cmplx(:) !< Filtered complex data in Fourier space
346#if defined(MFC_GPU)
347
348# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
349#if defined(MFC_OpenACC)
350# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
351!$acc declare create(real_size, cmplx_size, x_size, batch_size, Nfq)
352# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
353#elif defined(MFC_OpenMP)
354# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
355!$omp declare target (real_size, cmplx_size, x_size, batch_size, Nfq)
356# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
357#endif
358
359 real(dp), allocatable, target :: data_real_gpu(:)
360 complex(dp), allocatable, target :: data_cmplx_gpu(:)
361 complex(dp), allocatable, target :: data_fltr_cmplx_gpu(:)
362
363# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
364#if defined(MFC_OpenACC)
365# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
366!$acc declare create(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
367# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
368#elif defined(MFC_OpenMP)
369# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
370!$omp declare target (data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
371# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
372#endif
373
374 !> @cond
375#if defined(__PGI)
376 integer :: fwd_plan_gpu, bwd_plan_gpu
377#else
378 !> @endcond
380 !> @cond
381#endif
382 !> @endcond
383
384 integer, allocatable :: gpu_fft_size(:), iembed(:), oembed(:)
386#endif
387
388contains
389
390 !> Initialize the FFTW module
!! Computes the transform sizes (real_size = p + 1, cmplx_size = (p + 1)/2 + 1, x_size = m + 1) and then sets up the
!! FFT working storage. When MFC_GPU is defined it allocates the embed/size arrays, pushes the size scalars to the
!! device and allocates the *_gpu work buffers; otherwise it allocates FFTW buffers, associates Fortran pointers with
!! them and creates the forward (r2c) and backward (c2r) 1D plans with fftw_estimate.
!! NOTE(review): this listing skips several numbered lines (401, 407, 440, 466, 549, 551, 554, 556), so some statements
!! of the routine are not visible here; notes below mark where that matters.
391 impure subroutine s_initialize_fftw_module
392
393 integer :: ierr !< Generic flag used to identify and report GPU errors
394 ! Size of input array going into DFT
395
396 real_size = p + 1
397 ! Size of output array coming out of DFT
398 cmplx_size = (p + 1)/2 + 1
399
400 x_size = m + 1
 ! NOTE(review): no assignment to batch_size is visible although it is copied to the device and passed to the plan
 ! calls below; presumably it is set on the line omitted from this listing (401) - confirm against the .fpp source.
402
403#if defined(MFC_GPU)
 ! GPU build: one-dimensional transforms with unit input/output stride
404 rank = 1; istride = 1; ostride = 1
405 allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank))
406
 ! NOTE(review): gpu_fft_size(1) is not assigned in the visible lines (listing line 407 is omitted) - confirm.
408 iembed(1) = 0
409 oembed(1) = 0
410
 ! Create device copies of the size scalars (OpenACC: enter data copyin, OpenMP: target enter data map(to:))
411# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
412#if defined(MFC_OpenACC)
413# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
414!$acc enter data copyin(real_size, cmplx_size, x_size, sys_size, batch_size, Nfq)
415# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
416#elif defined(MFC_OpenMP)
417# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
418!$omp target enter data map(to:real_size, cmplx_size, x_size, sys_size, batch_size, Nfq)
419# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
420#endif
421
 ! Refresh the device copies of the size scalars; Nfq is not in this update list
422# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
423#if defined(MFC_OpenACC)
424# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
425!$acc update device(real_size, cmplx_size, x_size, sys_size, batch_size)
426# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
427#elif defined(MFC_OpenMP)
428# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
429!$omp target update to(real_size, cmplx_size, x_size, sys_size, batch_size)
430# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
431#endif
432#else
433 ! Allocate input and output DFT data sizes
434 fftw_real_data = fftw_alloc_real(int(real_size, c_size_t))
435 fftw_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t))
436 fftw_fltr_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t))
437 ! Associate input and output data pointers with allocated memory
438 call c_f_pointer(fftw_real_data, data_real, [real_size])
439 call c_f_pointer(fftw_cmplx_data, data_cmplx, [cmplx_size])
 ! NOTE(review): data_fltr_cmplx is used by the backward plan below but its association with fftw_fltr_cmplx_data
 ! is not visible (listing line 440 is omitted) - confirm it exists in the .fpp source.
441
442 ! Generate plans for forward and backward DFTs
443 fwd_plan = fftw_plan_dft_r2c_1d(real_size, data_real, data_cmplx, fftw_estimate)
444 bwd_plan = fftw_plan_dft_c2r_1d(real_size, data_fltr_cmplx, data_real, fftw_estimate)
445#endif
446
447#if defined(MFC_GPU)
 ! Expansion of the ALLOCATE macro for data_real_gpu (m_fftw.fpp line 99): with MFC_DEBUG defined, print the
 ! source location and macro text and flush output_unit, then create the device copy of the array.
448#ifdef MFC_DEBUG
449# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
450 block
451# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
452 use iso_fortran_env, only: output_unit
453# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
454
455# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
456 print *, 'm_fftw.fpp:99: ', '@:ALLOCATE(data_real_gpu(1:real_size*x_size*sys_size))'
457# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
458
459# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
460 call flush (output_unit)
461# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
462 end block
463# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
464#endif
 ! NOTE(review): the host-side allocate of data_real_gpu is not visible (listing line 466 is omitted); the two
 ! sibling buffers below show the corresponding allocate statement - confirm.
465# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
467# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
468
469# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
470
471# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
472#if defined(MFC_OpenACC)
473# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
474!$acc enter data create(data_real_gpu)
475# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
476#elif defined(MFC_OpenMP)
477# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
478!$omp target enter data map(always,alloc:data_real_gpu)
479# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
480#endif
 ! Expansion of the ALLOCATE macro for data_cmplx_gpu (m_fftw.fpp line 100): optional debug trace, host allocate
 ! of cmplx_size*x_size*sys_size elements, then creation of the device copy.
481#ifdef MFC_DEBUG
482# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
483 block
484# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
485 use iso_fortran_env, only: output_unit
486# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
487
488# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
489 print *, 'm_fftw.fpp:100: ', '@:ALLOCATE(data_cmplx_gpu(1:cmplx_size*x_size*sys_size))'
490# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
491
492# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
493 call flush (output_unit)
494# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
495 end block
496# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
497#endif
498# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
499 allocate (data_cmplx_gpu(1:cmplx_size*x_size*sys_size))
500# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
501
502# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
503
504# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
505#if defined(MFC_OpenACC)
506# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
507!$acc enter data create(data_cmplx_gpu)
508# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
509#elif defined(MFC_OpenMP)
510# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
511!$omp target enter data map(always,alloc:data_cmplx_gpu)
512# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
513#endif
 ! Expansion of the ALLOCATE macro for data_fltr_cmplx_gpu (m_fftw.fpp line 101): same pattern as above.
514#ifdef MFC_DEBUG
515# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
516 block
517# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
518 use iso_fortran_env, only: output_unit
519# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
520
521# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
522 print *, 'm_fftw.fpp:101: ', '@:ALLOCATE(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size))'
523# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
524
525# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
526 call flush (output_unit)
527# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
528 end block
529# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
530#endif
531# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
532 allocate (data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size))
533# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
534
535# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
536
537# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
538#if defined(MFC_OpenACC)
539# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
540!$acc enter data create(data_fltr_cmplx_gpu)
541# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
542#elif defined(MFC_OpenMP)
543# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
544!$omp target enter data map(always,alloc:data_fltr_cmplx_gpu)
545# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
546#endif
547
 ! NOTE(review): only the continuation lines of the GPU plan-creation calls are visible below (listing lines 549,
 ! 551, 554 and 556 are omitted). They show a d2z and a z2d transform type, each with batch_size, in a cuFFT variant
 ! under __PGI and a hipFFT variant otherwise; presumably these create fwd_plan_gpu and bwd_plan_gpu - confirm.
548 #if defined(__PGI)
550 & cufft_d2z, batch_size)
552 & cufft_z2d, batch_size)
553#else
555 & hipfft_d2z, batch_size)
557 & hipfft_z2d, batch_size)
558#endif
559#endif
560
561 end subroutine s_initialize_fftw_module
562
563 !> Apply a Fourier low-pass filter in the azimuthal direction to remove high-frequency content
564 impure subroutine s_apply_fourier_filter(q_cons_vf)
565
566 type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
567 integer :: i, j, k, l !< Generic loop iterators
568 integer :: ierr !< Generic flag used to identify and report GPU errors
569 ! Restrict filter to processors that have cells adjacent to axis
570
571 if (bc_y%beg >= 0) return
572#if defined(MFC_GPU)
573
574# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
575
576# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
577#if defined(MFC_OpenACC)
578# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
579!$acc parallel loop collapse(3) gang vector default(present)
580# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
581#elif defined(MFC_OpenMP)
582# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
583
584# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
585
586# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
587
588# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
589!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
590# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
591#endif
592 do k = 1, sys_size
593 do j = 0, m
594 do l = 1, cmplx_size
595 data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp)
596 end do
597 end do
598 end do
599
600# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
601#if defined(MFC_OpenACC)
602# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
603!$acc end parallel loop
604# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
605#elif defined(MFC_OpenMP)
606# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
607
608# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
609!$omp end target teams loop
610# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
611#endif
612
613
614# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
615
616# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
617#if defined(MFC_OpenACC)
618# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
619!$acc parallel loop collapse(3) gang vector default(present)
620# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
621#elif defined(MFC_OpenMP)
622# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
623
624# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
625
626# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
627
628# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
629!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
630# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
631#endif
632 do k = 1, sys_size
633 do j = 0, m
634 do l = 0, p
635 data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l)
636 end do
637 end do
638 end do
639
640# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
641#if defined(MFC_OpenACC)
642# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
643!$acc end parallel loop
644# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
645#elif defined(MFC_OpenMP)
646# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
647
648# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
649!$omp end target teams loop
650# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
651#endif
652
653
654# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
655#if defined(MFC_OpenACC)
656# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
657!$acc host_data use_device(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
658# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
659#if defined(__PGI)
660# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
661 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
662# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
663#else
664# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
665 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
666# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
667 call hipcheck(hipdevicesynchronize())
668# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
669#endif
670# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
671!$acc end host_data
672# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
673#elif defined(MFC_OpenMP)
674# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
675!$omp target data use_device_addr(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
676# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
677#if defined(__PGI)
678# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
679 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
680# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
681#else
682# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
683 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
684# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
685 call hipcheck(hipdevicesynchronize())
686# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
687#endif
688# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
689!$omp end target data
690# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
691#else
692# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
693#if defined(__PGI)
694# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
695 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
696# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
697#else
698# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
699 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
700# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
701 call hipcheck(hipdevicesynchronize())
702# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
703#endif
704# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
705#endif
706# 156 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
707 nfq = 3
708
709# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
710#if defined(MFC_OpenACC)
711# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
712!$acc update device(Nfq)
713# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
714#elif defined(MFC_OpenMP)
715# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
716!$omp target update to(Nfq)
717# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
718#endif
719
720
721# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
722
723# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
724#if defined(MFC_OpenACC)
725# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
726!$acc parallel loop collapse(3) gang vector default(present)
727# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
728#elif defined(MFC_OpenMP)
729# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
730
731# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
732
733# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
734
735# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
736!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
737# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
738#endif
739 do k = 1, sys_size
740 do j = 0, m
741 do l = 1, nfq
744 end do
745 end do
746 end do
747
748# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
749#if defined(MFC_OpenACC)
750# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
751!$acc end parallel loop
752# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
753#elif defined(MFC_OpenMP)
754# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
755
756# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
757!$omp end target teams loop
758# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
759#endif
760
761
762# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
763#if defined(MFC_OpenACC)
764# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
765!$acc host_data use_device(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
766# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
767#if defined(__PGI)
768# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
769 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
770# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
771#else
772# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
773 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
774# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
775 call hipcheck(hipdevicesynchronize())
776# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
777#endif
778# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
779!$acc end host_data
780# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
781#elif defined(MFC_OpenMP)
782# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
783!$omp target data use_device_addr(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
784# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
785#if defined(__PGI)
786# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
787 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
788# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
789#else
790# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
791 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
792# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
793 call hipcheck(hipdevicesynchronize())
794# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
795#endif
796# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
797!$omp end target data
798# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
799#else
800# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
801#if defined(__PGI)
802# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
803 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
804# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
805#else
806# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
807 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
808# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
809 call hipcheck(hipdevicesynchronize())
810# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
811#endif
812# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
813#endif
814# 178 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
815
816
817# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
818
819# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
820#if defined(MFC_OpenACC)
821# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
822!$acc parallel loop collapse(3) gang vector default(present)
823# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
824#elif defined(MFC_OpenMP)
825# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
826
827# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
828
829# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
830
831# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
832!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
833# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
834#endif
835 do k = 1, sys_size
836 do j = 0, m
837 do l = 0, p
838 data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1) &
839 & *real_size*x_size)/real(real_size, dp)
840 q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
841 end do
842 end do
843 end do
844
845# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
846#if defined(MFC_OpenACC)
847# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
848!$acc end parallel loop
849# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
850#elif defined(MFC_OpenMP)
851# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
852
853# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
854!$omp end target teams loop
855# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
856#endif
857
858 do i = 1, fourier_rings
859
860# 192 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
861
862# 192 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
863#if defined(MFC_OpenACC)
864# 192 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
865!$acc parallel loop collapse(3) gang vector default(present)
866# 192 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
867#elif defined(MFC_OpenMP)
868# 192 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
869
870# 192 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
871
872# 192 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
873
874# 192 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
875!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
876# 192 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
877#endif
878 do k = 1, sys_size
879 do j = 0, m
880 do l = 1, cmplx_size
881 data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp)
882 end do
883 end do
884 end do
885
886# 200 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
887#if defined(MFC_OpenACC)
888# 200 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
889!$acc end parallel loop
890# 200 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
891#elif defined(MFC_OpenMP)
892# 200 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
893
894# 200 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
895!$omp end target teams loop
896# 200 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
897#endif
898
899
900# 202 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
901
902# 202 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
903#if defined(MFC_OpenACC)
904# 202 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
905!$acc parallel loop collapse(3) gang vector default(present) firstprivate(i)
906# 202 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
907#elif defined(MFC_OpenMP)
908# 202 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
909
910# 202 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
911
912# 202 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
913
914# 202 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
915!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) firstprivate(i)
916# 202 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
917#endif
918 do k = 1, sys_size
919 do j = 0, m
920 do l = 0, p
921 data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l)
922 end do
923 end do
924 end do
925
926# 210 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
927#if defined(MFC_OpenACC)
928# 210 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
929!$acc end parallel loop
930# 210 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
931#elif defined(MFC_OpenMP)
932# 210 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
933
934# 210 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
935!$omp end target teams loop
936# 210 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
937#endif
938
939
940# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
941#if defined(MFC_OpenACC)
942# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
943!$acc host_data use_device(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
944# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
945#if defined(__PGI)
946# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
947 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
948# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
949#else
950# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
951 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
952# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
953 call hipcheck(hipdevicesynchronize())
954# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
955#endif
956# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
957!$acc end host_data
958# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
959#elif defined(MFC_OpenMP)
960# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
961!$omp target data use_device_addr(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
962# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
963#if defined(__PGI)
964# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
965 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
966# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
967#else
968# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
969 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
970# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
971 call hipcheck(hipdevicesynchronize())
972# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
973#endif
974# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
975!$omp end target data
976# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
977#else
978# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
979#if defined(__PGI)
980# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
981 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
982# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
983#else
984# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
985 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
986# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
987 call hipcheck(hipdevicesynchronize())
988# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
989#endif
990# 212 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
991#endif
992# 220 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
993
994 nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
995
996# 222 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
997#if defined(MFC_OpenACC)
998# 222 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
999!$acc update device(Nfq)
1000# 222 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1001#elif defined(MFC_OpenMP)
1002# 222 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1003!$omp target update to(Nfq)
1004# 222 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1005#endif
1006
1007
1008# 224 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1009
1010# 224 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1011#if defined(MFC_OpenACC)
1012# 224 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1013!$acc parallel loop collapse(3) gang vector default(present)
1014# 224 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1015#elif defined(MFC_OpenMP)
1016# 224 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1017
1018# 224 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1019
1020# 224 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1021
1022# 224 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1023!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
1024# 224 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1025#endif
1026 do k = 1, sys_size
1027 do j = 0, m
1028 do l = 1, nfq
1030 & - 1)*cmplx_size*x_size)
1031 end do
1032 end do
1033 end do
1034
1035# 233 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1036#if defined(MFC_OpenACC)
1037# 233 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1038!$acc end parallel loop
1039# 233 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1040#elif defined(MFC_OpenMP)
1041# 233 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1042
1043# 233 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1044!$omp end target teams loop
1045# 233 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1046#endif
1047
1048
1049# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1050#if defined(MFC_OpenACC)
1051# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1052!$acc host_data use_device(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
1053# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1054#if defined(__PGI)
1055# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1056 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1057# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1058#else
1059# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1060 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1061# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1062 call hipcheck(hipdevicesynchronize())
1063# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1064#endif
1065# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1066!$acc end host_data
1067# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1068#elif defined(MFC_OpenMP)
1069# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1070!$omp target data use_device_addr(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
1071# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1072#if defined(__PGI)
1073# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1074 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1075# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1076#else
1077# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1078 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1079# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1080 call hipcheck(hipdevicesynchronize())
1081# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1082#endif
1083# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1084!$omp end target data
1085# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1086#else
1087# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1088#if defined(__PGI)
1089# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1090 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1091# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1092#else
1093# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1094 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1095# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1096 call hipcheck(hipdevicesynchronize())
1097# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1098#endif
1099# 235 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1100#endif
1101# 243 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1102
1103
1104# 244 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1105
1106# 244 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1107#if defined(MFC_OpenACC)
1108# 244 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1109!$acc parallel loop collapse(3) gang vector default(present) firstprivate(i)
1110# 244 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1111#elif defined(MFC_OpenMP)
1112# 244 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1113
1114# 244 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1115
1116# 244 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1117
1118# 244 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1119!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) firstprivate(i)
1120# 244 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1121#endif
1122 do k = 1, sys_size
1123 do j = 0, m
1124 do l = 0, p
1125 data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k &
1126 & - 1)*real_size*x_size)/real(real_size, dp)
1127 q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
1128 end do
1129 end do
1130 end do
1131
1132# 254 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1133#if defined(MFC_OpenACC)
1134# 254 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1135!$acc end parallel loop
1136# 254 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1137#elif defined(MFC_OpenMP)
1138# 254 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1139
1140# 254 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1141!$omp end target teams loop
1142# 254 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1143#endif
1144 end do
1145#else
1146 nfq = 3
1147 do j = 0, m
1148 do k = 1, sys_size
1149 data_fltr_cmplx(:) = (0_dp, 0_dp)
1150 data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0,0:p)
1151 call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx)
1153 call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real)
1154 data_real(:) = data_real(:)/real(real_size, dp)
1155 q_cons_vf(k)%sf(j, 0,0:p) = data_real(1:p + 1)
1156 end do
1157 end do
1158
1159 ! Apply Fourier filter to additional rings
1160 do i = 1, fourier_rings
1161 nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
1162 do j = 0, m
1163 do k = 1, sys_size
1164 data_fltr_cmplx(:) = (0_dp, 0_dp)
1165 data_real(1:p + 1) = q_cons_vf(k)%sf(j, i,0:p)
1166 call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx)
1168 call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real)
1169 data_real(:) = data_real(:)/real(real_size, dp)
1170 q_cons_vf(k)%sf(j, i,0:p) = data_real(1:p + 1)
1171 end do
1172 end do
1173 end do
1174#endif
1175
1176 end subroutine s_apply_fourier_filter
1177
1178 !> Finalize the FFTW module
1179 impure subroutine s_finalize_fftw_module
     ! Tears down the FFT state owned by this module. Takes no arguments and
     ! works on module-level variables only.
     !   * MFC_GPU defined: removes data_real_gpu, data_fltr_cmplx_gpu and
     !     data_cmplx_gpu from the device data environment, then destroys
     !     fwd_plan_gpu and bwd_plan_gpu.
     !   * MFC_GPU not defined: passes the three FFTW buffer handles to
     !     fftw_free and both plans to fftw_destroy_plan.
1180
1181#if defined(MFC_GPU)
     ! ierr receives the return value of each plan-destroy call below. It is
     ! overwritten by the second call and never inspected in this routine, so
     ! a failed destroy goes unnoticed here.
1182 integer :: ierr !< Generic flag used to identify and report GPU errors
1183
     ! Debug builds only: print the originating macro call site and flush
     ! output_unit before the device data is released.
1184#ifdef MFC_DEBUG
1185# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1186 block
1187# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1188 use iso_fortran_env, only: output_unit
1189# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1190
1191# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1192 print *, 'm_fftw.fpp:295: ', '@:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu)'
1193# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1194
1195# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1196 call flush (output_unit)
1197# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1198 end block
1199# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1200#endif
1201# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1202
     ! Drop the three work arrays from the device data environment, using the
     ! directive that matches the offload model selected at build time.
1203# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1204#if defined(MFC_OpenACC)
1205# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1206!$acc exit data delete(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu)
1207# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1208#elif defined(MFC_OpenMP)
1209# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1210!$omp target exit data map(release:data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu)
1211# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1212#endif
1213# 295 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
     ! NOTE(review): the listing numbering jumps from 1213 to 1215. Since the
     ! debug message above names the @:DEALLOCATE macro, the omitted line is
     ! presumably the host-side deallocate of the same three arrays -- confirm
     ! against m_fftw.fpp before relying on this listing.
     ! Destroy the forward and backward GPU plans: cufftdestroy when __PGI is
     ! defined, hipfftdestroy otherwise.
1215#if defined(__PGI)
1216 ierr = cufftdestroy(fwd_plan_gpu)
1217 ierr = cufftdestroy(bwd_plan_gpu)
1218#else
1219 ierr = hipfftdestroy(fwd_plan_gpu)
1220 ierr = hipfftdestroy(bwd_plan_gpu)
1221#endif
1222#else
     ! Non-GPU path: free the real, complex and filtered-complex FFTW buffers
     ! through their c_ptr handles, then destroy both FFTW plans.
1223 call fftw_free(fftw_real_data)
1224 call fftw_free(fftw_cmplx_data)
1225 call fftw_free(fftw_fltr_cmplx_data)
1226
1227 call fftw_destroy_plan(fwd_plan)
1228 call fftw_destroy_plan(bwd_plan)
1229#endif
1230
1231 end subroutine s_finalize_fftw_module
1232
1233end module m_fftw
type(scalar_field), dimension(sys_size), intent(inout) q_cons_vf
integer, intent(in) k
integer, intent(in) j
integer, intent(in) l
Shared derived types for field data, patch geometry, bubble dynamics, and MPI I/O structures.
Forward and inverse FFT wrappers (FFTW/cuFFT/hipFFT) for azimuthal Fourier filtering in cylindrical geometries.
integer rank
type(c_ptr) fftw_real_data
complex(dp), dimension(:), allocatable, target data_fltr_cmplx_gpu
integer odist
integer x_size
complex(c_double_complex), dimension(:), pointer data_fltr_cmplx
Filtered complex data in Fourier space.
real(dp), dimension(:), allocatable, target data_real_gpu
complex(c_double_complex), dimension(:), pointer data_cmplx
Complex data in Fourier space.
type(c_ptr) bwd_plan
integer ostride
integer batch_size
type(c_ptr) fftw_cmplx_data
real(c_double), dimension(:), pointer data_real
Real data.
impure subroutine, public s_apply_fourier_filter(q_cons_vf)
Apply a Fourier low-pass filter in the azimuthal direction to remove high-frequency content.
integer real_size
integer, dimension(:), allocatable iembed
type(c_ptr) bwd_plan_gpu
integer, dimension(:), allocatable gpu_fft_size
complex(dp), dimension(:), allocatable, target data_cmplx_gpu
type(c_ptr) fftw_fltr_cmplx_data
integer istride
integer nfq
type(c_ptr) fwd_plan_gpu
impure subroutine, public s_initialize_fftw_module
Initialize the FFTW module.
impure subroutine, public s_finalize_fftw_module
Finalize the FFTW module.
integer, dimension(:), allocatable oembed
type(c_ptr) fwd_plan
integer idist
integer cmplx_size
Global parameters for the computational domain, fluid properties, and simulation algorithm configuration.
integer sys_size
Number of unknowns in system of eqns.
MPI halo exchange, domain decomposition, and buffer packing/unpacking for the simulation solver.