MFC
Exascale flow solver
Loading...
Searching...
No Matches
m_fftw.fpp.f90
Go to the documentation of this file.
1# 1 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
2!>
3!! @file
4!! @brief Contains module m_fftw
5
6# 1 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 1
7# 1 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 1
8# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
9# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
10# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
11# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
12# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
13# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
14
15# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
16# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
17# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
18
19# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
20
21# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
22
23# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
24
25# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
26
27# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
28
29# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
30
31# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
32! New line at end of file is required for FYPP
33# 2 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
34# 1 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 1
35# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
36# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
37# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
38# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
39# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
40# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
41
42# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
43# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
44# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
45
46# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
47
48# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
49
50# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
51
52# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
53
54# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
55
56# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
57
58# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
59! New line at end of file is required for FYPP
60# 2 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp" 2
61
62# 4 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
63# 5 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
64# 6 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
65# 7 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
66# 8 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
67
68# 20 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
69
70# 43 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
71
72# 48 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
73
74# 53 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
75
76# 58 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
77
78# 63 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
79
80# 68 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
81
82# 76 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
83
84# 81 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
85
86# 86 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
87
88# 91 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
89
90# 96 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
91
92# 101 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
93
94# 106 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
95
96# 111 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
97
98# 116 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
99
100# 121 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
101
102# 151 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
103
104# 192 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
105
106# 206 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
107
108# 231 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
109
110# 242 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
111
112# 244 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
113# 255 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
114
115# 284 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
116
117# 294 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
118
119# 304 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
120
121# 313 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
122
123# 330 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
124
125# 340 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
126
127# 347 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
128
129# 353 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
130
131# 359 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
132
133# 365 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
134
135# 371 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
136
137# 377 "/home/runner/work/MFC/MFC/src/common/include/omp_macros.fpp"
138! New line at end of file is required for FYPP
139# 3 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
140# 1 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 1
141# 1 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp" 1
142# 2 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
143# 3 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
144# 4 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
145# 5 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
146# 6 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
147
148# 8 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
149# 9 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
150# 10 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
151
152# 17 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
153
154# 46 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
155
156# 58 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
157
158# 68 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
159
160# 98 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
161
162# 110 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
163
164# 120 "/home/runner/work/MFC/MFC/src/common/include/shared_parallel_macros.fpp"
165! New line at end of file is required for FYPP
166# 2 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp" 2
167
168# 7 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
169
170# 17 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
171
172# 22 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
173
174# 27 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
175
176# 32 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
177
178# 37 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
179
180# 42 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
181
182# 47 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
183
184# 52 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
185
186# 57 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
187
188# 62 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
189
190# 73 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
191
192# 78 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
193
194# 83 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
195
196# 88 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
197
198# 103 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
199
200# 131 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
201
202# 160 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
203
204# 175 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
205
206# 193 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
207
208# 215 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
209
210# 244 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
211
212# 259 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
213
214# 269 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
215
216# 278 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
217
218# 294 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
219
220# 304 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
221
222# 311 "/home/runner/work/MFC/MFC/src/common/include/acc_macros.fpp"
223! New line at end of file is required for FYPP
224# 4 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp" 2
225
226! GPU parallel region (scalar reductions, maxval/minval)
227# 23 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
228
229! GPU parallel loop over threads (most common GPU macro)
230# 43 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
231
232! Required closing for GPU_PARALLEL_LOOP
233# 55 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
234
235! Mark routine for device compilation
236# 112 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
237
238! Declare device-resident data
239# 130 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
240
241! Inner loop within a GPU parallel region
242# 145 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
243
244! Scoped GPU data region
245# 164 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
246
247! Host code with device pointers (for MPI with GPU buffers)
248# 193 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
249
250! Allocate device memory (unscoped)
251# 207 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
252
253! Free device memory
254# 219 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
255
256! Atomic operation on device
257# 231 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
258
259! End atomic capture block
260# 242 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
261
262! Copy data between host and device
263# 254 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
264
265! Synchronization barrier
266# 266 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
267
268! Import GPU library module (openacc or omp_lib)
269# 275 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
270
271! Emit code only for AMD compiler
272# 282 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
273
274! Emit code for non-Cray compilers
275# 289 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
276
277! Emit code only for Cray compiler
278# 296 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
279
280! Emit code for non-NVIDIA compilers
281# 303 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
282
283# 305 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
284# 306 "/home/runner/work/MFC/MFC/src/common/include/parallel_macros.fpp"
285! New line at end of file is required for FYPP
286# 2 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp" 2
287
288# 14 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
289
290! Caution: This macro requires the use of a binding script to set CUDA_VISIBLE_DEVICES, such that we have one GPU device per MPI
291! rank. That's because for both cudaMemAdvise (preferred location) and cudaMemPrefetchAsync we use location = device_id = 0. For an
292! example, see misc/nvidia_uvm/bind.sh. (Macro purpose: NVIDIA unified memory page placement hint.)
293# 57 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
294
295! Allocate and create GPU device memory
296# 77 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
297
298! Free GPU device memory and deallocate
299# 85 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
300
301! Cray-specific GPU pointer setup for vector fields
302# 109 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
303
304! Cray-specific GPU pointer setup for scalar fields
305# 125 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
306
307! Cray-specific GPU pointer setup for acoustic source spatials
308# 150 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
309
310# 156 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
311
312# 163 "/home/runner/work/MFC/MFC/src/common/include/macros.fpp"
313! New line at end of file is required for FYPP
314# 6 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp" 2
315
316!> @brief Forward and inverse FFT wrappers (FFTW/cuFFT/hipFFT) for azimuthal Fourier filtering in cylindrical geometries
317module m_fftw
318
319 use, intrinsic :: iso_c_binding
320
323 use m_mpi_proxy
324#if defined(MFC_GPU) && defined(__PGI)
325 use cufft
326#elif defined(MFC_GPU)
327 use hipfort
328 use hipfort_check
329 use hipfort_hipfft
330#endif
331
332 implicit none
333
335
336#if !defined(MFC_GPU)
337 include 'fftw3.f03'
338#endif
339
340 type(c_ptr) :: fwd_plan, bwd_plan
343 real(c_double), pointer :: data_real(:) !< Real data
344 complex(c_double_complex), pointer :: data_cmplx(:) !< Complex data in Fourier space
345 complex(c_double_complex), pointer :: data_fltr_cmplx(:) !< Filtered complex data in Fourier space
346#if defined(MFC_GPU)
347
348# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
349#if defined(MFC_OpenACC)
350# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
351!$acc declare create(real_size, cmplx_size, x_size, batch_size, Nfq, i2)
352# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
353#elif defined(MFC_OpenMP)
354# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
355!$omp declare target (real_size, cmplx_size, x_size, batch_size, Nfq, i2)
356# 38 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
357#endif
358
359 real(dp), allocatable, target :: data_real_gpu(:)
360 complex(dp), allocatable, target :: data_cmplx_gpu(:)
361 complex(dp), allocatable, target :: data_fltr_cmplx_gpu(:)
362
363# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
364#if defined(MFC_OpenACC)
365# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
366!$acc declare create(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
367# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
368#elif defined(MFC_OpenMP)
369# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
370!$omp declare target (data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
371# 43 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
372#endif
373
374 !> @cond
375#if defined(__PGI)
376 integer :: fwd_plan_gpu, bwd_plan_gpu
377#else
378 !> @endcond
380 !> @cond
381#endif
382 !> @endcond
383
384 integer, allocatable :: gpu_fft_size(:), iembed(:), oembed(:)
385 integer :: istride, ostride, rank
386#endif
387
388contains
389
390 !> Initialize the FFTW module
!! Sets the transform sizes real_size, cmplx_size and x_size from p and m, then prepares one of two paths:
!!  - MFC_GPU builds: allocates the embed arrays, copies the size scalars to the device, and allocates the three
!!    work buffers (real, complex, filtered complex) together with their device copies.
!!  - other builds: allocates FFTW buffers, maps them to Fortran pointers and creates r2c / c2r plans with
!!    fftw_estimate.
!! NOTE(review): this listing omits some source lines (its numbering skips 401, 407, 440, 466, 549, 551, 554 and
!! 556 inside this routine), so statements such as the assignment of batch_size are not visible here. Confirm
!! against the original m_fftw.fpp before changing any logic.
391 impure subroutine s_initialize_fftw_module
392
393 integer :: ierr !< Generic flag used to identify and report GPU errors
394 ! Size of input array going into DFT
395
396 real_size = p + 1
397 ! Size of output array coming out of DFT (n/2 + 1 complex values for a real input of length n = p + 1)
398 cmplx_size = (p + 1)/2 + 1
399
! x_size = m + 1 is a factor in the GPU buffer lengths allocated further down
400 x_size = m + 1
402
403#if defined(MFC_GPU)
! rank and the unit strides are set here; presumably they are consumed by the plan-creation calls near the end
! of this routine, whose leading lines are not visible in this listing - TODO confirm
404 rank = 1; istride = 1; ostride = 1
405 allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank))
406
! Embedding sizes: real_size on the input side, cmplx_size on the output side
! NOTE(review): no assignment to gpu_fft_size is visible here (listing line 407 is absent)
408 iembed(1) = real_size
409 oembed(1) = cmplx_size
410
! Copy the size scalars to the device ...
411# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
412#if defined(MFC_OpenACC)
413# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
414!$acc enter data copyin(real_size, cmplx_size, x_size, sys_size, batch_size, Nfq)
415# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
416#elif defined(MFC_OpenMP)
417# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
418!$omp target enter data map(to:real_size, cmplx_size, x_size, sys_size, batch_size, Nfq)
419# 81 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
420#endif
421
! ... then refresh the device copies (Nfq is not part of this update list)
422# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
423#if defined(MFC_OpenACC)
424# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
425!$acc update device(real_size, cmplx_size, x_size, sys_size, batch_size)
426# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
427#elif defined(MFC_OpenMP)
428# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
429!$omp target update to(real_size, cmplx_size, x_size, sys_size, batch_size)
430# 82 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
431#endif
432#else
433 ! Allocate input and output DFT data buffers (lengths real_size and cmplx_size)
434 fftw_real_data = fftw_alloc_real(int(real_size, c_size_t))
435 fftw_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t))
436 fftw_fltr_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t))
437 ! Associate input and output data pointers with allocated memory
438 call c_f_pointer(fftw_real_data, data_real, [real_size])
439 call c_f_pointer(fftw_cmplx_data, data_cmplx, [cmplx_size])
! NOTE(review): the association of data_fltr_cmplx with fftw_fltr_cmplx_data is not visible here (listing line
! 440 is absent), yet data_fltr_cmplx is used by the backward plan below - confirm it exists in the original
441
442 ! Generate plans for forward and backward DFTs
443 fwd_plan = fftw_plan_dft_r2c_1d(real_size, data_real, data_cmplx, fftw_estimate)
444 bwd_plan = fftw_plan_dft_c2r_1d(real_size, data_fltr_cmplx, data_real, fftw_estimate)
445#endif
446
! GPU path: for each work buffer, print and flush a trace line naming the allocation (MFC_DEBUG builds only),
! allocate the buffer and create its device copy
447#if defined(MFC_GPU)
448#ifdef MFC_DEBUG
449# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
450 block
451# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
452 use iso_fortran_env, only: output_unit
453# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
454
455# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
456 print *, 'm_fftw.fpp:99: ', '@:ALLOCATE(data_real_gpu(1:real_size*x_size*sys_size))'
457# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
458
459# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
460 call flush (output_unit)
461# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
462 end block
463# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
464#endif
! NOTE(review): the allocate statement for data_real_gpu announced by the trace line above is not visible here
! (listing line 466 is absent); the two sibling buffers below do show their allocate statements
465# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
467# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
468
469# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
470
471# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
472#if defined(MFC_OpenACC)
473# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
474!$acc enter data create(data_real_gpu)
475# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
476#elif defined(MFC_OpenMP)
477# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
478!$omp target enter data map(always,alloc:data_real_gpu)
479# 99 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
480#endif
481#ifdef MFC_DEBUG
482# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
483 block
484# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
485 use iso_fortran_env, only: output_unit
486# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
487
488# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
489 print *, 'm_fftw.fpp:100: ', '@:ALLOCATE(data_cmplx_gpu(1:cmplx_size*x_size*sys_size))'
490# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
491
492# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
493 call flush (output_unit)
494# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
495 end block
496# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
497#endif
498# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
499 allocate (data_cmplx_gpu(1:cmplx_size*x_size*sys_size))
500# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
501
502# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
503
504# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
505#if defined(MFC_OpenACC)
506# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
507!$acc enter data create(data_cmplx_gpu)
508# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
509#elif defined(MFC_OpenMP)
510# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
511!$omp target enter data map(always,alloc:data_cmplx_gpu)
512# 100 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
513#endif
514#ifdef MFC_DEBUG
515# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
516 block
517# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
518 use iso_fortran_env, only: output_unit
519# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
520
521# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
522 print *, 'm_fftw.fpp:101: ', '@:ALLOCATE(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size))'
523# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
524
525# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
526 call flush (output_unit)
527# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
528 end block
529# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
530#endif
531# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
532 allocate (data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size))
533# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
534
535# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
536
537# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
538#if defined(MFC_OpenACC)
539# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
540!$acc enter data create(data_fltr_cmplx_gpu)
541# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
542#elif defined(MFC_OpenMP)
543# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
544!$omp target enter data map(always,alloc:data_fltr_cmplx_gpu)
545# 101 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
546#endif
547
! GPU plan creation. Only the continuation lines of these statements survive in this listing: they show a D2Z
! and a Z2D transform type, each followed by batch_size as the final argument.
! NOTE(review): the leading lines (listing lines 549, 551, 554, 556) are absent - presumably they assign ierr
! and name fwd_plan_gpu / bwd_plan_gpu; confirm against the original source.
548#if defined(__PGI)
550 & cufft_d2z, batch_size)
552 & cufft_z2d, batch_size)
553#else
555 & hipfft_d2z, batch_size)
557 & hipfft_z2d, batch_size)
558#endif
559#endif
560
561 end subroutine s_initialize_fftw_module
562
563 !> Apply a Fourier low-pass filter in the azimuthal direction to remove high-frequency content
564 impure subroutine s_apply_fourier_filter(q_cons_vf)
565
566 type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
567 integer :: i, j, k, l !< Generic loop iterators
568 integer :: ierr !< Generic flag used to identify and report GPU errors
569 ! Restrict filter to processors that have cells adjacent to axis
570
571 if (bc_y%beg >= 0) return
572#if defined(MFC_GPU)
573
574# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
575
576# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
577#if defined(MFC_OpenACC)
578# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
579!$acc parallel loop collapse(3) gang vector default(present)
580# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
581#elif defined(MFC_OpenMP)
582# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
583
584# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
585
586# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
587
588# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
589!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
590# 128 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
591#endif
592 do k = 1, sys_size
593 do j = 0, m
594 do l = 1, cmplx_size
595 data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp)
596 end do
597 end do
598 end do
599
600# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
601#if defined(MFC_OpenACC)
602# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
603!$acc end parallel loop
604# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
605#elif defined(MFC_OpenMP)
606# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
607
608# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
609!$omp end target teams loop
610# 136 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
611#endif
612
613
614# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
615
616# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
617#if defined(MFC_OpenACC)
618# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
619!$acc parallel loop collapse(3) gang vector default(present)
620# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
621#elif defined(MFC_OpenMP)
622# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
623
624# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
625
626# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
627
628# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
629!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
630# 138 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
631#endif
632 do k = 1, sys_size
633 do j = 0, m
634 do l = 0, p
635 data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l)
636 end do
637 end do
638 end do
639
640# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
641#if defined(MFC_OpenACC)
642# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
643!$acc end parallel loop
644# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
645#elif defined(MFC_OpenMP)
646# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
647
648# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
649!$omp end target teams loop
650# 146 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
651#endif
652
653
654# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
655#if defined(MFC_OpenACC)
656# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
657!$acc host_data use_device(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
658# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
659#if defined(__PGI)
660# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
661 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
662# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
663#else
664# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
665 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
666# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
667 call hipcheck(hipdevicesynchronize())
668# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
669#endif
670# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
671!$acc end host_data
672# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
673#elif defined(MFC_OpenMP)
674# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
675!$omp target data use_device_addr(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
676# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
677#if defined(__PGI)
678# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
679 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
680# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
681#else
682# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
683 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
684# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
685 call hipcheck(hipdevicesynchronize())
686# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
687#endif
688# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
689!$omp end target data
690# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
691#else
692# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
693#if defined(__PGI)
694# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
695 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
696# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
697#else
698# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
699 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
700# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
701 call hipcheck(hipdevicesynchronize())
702# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
703#endif
704# 148 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
705#endif
706# 156 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
707 nfq = 3
708
709# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
710#if defined(MFC_OpenACC)
711# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
712!$acc update device(Nfq)
713# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
714#elif defined(MFC_OpenMP)
715# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
716!$omp target update to(Nfq)
717# 157 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
718#endif
719
720
721# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
722
723# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
724#if defined(MFC_OpenACC)
725# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
726!$acc parallel loop collapse(3) gang vector default(present)
727# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
728#elif defined(MFC_OpenMP)
729# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
730
731# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
732
733# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
734
735# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
736!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
737# 159 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
738#endif
739 do k = 1, sys_size
740 do j = 0, m
741 do l = 1, nfq
744 end do
745 end do
746 end do
747
748# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
749#if defined(MFC_OpenACC)
750# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
751!$acc end parallel loop
752# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
753#elif defined(MFC_OpenMP)
754# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
755
756# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
757!$omp end target teams loop
758# 168 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
759#endif
760
761
762# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
763#if defined(MFC_OpenACC)
764# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
765!$acc host_data use_device(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
766# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
767#if defined(__PGI)
768# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
769 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
770# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
771#else
772# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
773 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
774# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
775 call hipcheck(hipdevicesynchronize())
776# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
777#endif
778# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
779!$acc end host_data
780# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
781#elif defined(MFC_OpenMP)
782# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
783!$omp target data use_device_addr(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
784# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
785#if defined(__PGI)
786# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
787 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
788# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
789#else
790# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
791 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
792# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
793 call hipcheck(hipdevicesynchronize())
794# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
795#endif
796# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
797!$omp end target data
798# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
799#else
800# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
801#if defined(__PGI)
802# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
803 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
804# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
805#else
806# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
807 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
808# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
809 call hipcheck(hipdevicesynchronize())
810# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
811#endif
812# 170 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
813#endif
814# 178 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
815
816
817# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
818
819# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
820#if defined(MFC_OpenACC)
821# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
822!$acc parallel loop collapse(3) gang vector default(present)
823# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
824#elif defined(MFC_OpenMP)
825# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
826
827# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
828
829# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
830
831# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
832!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
833# 179 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
834#endif
835 do k = 1, sys_size
836 do j = 0, m
837 do l = 0, p
838 data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1) &
839 & *real_size*x_size)/real(real_size, dp)
840 q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
841 end do
842 end do
843 end do
844
845# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
846#if defined(MFC_OpenACC)
847# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
848!$acc end parallel loop
849# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
850#elif defined(MFC_OpenMP)
851# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
852
853# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
854!$omp end target teams loop
855# 189 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
856#endif
857
858 do i = 1, fourier_rings
859 i2 = i
860
861# 193 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
862#if defined(MFC_OpenACC)
863# 193 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
864!$acc update device(i2)
865# 193 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
866#elif defined(MFC_OpenMP)
867# 193 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
868!$omp target update to(i2)
869# 193 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
870#endif
871
872
873# 195 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
874
875# 195 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
876#if defined(MFC_OpenACC)
877# 195 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
878!$acc parallel loop collapse(3) gang vector default(present)
879# 195 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
880#elif defined(MFC_OpenMP)
881# 195 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
882
883# 195 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
884
885# 195 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
886
887# 195 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
888!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
889# 195 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
890#endif
891 do k = 1, sys_size
892 do j = 0, m
893 do l = 1, cmplx_size
894 data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp)
895 end do
896 end do
897 end do
898
899# 203 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
900#if defined(MFC_OpenACC)
901# 203 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
902!$acc end parallel loop
903# 203 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
904#elif defined(MFC_OpenMP)
905# 203 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
906
907# 203 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
908!$omp end target teams loop
909# 203 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
910#endif
911
912
913# 205 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
914
915# 205 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
916#if defined(MFC_OpenACC)
917# 205 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
918!$acc parallel loop collapse(3) gang vector default(present)
919# 205 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
920#elif defined(MFC_OpenMP)
921# 205 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
922
923# 205 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
924
925# 205 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
926
927# 205 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
928!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
929# 205 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
930#endif
931 do k = 1, sys_size
932 do j = 0, m
933 do l = 0, p
934 data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i2, l)
935 end do
936 end do
937 end do
938
939# 213 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
940#if defined(MFC_OpenACC)
941# 213 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
942!$acc end parallel loop
943# 213 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
944#elif defined(MFC_OpenMP)
945# 213 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
946
947# 213 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
948!$omp end target teams loop
949# 213 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
950#endif
951
952
953# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
954#if defined(MFC_OpenACC)
955# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
956!$acc host_data use_device(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
957# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
958#if defined(__PGI)
959# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
960 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
961# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
962#else
963# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
964 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
965# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
966 call hipcheck(hipdevicesynchronize())
967# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
968#endif
969# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
970!$acc end host_data
971# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
972#elif defined(MFC_OpenMP)
973# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
974!$omp target data use_device_addr(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
975# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
976#if defined(__PGI)
977# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
978 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
979# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
980#else
981# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
982 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
983# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
984 call hipcheck(hipdevicesynchronize())
985# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
986#endif
987# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
988!$omp end target data
989# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
990#else
991# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
992#if defined(__PGI)
993# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
994 ierr = cufftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
995# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
996#else
997# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
998 ierr = hipfftexecd2z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
999# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1000 call hipcheck(hipdevicesynchronize())
1001# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1002#endif
1003# 215 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1004#endif
1005# 223 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1006
1007 nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
1008
1009# 225 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1010#if defined(MFC_OpenACC)
1011# 225 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1012!$acc update device(Nfq)
1013# 225 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1014#elif defined(MFC_OpenMP)
1015# 225 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1016!$omp target update to(Nfq)
1017# 225 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1018#endif
1019
1020
1021# 227 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1022
1023# 227 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1024#if defined(MFC_OpenACC)
1025# 227 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1026!$acc parallel loop collapse(3) gang vector default(present)
1027# 227 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1028#elif defined(MFC_OpenMP)
1029# 227 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1030
1031# 227 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1032
1033# 227 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1034
1035# 227 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1036!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
1037# 227 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1038#endif
1039 do k = 1, sys_size
1040 do j = 0, m
1041 do l = 1, nfq
1043 & - 1)*cmplx_size*x_size)
1044 end do
1045 end do
1046 end do
1047
1048# 236 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1049#if defined(MFC_OpenACC)
1050# 236 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1051!$acc end parallel loop
1052# 236 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1053#elif defined(MFC_OpenMP)
1054# 236 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1055
1056# 236 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1057!$omp end target teams loop
1058# 236 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1059#endif
1060
1061
1062# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1063#if defined(MFC_OpenACC)
1064# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1065!$acc host_data use_device(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
1066# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1067#if defined(__PGI)
1068# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1069 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1070# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1071#else
1072# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1073 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1074# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1075 call hipcheck(hipdevicesynchronize())
1076# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1077#endif
1078# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1079!$acc end host_data
1080# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1081#elif defined(MFC_OpenMP)
1082# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1083!$omp target data use_device_addr(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
1084# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1085#if defined(__PGI)
1086# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1087 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1088# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1089#else
1090# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1091 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1092# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1093 call hipcheck(hipdevicesynchronize())
1094# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1095#endif
1096# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1097!$omp end target data
1098# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1099#else
1100# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1101#if defined(__PGI)
1102# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1103 ierr = cufftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1104# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1105#else
1106# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1107 ierr = hipfftexecz2d(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
1108# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1109 call hipcheck(hipdevicesynchronize())
1110# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1111#endif
1112# 238 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1113#endif
1114# 246 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1115
1116
1117# 247 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1118
1119# 247 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1120#if defined(MFC_OpenACC)
1121# 247 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1122!$acc parallel loop collapse(3) gang vector default(present)
1123# 247 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1124#elif defined(MFC_OpenMP)
1125# 247 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1126
1127# 247 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1128
1129# 247 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1130
1131# 247 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1132!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) collapse(3) defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)
1133# 247 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1134#endif
1135 do k = 1, sys_size
1136 do j = 0, m
1137 do l = 0, p
1138 data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k &
1139 & - 1)*real_size*x_size)/real(real_size, dp)
1140 q_cons_vf(k)%sf(j, i2, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
1141 end do
1142 end do
1143 end do
1144
1145# 257 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1146#if defined(MFC_OpenACC)
1147# 257 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1148!$acc end parallel loop
1149# 257 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1150#elif defined(MFC_OpenMP)
1151# 257 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1152
1153# 257 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1154!$omp end target teams loop
1155# 257 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1156#endif
1157 end do
1158#else
1159 nfq = 3
1160 do j = 0, m
1161 do k = 1, sys_size
1162 data_fltr_cmplx(:) = (0_dp, 0_dp)
1163 data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0,0:p)
1164 call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx)
1166 call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real)
1167 data_real(:) = data_real(:)/real(real_size, dp)
1168 q_cons_vf(k)%sf(j, 0,0:p) = data_real(1:p + 1)
1169 end do
1170 end do
1171
1172 ! Apply Fourier filter to additional rings
1173 do i = 1, fourier_rings
1174 nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
1175 do j = 0, m
1176 do k = 1, sys_size
1177 data_fltr_cmplx(:) = (0_dp, 0_dp)
1178 data_real(1:p + 1) = q_cons_vf(k)%sf(j, i,0:p)
1179 call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx)
1181 call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real)
1182 data_real(:) = data_real(:)/real(real_size, dp)
1183 q_cons_vf(k)%sf(j, i,0:p) = data_real(1:p + 1)
1184 end do
1185 end do
1186 end do
1187#endif
1188
1189 end subroutine s_apply_fourier_filter
1190
1191 !> Finalize the FFTW module
 !! Tears down the FFT state owned by this module. Takes no arguments.
 !! - With MFC_GPU defined: issues an OpenACC `exit data delete` or an OpenMP
 !!   `target exit data map(release:)` for data_real_gpu, data_fltr_cmplx_gpu and
 !!   data_cmplx_gpu, then destroys fwd_plan_gpu and bwd_plan_gpu through cuFFT
 !!   (when __PGI is defined) or hipFFT (otherwise).
 !! - Without MFC_GPU: frees the three FFTW buffers and destroys both FFTW plans.
1192 impure subroutine s_finalize_fftw_module
1193
1194#if defined(MFC_GPU)
1195 integer :: ierr !< Generic flag used to identify and report GPU errors
1196
 ! Debug builds only: print a trace line naming the macro call being expanded and
 ! flush output_unit so the message is emitted immediately.
1197#ifdef MFC_DEBUG
1198# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1199 block
1200# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1201 use iso_fortran_env, only: output_unit
1202# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1203
1204# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1205 print *, 'm_fftw.fpp:298: ', '@:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu)'
1206# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1207
1208# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1209 call flush (output_unit)
1210# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1211 end block
1212# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1213#endif
1214# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1215
 ! Drop the three GPU work arrays from the device data environment, using whichever
 ! offload model (OpenACC or OpenMP target) this build was compiled for.
1216# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1217#if defined(MFC_OpenACC)
1218# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1219!$acc exit data delete(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu)
1220# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1221#elif defined(MFC_OpenMP)
1222# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1223!$omp target exit data map(release:data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu)
1224# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
1225#endif
1226# 298 "/home/runner/work/MFC/MFC/src/simulation/m_fftw.fpp"
 ! NOTE(review): listing line 1227 is absent from this view. Presumably it is the
 ! host-side deallocate of the same three arrays emitted by @:DEALLOCATE -- confirm
 ! against the original m_fftw.fpp before relying on this listing.
 ! Destroy the forward and backward GPU FFT plans. The status returned in ierr is
 ! stored but not inspected anywhere in this routine.
1228#if defined(__PGI)
1229 ierr = cufftdestroy(fwd_plan_gpu)
1230 ierr = cufftdestroy(bwd_plan_gpu)
1231#else
1232 ierr = hipfftdestroy(fwd_plan_gpu)
1233 ierr = hipfftdestroy(bwd_plan_gpu)
1234#endif
1235#else
 ! CPU build: free the real, complex and filtered-complex FFTW buffers ...
1236 call fftw_free(fftw_real_data)
1237 call fftw_free(fftw_cmplx_data)
1238 call fftw_free(fftw_fltr_cmplx_data)
1239
 ! ... then destroy the forward and backward FFTW plans.
1240 call fftw_destroy_plan(fwd_plan)
1241 call fftw_destroy_plan(bwd_plan)
1242#endif
1243
1244 end subroutine s_finalize_fftw_module
1245
1246end module m_fftw
type(scalar_field), dimension(sys_size), intent(inout) q_cons_vf
integer, intent(in) k
integer, intent(in) j
integer, intent(in) l
Shared derived types for field data, patch geometry, bubble dynamics, and MPI I/O structures.
Forward and inverse FFT wrappers (FFTW/cuFFT/hipFFT) for azimuthal Fourier filtering in cylindrical g...
integer rank
type(c_ptr) fftw_real_data
complex(dp), dimension(:), allocatable, target data_fltr_cmplx_gpu
integer x_size
complex(c_double_complex), dimension(:), pointer data_fltr_cmplx
Filtered complex data in Fourier space.
real(dp), dimension(:), allocatable, target data_real_gpu
complex(c_double_complex), dimension(:), pointer data_cmplx
Complex data in Fourier space.
type(c_ptr) bwd_plan
integer ostride
integer batch_size
type(c_ptr) fftw_cmplx_data
real(c_double), dimension(:), pointer data_real
Real data.
impure subroutine, public s_apply_fourier_filter(q_cons_vf)
Apply a Fourier low-pass filter in the azimuthal direction to remove high-frequency content.
integer real_size
integer, dimension(:), allocatable iembed
type(c_ptr) bwd_plan_gpu
integer, dimension(:), allocatable gpu_fft_size
complex(dp), dimension(:), allocatable, target data_cmplx_gpu
type(c_ptr) fftw_fltr_cmplx_data
integer istride
integer nfq
type(c_ptr) fwd_plan_gpu
impure subroutine, public s_initialize_fftw_module
Initialize the FFTW module.
impure subroutine, public s_finalize_fftw_module
Finalize the FFTW module.
integer, dimension(:), allocatable oembed
type(c_ptr) fwd_plan
integer i2
integer cmplx_size
Global parameters for the computational domain, fluid properties, and simulation algorithm configurat...
integer sys_size
Number of unknowns in the system of equations.
MPI halo exchange, domain decomposition, and buffer packing/unpacking for the simulation solver.