! (C) Copyright 2014- ECMWF. ! (C) Copyright 2022- NVIDIA. ! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. ! In applying this licence, ECMWF does not waive the privileges and immunities ! granted to it by virtue of its status as an intergovernmental organisation ! nor does it submit to any jurisdiction. ! MODULE TPM_HICFFT ! Author. ! ------- ! George Mozdzynski ! ! Modifications. ! -------------- ! Original October 2014 ! HICFFT abstraction for CUDA and HIP August 2023 B. Reuter USE, INTRINSIC :: ISO_C_BINDING USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE IMPLICIT NONE SAVE PRIVATE PUBLIC CREATE_PLAN_FFT, DESTROY_PLAN_FFT, DESTROY_ALL_PLANS_FFT, INIT_PLANS_FFT, EXECUTE_PLAN_FFT, & & HICFFT_RESOL, HICT, EXECUTE_DIR_FFT, EXECUTE_INV_FFT TYPE HICFFT_TYPE INTEGER(KIND=C_INT),POINTER :: N_PLANS(:) TYPE(HICFFT_PLAN),POINTER :: HICFFT_PLANS(:) INTEGER(KIND=C_INT) :: N_MAX=0 END TYPE HICFFT_TYPE TYPE HICFFT_PLAN TYPE(C_PTR) :: NPLAN INTEGER(KIND=C_INT) :: NLOT INTEGER(KIND=C_INT) :: NSTRIDE INTEGER(KIND=C_INT) :: NTYPE TYPE(HICFFT_PLAN),POINTER :: NEXT_PLAN => NULL() END TYPE HICFFT_PLAN TYPE(HICFFT_TYPE),ALLOCATABLE,TARGET :: HICFFT_RESOL(:) TYPE(HICFFT_TYPE),POINTER :: HICT INTERFACE EXECUTE_DIR_FFT MODULE PROCEDURE EXECUTE_DIR_FFT_FLOAT,EXECUTE_DIR_FFT_DOUBLE END INTERFACE INTERFACE EXECUTE_INV_FFT MODULE PROCEDURE EXECUTE_INV_FFT_FLOAT,EXECUTE_INV_FFT_DOUBLE END INTERFACE ! ------------------------------------------------------------------ CONTAINS ! ------------------------------------------------------------------ SUBROUTINE INIT_PLANS_FFT(KDLON) INTEGER(KIND=C_INT),INTENT(IN) :: KDLON HICT%N_MAX=KDLON ALLOCATE(HICT%HICFFT_PLANS(HICT%N_MAX)) ALLOCATE(HICT%N_PLANS(HICT%N_MAX)) HICT%N_PLANS(:)=0 RETURN END SUBROUTINE INIT_PLANS_FFT SUBROUTINE CREATE_PLAN_FFT(KPLAN,KTYPE,KN,KLOT,KSTRIDE) TYPE(C_PTR),INTENT(OUT) :: KPLAN INTEGER(KIND=C_INT),INTENT(IN) :: KTYPE,KN,KLOT,KSTRIDE TYPE(C_PTR) :: IPLAN INTEGER(KIND=C_INT) :: IRANK, ISTRIDE INTEGER(KIND=C_INT) :: JL, JN INTEGER(KIND=C_INT) :: IRDIST,ICDIST,IN(1),IEMBED(1) LOGICAL :: LLFOUND TYPE(HICFFT_PLAN),POINTER :: CURR_HICFFT_PLAN,START_HICFFT_PLAN INTERFACE SUBROUTINE HICFFT_CREATE_PLAN(KPLAN,KTYPE,KN,KLOT,KSTRIDE) BIND(C,NAME="hicfft_create_plan_") USE, INTRINSIC :: ISO_C_BINDING TYPE(C_PTR), INTENT(OUT) :: KPLAN INTEGER(C_INT), INTENT(IN) :: KTYPE,KN,KLOT,KSTRIDE END SUBROUTINE HICFFT_CREATE_PLAN END INTERFACE IF( KN > HICT%N_MAX )THEN stop 'CREATE_PLAN_FFT: KN > N_MAX THAT WAS INITIALISED IN INIT_PLANS_FFT' ENDIF IRANK=1 ISTRIDE=1 IN(1)=KN IEMBED(1)=IN(1) ICDIST=KN/2+1 IRDIST=ICDIST*2 !!$OMP CRITICAL LLFOUND=.FALSE. CURR_HICFFT_PLAN=>HICT%HICFFT_PLANS(KN) ! search for plan in existing plans DO JL=1,HICT%N_PLANS(KN) IF( KLOT == CURR_HICFFT_PLAN%NLOT .AND. KTYPE == CURR_HICFFT_PLAN%NTYPE & & .AND. KSTRIDE == CURR_HICFFT_PLAN%NSTRIDE)THEN LLFOUND=.TRUE. IPLAN=CURR_HICFFT_PLAN%NPLAN EXIT ELSEIF( JL /= HICT%N_PLANS(KN) )THEN CURR_HICFFT_PLAN=>CURR_HICFFT_PLAN%NEXT_PLAN ENDIF ENDDO IF( .NOT.LLFOUND )THEN CALL HICFFT_CREATE_PLAN(IPLAN,KTYPE,KN,KLOT,KSTRIDE) KPLAN=IPLAN HICT%N_PLANS(KN)=HICT%N_PLANS(KN)+1 IF( HICT%N_PLANS(KN) /= 1 )THEN ALLOCATE(CURR_HICFFT_PLAN%NEXT_PLAN) CURR_HICFFT_PLAN=>CURR_HICFFT_PLAN%NEXT_PLAN ENDIF CURR_HICFFT_PLAN%NPLAN=IPLAN CURR_HICFFT_PLAN%NLOT=KLOT CURR_HICFFT_PLAN%NSTRIDE=KSTRIDE CURR_HICFFT_PLAN%NTYPE=KTYPE CURR_HICFFT_PLAN%NEXT_PLAN=>NULL() ELSE KPLAN=IPLAN ENDIF !!$OMP END CRITICAL END SUBROUTINE CREATE_PLAN_FFT SUBROUTINE DESTROY_PLAN_FFT(KPLAN) TYPE(C_PTR),INTENT(IN) :: KPLAN INTERFACE SUBROUTINE HICFFT_DESTROY_PLAN(KPLAN) BIND(C, NAME="hicfft_destroy_plan_") USE, INTRINSIC :: ISO_C_BINDING TYPE(C_PTR), VALUE, INTENT(IN) :: KPLAN END SUBROUTINE HICFFT_DESTROY_PLAN END INTERFACE CALL HICFFT_DESTROY_PLAN(KPLAN) END SUBROUTINE DESTROY_PLAN_FFT SUBROUTINE DESTROY_ALL_PLANS_FFT INTEGER(KIND=C_INT) :: JL, JN TYPE(HICFFT_PLAN),POINTER :: CURR_HICFFT_PLAN IF( .NOT. ASSOCIATED(HICT) ) THEN RETURN ENDIF IF ( .NOT. ASSOCIATED(HICT%HICFFT_PLANS) .OR. .NOT. ASSOCIATED(HICT%N_PLANS) ) THEN RETURN ENDIF DO JN = 1, HICT%N_MAX CURR_HICFFT_PLAN=>HICT%HICFFT_PLANS(JN) DO JL = 1, HICT%N_PLANS(JN) IF( ASSOCIATED(CURR_HICFFT_PLAN) ) THEN CALL DESTROY_PLAN_FFT(CURR_HICFFT_PLAN%NPLAN) CURR_HICFFT_PLAN=>CURR_HICFFT_PLAN%NEXT_PLAN ENDIF ENDDO ENDDO DEALLOCATE(HICT%HICFFT_PLANS) DEALLOCATE(HICT%N_PLANS) END SUBROUTINE DESTROY_ALL_PLANS_FFT SUBROUTINE EXECUTE_PLAN_FFT(KN,N,X_IN,X_OUT,PLAN_PTR) TYPE(C_PTR) :: PLAN_PTR INTEGER(KIND=C_INT) :: KN INTEGER(KIND=C_INT) :: N REAL(KIND=JPRBT), TARGET :: X_IN REAL(KIND=JPRBT), TARGET :: X_OUT INTERFACE SUBROUTINE HICFFT_EXECUTE_PLAN (KN, N, X_IN_PTR, X_OUT_PTR, PLAN_PTR) & & BIND(C,NAME="hicfft_execute_plan_") USE, INTRINSIC :: ISO_C_BINDING TYPE(C_PTR), VALUE :: PLAN_PTR INTEGER(KIND=C_INT), VALUE :: KN INTEGER(KIND=C_INT), VALUE :: N TYPE(C_PTR), VALUE :: X_IN_PTR, X_OUT_PTR END SUBROUTINE HICFFT_EXECUTE_PLAN END INTERFACE #ifdef OMPGPU !$OMP TARGET DATA USE_DEVICE_PTR(X_IN,X_OUT) #endif #ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(X_IN,X_OUT) #endif CALL HICFFT_EXECUTE_PLAN(KN,N,C_LOC(X_IN),C_LOC(X_OUT),PLAN_PTR) #ifdef ACCGPU !$ACC END HOST_DATA #endif #ifdef OMPGPU !$OMP END TARGET DATA #endif END SUBROUTINE EXECUTE_PLAN_FFT SUBROUTINE EXECUTE_DIR_FFT_FLOAT(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,ALLOC) USE PARKIND_ECTRANS ,ONLY : JPIM IMPLICIT NONE REAL(KIND=C_FLOAT), INTENT(IN) :: PREEL_REAL(:) REAL(KIND=C_FLOAT), INTENT(OUT) :: PREEL_COMPLEX(:) INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:), OFFSETS(:) TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC INTERFACE SUBROUTINE EXECUTE_DIR_FFT_FLOAT_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_dir_fft_float") USE ISO_C_BINDING REAL(KIND=C_FLOAT), INTENT(IN) :: PREEL_REAL(*) REAL(KIND=C_FLOAT), INTENT(OUT) :: PREEL_COMPLEX(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*), OFFSETS(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: NFFT TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC END SUBROUTINE END INTERFACE #ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(PREEL_REAL,PREEL_COMPLEX) #endif CALL EXECUTE_DIR_FFT_FLOAT_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) #ifdef ACCGPU !$ACC END HOST_DATA #endif END SUBROUTINE EXECUTE_DIR_FFT_FLOAT SUBROUTINE EXECUTE_DIR_FFT_DOUBLE(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,ALLOC) USE PARKIND_ECTRANS ,ONLY : JPIM IMPLICIT NONE REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_REAL(:) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_COMPLEX(:) INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:), OFFSETS(:) TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC INTERFACE SUBROUTINE EXECUTE_DIR_FFT_DOUBLE_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_dir_fft_double") USE ISO_C_BINDING REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_REAL(*) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_COMPLEX(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*), OFFSETS(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: NFFT TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC END SUBROUTINE END INTERFACE #ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(PREEL_REAL,PREEL_COMPLEX) #endif CALL EXECUTE_DIR_FFT_DOUBLE_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) #ifdef ACCGPU !$ACC END HOST_DATA #endif END SUBROUTINE EXECUTE_DIR_FFT_DOUBLE SUBROUTINE EXECUTE_INV_FFT_FLOAT(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,ALLOC) USE PARKIND_ECTRANS ,ONLY : JPIM IMPLICIT NONE REAL(KIND=C_FLOAT), INTENT(IN) :: PREEL_COMPLEX(:) REAL(KIND=C_FLOAT), INTENT(OUT) :: PREEL_REAL(:) INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:), OFFSETS(:) TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC INTERFACE SUBROUTINE EXECUTE_INV_FFT_FLOAT_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_inv_fft_float") USE ISO_C_BINDING REAL(KIND=C_FLOAT), INTENT(IN) :: PREEL_COMPLEX(*) REAL(KIND=C_FLOAT), INTENT(OUT) :: PREEL_REAL(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*), OFFSETS(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: NFFT TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC END SUBROUTINE END INTERFACE #ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(PREEL_COMPLEX,PREEL_REAL) #endif CALL EXECUTE_INV_FFT_FLOAT_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) #ifdef ACCGPU !$ACC END HOST_DATA #endif END SUBROUTINE SUBROUTINE EXECUTE_INV_FFT_DOUBLE(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,ALLOC) USE PARKIND_ECTRANS ,ONLY : JPIM IMPLICIT NONE REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_COMPLEX(:) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_REAL(:) INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:), OFFSETS(:) TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC INTERFACE SUBROUTINE EXECUTE_INV_FFT_DOUBLE_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_inv_fft_double") USE ISO_C_BINDING REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_COMPLEX(*) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_REAL(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*), OFFSETS(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: NFFT TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC END SUBROUTINE END INTERFACE #ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(PREEL_COMPLEX,PREEL_REAL) #endif CALL EXECUTE_INV_FFT_DOUBLE_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) #ifdef ACCGPU !$ACC END HOST_DATA #endif END SUBROUTINE END MODULE TPM_HICFFT