ltdirad_mod.F90 Source File


This file depends on

sourcefile~~ltdirad_mod.f90~~EfferentGraph sourcefile~ltdirad_mod.f90 ltdirad_mod.F90 sourcefile~buffered_allocator_mod.f90 buffered_allocator_mod.F90 sourcefile~ltdirad_mod.f90->sourcefile~buffered_allocator_mod.f90 sourcefile~ledir_mod.f90 ledir_mod.F90 sourcefile~ltdirad_mod.f90->sourcefile~ledir_mod.f90 sourcefile~leinv_mod.f90 leinv_mod.F90 sourcefile~ltdirad_mod.f90->sourcefile~leinv_mod.f90 sourcefile~parkind_ectrans.f90 parkind_ectrans.F90 sourcefile~ltdirad_mod.f90->sourcefile~parkind_ectrans.f90 sourcefile~prepsnm_mod.f90 prepsnm_mod.F90 sourcefile~ltdirad_mod.f90->sourcefile~prepsnm_mod.f90 sourcefile~tpm_dim.f90 tpm_dim.F90 sourcefile~ltdirad_mod.f90->sourcefile~tpm_dim.f90 sourcefile~tpm_distr.f90 tpm_distr.F90 sourcefile~ltdirad_mod.f90->sourcefile~tpm_distr.f90 sourcefile~tpm_gen.f90 tpm_gen.F90 sourcefile~ltdirad_mod.f90->sourcefile~tpm_gen.f90 sourcefile~tpm_geometry.f90 tpm_geometry.F90 sourcefile~ltdirad_mod.f90->sourcefile~tpm_geometry.f90 sourcefile~tpm_stats.f90 tpm_stats.F90 sourcefile~ltdirad_mod.f90->sourcefile~tpm_stats.f90 sourcefile~tpm_trans.f90 tpm_trans.F90 sourcefile~ltdirad_mod.f90->sourcefile~tpm_trans.f90 sourcefile~updspad_mod.f90 updspad_mod.F90 sourcefile~ltdirad_mod.f90->sourcefile~updspad_mod.f90 sourcefile~updspbad_mod.f90 updspbad_mod.F90 sourcefile~ltdirad_mod.f90->sourcefile~updspbad_mod.f90 sourcefile~uvtvdad_mod.f90 uvtvdad_mod.F90 sourcefile~ltdirad_mod.f90->sourcefile~uvtvdad_mod.f90 sourcefile~abort_trans_mod.f90 abort_trans_mod.F90 sourcefile~buffered_allocator_mod.f90->sourcefile~abort_trans_mod.f90 sourcefile~growing_allocator_mod.f90 growing_allocator_mod.F90 sourcefile~buffered_allocator_mod.f90->sourcefile~growing_allocator_mod.f90 sourcefile~ledir_mod.f90->sourcefile~tpm_dim.f90 sourcefile~butterfly_alg_mod.f90 butterfly_alg_mod.F90 sourcefile~ledir_mod.f90->sourcefile~butterfly_alg_mod.f90 sourcefile~ectrans_blas_mod.f90 ectrans_blas_mod.F90 sourcefile~ledir_mod.f90->sourcefile~ectrans_blas_mod.f90 sourcefile~tpm_flt.f90 tpm_flt.F90 sourcefile~ledir_mod.f90->sourcefile~tpm_flt.f90 sourcefile~leinv_mod.f90->sourcefile~buffered_allocator_mod.f90 sourcefile~leinv_mod.f90->sourcefile~parkind_ectrans.f90 sourcefile~leinv_mod.f90->sourcefile~tpm_dim.f90 sourcefile~leinv_mod.f90->sourcefile~tpm_distr.f90 sourcefile~leinv_mod.f90->sourcefile~tpm_gen.f90 sourcefile~leinv_mod.f90->sourcefile~tpm_geometry.f90 sourcefile~leinv_mod.f90->sourcefile~tpm_stats.f90 sourcefile~hicblas_mod.f90 hicblas_mod.F90 sourcefile~leinv_mod.f90->sourcefile~hicblas_mod.f90 sourcefile~tpm_fields_gpu.f90 tpm_fields_gpu.F90 sourcefile~leinv_mod.f90->sourcefile~tpm_fields_gpu.f90 sourcefile~prepsnm_mod.f90->sourcefile~tpm_dim.f90 sourcefile~prepsnm_mod.f90->sourcefile~tpm_distr.f90 sourcefile~tpm_fields.f90 tpm_fields.F90 sourcefile~prepsnm_mod.f90->sourcefile~tpm_fields.f90 sourcefile~updspad_mod.f90->sourcefile~tpm_dim.f90 sourcefile~updspad_mod.f90->sourcefile~tpm_distr.f90 sourcefile~updspad_mod.f90->sourcefile~tpm_trans.f90 sourcefile~updspad_mod.f90->sourcefile~updspbad_mod.f90 sourcefile~updspbad_mod.f90->sourcefile~parkind_ectrans.f90 sourcefile~updspbad_mod.f90->sourcefile~tpm_dim.f90 sourcefile~updspbad_mod.f90->sourcefile~tpm_distr.f90 sourcefile~updspbad_mod.f90->sourcefile~tpm_gen.f90 sourcefile~updspbad_mod.f90->sourcefile~abort_trans_mod.f90 sourcefile~uvtvdad_mod.f90->sourcefile~parkind_ectrans.f90 sourcefile~uvtvdad_mod.f90->sourcefile~tpm_dim.f90 sourcefile~uvtvdad_mod.f90->sourcefile~tpm_distr.f90 sourcefile~uvtvdad_mod.f90->sourcefile~tpm_fields_gpu.f90 sourcefile~abort_trans_mod.f90->sourcefile~tpm_gen.f90 sourcefile~butterfly_alg_mod.f90->sourcefile~ectrans_blas_mod.f90 sourcefile~interpol_decomp_mod.f90 interpol_decomp_mod.F90 sourcefile~butterfly_alg_mod.f90->sourcefile~interpol_decomp_mod.f90 sourcefile~sharedmem_mod.f90 sharedmem_mod.F90 sourcefile~butterfly_alg_mod.f90->sourcefile~sharedmem_mod.f90 sourcefile~growing_allocator_mod.f90->sourcefile~tpm_gen.f90 sourcefile~growing_allocator_mod.f90->sourcefile~abort_trans_mod.f90 sourcefile~tpm_flt.f90->sourcefile~parkind_ectrans.f90 sourcefile~seefmm_mix.f90 seefmm_mix.F90 sourcefile~tpm_flt.f90->sourcefile~seefmm_mix.f90 sourcefile~seefmm_mix.f90->sourcefile~parkind_ectrans.f90 sourcefile~wts500_mod.f90 wts500_mod.F90 sourcefile~seefmm_mix.f90->sourcefile~wts500_mod.f90

Files dependent on this one

sourcefile~~ltdirad_mod.f90~~AfferentGraph sourcefile~ltdirad_mod.f90 ltdirad_mod.F90 sourcefile~dir_trans_ctlad_mod.f90 dir_trans_ctlad_mod.F90 sourcefile~dir_trans_ctlad_mod.f90->sourcefile~ltdirad_mod.f90 sourcefile~ltdir_ctlad_mod.f90 ltdir_ctlad_mod.F90 sourcefile~ltdir_ctlad_mod.f90->sourcefile~ltdirad_mod.f90 sourcefile~dir_trans_ctlad_mod.f90~2 dir_trans_ctlad_mod.F90 sourcefile~dir_trans_ctlad_mod.f90~2->sourcefile~ltdir_ctlad_mod.f90 sourcefile~dir_transad.f90 dir_transad.F90 sourcefile~dir_transad.f90->sourcefile~dir_trans_ctlad_mod.f90 sourcefile~dir_transad.f90~2 dir_transad.F90 sourcefile~dir_transad.f90~2->sourcefile~dir_trans_ctlad_mod.f90

Source Code

#define ALIGN(I, A) (((I)+(A)-1)/(A)*(A))
! (C) Copyright 1987- ECMWF.
! (C) Copyright 1987- Meteo-France.
! (C) Copyright 2022- NVIDIA.
! 
! This software is licensed under the terms of the Apache Licence Version 2.0
! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
! In applying this licence, ECMWF does not waive the privileges and immunities
! granted to it by virtue of its status as an intergovernmental organisation
! nor does it submit to any jurisdiction.
!

MODULE LTDIRAD_MOD
  USE PARKIND_ECTRANS,        ONLY: JPIM, JPRBT, JPRB, JPRD, JPIB
  USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE
  IMPLICIT NONE

  PRIVATE
  PUBLIC :: PREPARE_LTDIRAD, LTDIRAD_HANDLE, LTDIRAD

  TYPE LTDIRAD_HANDLE
    TYPE(ALLOCATION_RESERVATION_HANDLE) :: HOUT_AND_POA
    TYPE(ALLOCATION_RESERVATION_HANDLE) :: HINPS_AND_ZINPA
  END TYPE

CONTAINS
  FUNCTION PREPARE_LTDIRAD(ALLOCATOR, KF_FS, KF_UV) RESULT(HLTDIR)
    USE TPM_DISTR,              ONLY: D
    USE TPM_DIM,                ONLY: R
    USE ISO_C_BINDING,          ONLY: C_SIZE_T, C_SIZEOF
    USE LEDIR_MOD,              ONLY: LEDIR_STRIDES
    USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE

    IMPLICIT NONE

    TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
    INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS, KF_UV
    TYPE(LTDIRAD_HANDLE) :: HLTDIR

    INTEGER(KIND=JPIB) :: IALLOC_SZ
    INTEGER(KIND=JPIM)  :: IOUT_STRIDES0
    INTEGER(KIND=JPIB)  :: IOUT_SIZE
    INTEGER(KIND=JPIM)  :: IOUT0_STRIDES0, IOUT0_SIZE
    INTEGER(KIND=JPIM)  :: IIN_STRIDES0
    INTEGER(KIND=JPIB)  :: IIN_SIZE
    INTEGER(KIND=JPIM)  :: IIN0_STRIDES0, IIN0_SIZE

    REAL(KIND=JPRBT) :: ZPRBT_DUMMY
    REAL(KIND=JPRD) :: ZPRD_DUMMY

    CALL LEDIR_STRIDES(KF_FS,IOUT_STRIDES0=IOUT_STRIDES0,IOUT_SIZE=IOUT_SIZE,&
                       IOUT0_STRIDES0=IOUT0_STRIDES0,IOUT0_SIZE=IOUT0_SIZE,&
                       IIN_STRIDES0=IIN_STRIDES0,IIN_SIZE=IIN_SIZE,&
                       IIN0_STRIDES0=IIN0_STRIDES0,IIN0_SIZE=IIN0_SIZE)

    ! POA1
    IALLOC_SZ = ALIGN(2_JPIB*KF_FS*(R%NTMAX+3)*D%NUMP*C_SIZEOF(ZPRBT_DUMMY),128)
    ! POA2
    IALLOC_SZ = IALLOC_SZ + ALIGN(4_JPIB*KF_UV*(R%NTMAX+3)*D%NUMP*C_SIZEOF(ZPRBT_DUMMY),128)
    ! ZOUT
    IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*C_SIZEOF(ZPRBT_DUMMY),128)
    ! ZOUT0
    IALLOC_SZ = IALLOC_SZ+ ALIGN(IOUT0_SIZE*C_SIZEOF(ZPRD_DUMMY),128)

    HLTDIR%HOUT_AND_POA = RESERVE(ALLOCATOR, IALLOC_SZ, "HLTDIRAD%HOUT_AND_POA")

    ! Check if the reuse buffer is large enough
    IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZPRBT_DUMMY),128)
    IALLOC_SZ = IALLOC_SZ + ALIGN(IIN_SIZE*C_SIZEOF(ZPRBT_DUMMY),128)
    IALLOC_SZ = IALLOC_SZ + ALIGN(IIN0_SIZE*C_SIZEOF(ZPRD_DUMMY),128)
    IALLOC_SZ = IALLOC_SZ + ALIGN(IIN0_SIZE*C_SIZEOF(ZPRD_DUMMY),128)

    HLTDIR%HINPS_AND_ZINPA = RESERVE(ALLOCATOR, IALLOC_SZ, "HLTDIRAD%HINPS_AND_ZINPA")
  END FUNCTION PREPARE_LTDIRAD

  SUBROUTINE LTDIRAD(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALARS,&
                   & PSPVOR,PSPDIV,PSPSCALAR,&
                   & PSPSC3A,PSPSC3B,PSPSC2, &
                   & KFLDPTRUV,KFLDPTRSC)

    USE YOMHOOK,                ONLY: LHOOK, DR_HOOK, JPHOOK
    USE TPM_DIM,                ONLY: R
    USE TPM_DISTR,              ONLY: D
    USE TPM_GEOMETRY,           ONLY: G
    USE PREPSNM_MOD,            ONLY: PREPSNM
    USE LEDIR_MOD,              ONLY: LEDIR_STRIDES
    USE LEINV_MOD,              ONLY: LEINV
    USE UVTVDAD_MOD,            ONLY: UVTVDAD
    USE UPDSPAD_MOD,            ONLY: UPDSPAD
    USE UPDSPBAD_MOD,           ONLY: UPDSPBAD
    USE MPL_MODULE,             ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
    USE TPM_GEN,                ONLY: LSYNC_TRANS, NERR
    USE TPM_TRANS,              ONLY: NF_SC2, NF_SC3A, NF_SC3B
    USE TPM_STATS,              ONLY: GSTATS => GSTATS_NVTX
    USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION
    USE ISO_C_BINDING,          ONLY: C_SIZE_T, C_F_POINTER, C_LOC, C_SIZEOF

    !**** *LTDIR* - Control of Direct Legendre transform step

    !     Purpose.
    !     --------
    !        Tranform from Fourier space to spectral space, compute
    !        vorticity and divergence.

    !**   Interface.
    !     ----------
    !        *CALL* *LTDIR(...)*

    !        Explicit arguments :
    !        --------------------  KM     - zonal wavenumber
    !                              KMLOC  - local zonal wavenumber

    !        Implicit arguments :  None
    !        --------------------

    !     Method.
    !     -------

    !     Externals.
    !     ----------
    !         PREPSNM - prepare REPSNM for wavenumber KM
    !         PRFI2   - prepares the Fourier work arrays for model variables.
    !         LEDIR   - direct Legendre transform
    !         UVTVD   -
    !         UPDSP   - updating of spectral arrays (fields)

    !     Reference.
    !     ----------
    !        ECMWF Research Department documentation of the IFS

    !     Author.
    !     -------
    !        Mats Hamrud and Philippe Courtier  *ECMWF*

    !     Modifications.
    !     --------------
    !        Original : 87-11-24
    !        Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite
    !                            for uv formulation
    !        Modified 93-03-19 D. Giard - CDCONF='T' for tendencies
    !        Modified 93-11-18 M. Hamrud - use only one Fourier buffer
    !        Modified 94-04-06 R. El khatib Full-POS implementation
    !        M.Hamrud  : 94-11-01 New conf 'G' - vor,div->vor,div
    !                             instead of u,v->vor,div
    !        MPP Group : 95-10-01 Support for Distributed Memory version
    !        K. YESSAD (AUGUST 1996):
    !               - Legendre transforms for transmission coefficients.
    !        Modified : 04/06/99 D.Salmond : change order of AIA and SIA
    !        R. El Khatib 12-Jul-2012 LDSPC2 replaced by UVTVD
    !     ------------------------------------------------------------------

    IMPLICIT NONE

    !     DUMMY INTEGER SCALARS
    INTEGER(KIND=JPIM)  :: KM
    INTEGER(KIND=JPIM)  :: KMLOC
    INTEGER(KIND=JPIM),INTENT(IN)   :: KF_FS,KF_UV,KF_SCALARS

    REAL(KIND=JPRB)   ,OPTIONAL,INTENT(IN)  :: PSPVOR(:,:)
    REAL(KIND=JPRB)   ,OPTIONAL,INTENT(IN)  :: PSPDIV(:,:)
    REAL(KIND=JPRB)   ,OPTIONAL,INTENT(IN)  :: PSPSCALAR(:,:)
    REAL(KIND=JPRB)   ,OPTIONAL,INTENT(IN)  :: PSPSC2(:,:)
    REAL(KIND=JPRB)   ,OPTIONAL,INTENT(IN)  :: PSPSC3A(:,:,:)
    REAL(KIND=JPRB)   ,OPTIONAL,INTENT(IN)  :: PSPSC3B(:,:,:)
    INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN)  :: KFLDPTRUV(:)
    INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN)  :: KFLDPTRSC(:)
    REAL(KIND=JPRBT)  ,POINTER, INTENT(OUT) :: ZINPS(:), ZINPA(:)
    REAL(KIND=JPRD)   ,POINTER, INTENT(OUT) :: ZINPS0(:), ZINPA0(:)

    !     LOCAL INTEGER SCALARS
    INTEGER(KIND=JPIM) :: IFC, IIFC, IDGLU, IFIRST

    REAL(KIND=JPHOOK) :: ZHOOK_HANDLE
    REAL(KIND=JPRB), POINTER :: POA1_L(:), POA1(:,:,:)
    REAL(KIND=JPRB), POINTER :: POA2_L(:), POA2(:,:,:)
    REAL(KIND=JPRB), POINTER :: PU(:,:,:), PV(:,:,:), PVOR(:,:,:), PDIV(:,:,:)
    REAL(KIND=JPRBT), POINTER :: ZOUT(:)
    REAL(KIND=JPRD), POINTER :: ZOUT0(:)
    TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR
    TYPE(LTDIRAD_HANDLE), INTENT(IN) :: HLTDIR
    INTEGER(KIND=JPIB) :: IALLOC_POS, IALLOC_SZ
    INTEGER(KIND=JPIM)  :: IOUT_STRIDES0
    INTEGER(KIND=JPIB)  :: IOUT_SIZE
    INTEGER(KIND=JPIM)  :: IOUT0_STRIDES0, IOUT0_SIZE
    INTEGER(KIND=JPIM)  :: IIN_STRIDES0
    INTEGER(KIND=JPIB)  :: IIN_SIZE
    INTEGER(KIND=JPIM)  :: IIN0_STRIDES0, IIN0_SIZE

    INTEGER(KIND=JPIM)  :: JFLD, JN
  
    ASSOCIATE(D_NUMP=>D%NUMP, R_NTMAX=>R%NTMAX)

    !     ------------------------------------------------------------------
    IF (LHOOK) CALL DR_HOOK('LTDIR_MOD',0,ZHOOK_HANDLE)

    !     ------------------------------------------------------------------

    !*       1.    PREPARE LEGENDRE POLONOMIALS AND EPSNM
    !              --------------------------------------


    !     ------------------------------------------------------------------

    !*       2.    PREPARE WORK ARRAYS.
    !              --------------------
    CALL LEDIR_STRIDES(KF_FS,IOUT_STRIDES0=IOUT_STRIDES0,IOUT_SIZE=IOUT_SIZE,&
                       IOUT0_STRIDES0=IOUT0_STRIDES0,IOUT0_SIZE=IOUT0_SIZE,&
                       IIN_STRIDES0=IIN_STRIDES0,IIN_SIZE=IIN_SIZE,&
                       IIN0_STRIDES0=IIN0_STRIDES0,IIN0_SIZE=IIN0_SIZE)

    IALLOC_POS = 1

    IALLOC_SZ = ALIGN(2_JPIB*KF_FS*(R%NTMAX+3)*D%NUMP*C_SIZEOF(POA1_L(1)),128)
    CALL ASSIGN_PTR(POA1_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
        & IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
    CALL C_F_POINTER(C_LOC(POA1_L), POA1, (/ 2*KF_FS, R%NTMAX+3, D%NUMP /))
    IALLOC_POS = IALLOC_POS + IALLOC_SZ

    IALLOC_SZ = ALIGN(4_JPIB*KF_UV*(R%NTMAX+3)*D%NUMP*C_SIZEOF(POA2_L(1)),128)
    CALL ASSIGN_PTR(POA2_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
        & IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
    CALL C_F_POINTER(C_LOC(POA2_L), POA2, (/ 4*KF_UV, R%NTMAX+3, D%NUMP /))
    IALLOC_POS = IALLOC_POS + IALLOC_SZ

    ! ZOUT
    IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUT(1)),128)
    CALL ASSIGN_PTR(ZOUT, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
        & IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
    IALLOC_POS = IALLOC_POS + IALLOC_SZ

    ! ZOUT0
    IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUT0(1)),128)
    CALL ASSIGN_PTR(ZOUT0, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
        & IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
    IALLOC_POS = IALLOC_POS + IALLOC_SZ

    IALLOC_POS=1

    IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZINPS(0)),128)
    CALL ASSIGN_PTR(ZINPS, GET_ALLOCATION(ALLOCATOR, HLTDIR%HINPS_AND_ZINPA),&
        & IALLOC_POS, IALLOC_SZ)
    IALLOC_POS=IALLOC_POS+IALLOC_SZ

    IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZINPA(0)),128)
    CALL ASSIGN_PTR(ZINPA, GET_ALLOCATION(ALLOCATOR, HLTDIR%HINPS_AND_ZINPA),&
        & IALLOC_POS, IALLOC_SZ)
    IALLOC_POS=IALLOC_POS+IALLOC_SZ

    IALLOC_SZ = ALIGN(IIN0_SIZE*C_SIZEOF(ZINPS0(0)),128)
    CALL ASSIGN_PTR(ZINPS0, GET_ALLOCATION(ALLOCATOR, HLTDIR%HINPS_AND_ZINPA),&
        & IALLOC_POS, IALLOC_SZ)
    IALLOC_POS=IALLOC_POS+IALLOC_SZ

    IALLOC_SZ = ALIGN(IIN0_SIZE*C_SIZEOF(ZINPA0(0)),128)
    CALL ASSIGN_PTR(ZINPA0, GET_ALLOCATION(ALLOCATOR, HLTDIR%HINPS_AND_ZINPA),&
        & IALLOC_POS, IALLOC_SZ)
    IALLOC_POS=IALLOC_POS+IALLOC_SZ

#ifdef ACCGPU
    !$ACC DATA PRESENT(POA1,R_NTMAX,D_NUMP) 
    !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) COPYIN(KF_FS)
#endif
#ifdef OMPGPU
    !$OMP TARGET DATA MAP(PRESENT,ALLOC:POA1,R_NTMAX,D_NUMP)
    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) DEFAULT(NONE) SHARED(R,D,KF_FS,POA1) &
    !$OMP& MAP(TO:KF_FS)
#endif
    DO KMLOC=1,D_NUMP
        DO JN=1,R_NTMAX+3
          DO JFLD=1,2*KF_FS
            POA1(JFLD,JN,KMLOC) = 0
          END DO
        END DO
    END DO
#ifdef ACCGPU
    !$ACC END DATA
#endif
#ifdef OMPGPU
    !$OMP END TARGET DATA
#endif

    !     ------------------------------------------------------------------

    !*       3.    PREPARE FOURIER ARRAYS.
    !              ----------------------

    !     ------------------------------------------------------------------

    !*       4.    COPY WORK ARRAYS TO DEVICE.
    !              ---------------------------


#ifdef OMPGPU
    !$OMP TARGET DATA MAP(TO:PSPVOR,PSPDIV) IF(KF_UV > 0)
    !$OMP TARGET DATA MAP(TO:PSPSCALAR) IF(PRESENT(PSPSCALAR) .AND. KF_SCALARS > 0)
    !$OMP TARGET DATA MAP(TO:PSPSC2) IF(NF_SC2 > 0)
    !$OMP TARGET DATA MAP(TO:PSPSC3A) IF(NF_SC3A > 0)
    !$OMP TARGET DATA MAP(TO:PSPSC3B) IF(NF_SC3B > 0)
#endif
#ifdef ACCGPU
    !$ACC DATA COPYIN(PSPVOR,PSPDIV) IF(KF_UV > 0)
    !$ACC DATA COPYIN(PSPSCALAR) IF(PRESENT(PSPSCALAR) .AND. KF_SCALARS > 0)
    !$ACC DATA COPYIN(PSPSC2) IF(NF_SC2 > 0)
    !$ACC DATA COPYIN(PSPSC3A) IF(NF_SC3A > 0)
    !$ACC DATA COPYIN(PSPSC3B) IF(NF_SC3B > 0)
#endif
    !     ------------------------------------------------------------------

    !*       6.    UPDATE SPECTRAL ARRAYS.
    !              -----------------------

    ! this is on the host, so need to cp from device, Nils
    CALL UPDSPAD(KF_UV,KF_SCALARS,POA1,&
               & PSPSCALAR,&
               & PSPSC3A,PSPSC3B,PSPSC2 , &
               & KFLDPTRUV,KFLDPTRSC)

    !     ------------------------------------------------------------------

    !*       5.    COMPUTE VORTICITY AND DIVERGENCE.
    !              ---------------------------------

    IF( KF_UV > 0 ) THEN
       ! U and V are in POA1
       IFIRST = 0
       PU => POA1(IFIRST+1:IFIRST+2*KF_UV,:,:)
       IFIRST = IFIRST + 2*KF_UV
       PV => POA1(IFIRST+1:IFIRST+2*KF_UV,:,:)
       ! Compute VOR and DIV ino POA2
       IFIRST = 0
       PVOR => POA2(IFIRST+1:IFIRST+2*KF_UV,:,:)
       IFIRST = IFIRST + 2*KF_UV
       PDIV => POA2(IFIRST+1:IFIRST+2*KF_UV,:,:)

       ! Write back. Note, if we have UV, the contract says we *must* have VOR/DIV
       CALL UPDSPBAD(KF_UV,PVOR,PSPVOR,KFLDPTRUV)
       CALL UPDSPBAD(KF_UV,PDIV,PSPDIV,KFLDPTRUV)
       
       ! Compute vorticity and divergence
       CALL UVTVDAD(KF_UV,PU,PV,PVOR,PDIV)
    ENDIF


#ifdef ACCGPU
    !$ACC WAIT(1)
#endif

    IF (LSYNC_TRANS) THEN
      CALL GSTATS(430,0)
      CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='')
      CALL GSTATS(430,1)
    ENDIF
    CALL GSTATS(412,0)
#ifdef OMPGPU
    !$OMP END TARGET DATA
    !$OMP END TARGET DATA
    !$OMP END TARGET DATA
    !$OMP END TARGET DATA
    !$OMP END TARGET DATA
#endif
#ifdef ACCGPU
    !$ACC END DATA
    !$ACC END DATA
    !$ACC END DATA
    !$ACC END DATA
    !$ACC END DATA
#endif

    IF (LSYNC_TRANS) THEN
      CALL GSTATS(432,0)
      CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='')
      CALL GSTATS(432,1)
    ENDIF
    CALL GSTATS(412,1)

    ! do the legendre transform
    CALL LEINV(ALLOCATOR,POA1,ZOUT,ZOUT0,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS)

    !     ------------------------------------------------------------------

    IF (LHOOK) CALL DR_HOOK('LTDIRAD_MOD',1,ZHOOK_HANDLE)

    END ASSOCIATE

  END SUBROUTINE LTDIRAD
END MODULE LTDIRAD_MOD