trgtol_mod.F90 Source File


This file depends on

sourcefile~~trgtol_mod.f90~2~~EfferentGraph sourcefile~trgtol_mod.f90~2 trgtol_mod.F90 sourcefile~abort_trans_mod.f90 abort_trans_mod.F90 sourcefile~trgtol_mod.f90~2->sourcefile~abort_trans_mod.f90 sourcefile~inigptr_mod.f90 inigptr_mod.F90 sourcefile~trgtol_mod.f90~2->sourcefile~inigptr_mod.f90 sourcefile~pe2set_mod.f90 pe2set_mod.F90 sourcefile~trgtol_mod.f90~2->sourcefile~pe2set_mod.f90 sourcefile~tpm_distr.f90 tpm_distr.F90 sourcefile~trgtol_mod.f90~2->sourcefile~tpm_distr.f90 sourcefile~tpm_gen.f90 tpm_gen.F90 sourcefile~trgtol_mod.f90~2->sourcefile~tpm_gen.f90 sourcefile~tpm_trans.f90 tpm_trans.F90 sourcefile~trgtol_mod.f90~2->sourcefile~tpm_trans.f90 sourcefile~abort_trans_mod.f90->sourcefile~tpm_distr.f90 sourcefile~abort_trans_mod.f90->sourcefile~tpm_gen.f90 sourcefile~inigptr_mod.f90->sourcefile~abort_trans_mod.f90 sourcefile~inigptr_mod.f90->sourcefile~tpm_distr.f90 sourcefile~inigptr_mod.f90->sourcefile~tpm_gen.f90 sourcefile~inigptr_mod.f90->sourcefile~tpm_trans.f90 sourcefile~eq_regions_mod.f90 eq_regions_mod.F90 sourcefile~inigptr_mod.f90->sourcefile~eq_regions_mod.f90 sourcefile~pe2set_mod.f90->sourcefile~abort_trans_mod.f90 sourcefile~pe2set_mod.f90->sourcefile~tpm_distr.f90 sourcefile~pe2set_mod.f90->sourcefile~eq_regions_mod.f90 sourcefile~parkind_ectrans.f90 parkind_ectrans.F90 sourcefile~tpm_gen.f90->sourcefile~parkind_ectrans.f90 sourcefile~growing_allocator_mod.f90 growing_allocator_mod.F90 sourcefile~tpm_trans.f90->sourcefile~growing_allocator_mod.f90 sourcefile~tpm_trans.f90->sourcefile~parkind_ectrans.f90 sourcefile~eq_regions_mod.f90->sourcefile~parkind_ectrans.f90

Source Code

! (C) Copyright 1995- ECMWF.
! (C) Copyright 1995- Meteo-France.
! 
! This software is licensed under the terms of the Apache Licence Version 2.0
! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
! In applying this licence, ECMWF does not waive the privileges and immunities
! granted to it by virtue of its status as an intergovernmental organisation
! nor does it submit to any jurisdiction.
!

MODULE TRGTOL_MOD

PUBLIC TRGTOL
PRIVATE TRGTOL_PROLOG, TRGTOL_COMM, TRGTOL_COMM_HEAP, TRGTOL_COMM_STACK

CONTAINS

SUBROUTINE TRGTOL(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,&
 &PGP,PGPUV,PGP3A,PGP3B,PGP2)

!**** *TRGTOL * - head routine for transposition of grid point data from column
!                 structure to latitudinal. Reorganize data between
!                 grid point calculations and direct Fourier Transform

!**   Interface.
!     ----------
!        *call* *trgtol_prolog(...)

!        Explicit arguments :
!        --------------------

!        Implicit arguments :
!        --------------------

!     Method.
!     -------
!        See documentation

!     Externals.
!     ----------

!     Reference.
!     ----------
!        ECMWF Research Department documentation of the IFS

!     Author.
!     -------
!        R. El Khatib *Meteo-France*

!     Modifications.
!     --------------
!        Original  : 18-Aug-2014 from trgtol
!        R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR
!     ------------------------------------------------------------------

USE PARKIND1  ,ONLY : JPIM     ,JPRB
USE YOMHOOK   ,ONLY : LHOOK,   DR_HOOK, JPHOOK

USE TPM_GEN         ,ONLY : NSTACK_MEMORY_TR
USE TPM_DISTR       ,ONLY : D, NPRTRNS, NPROC
USE TPM_TRANS       ,ONLY : NGPBLKS

IMPLICIT NONE

INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP
REAL(KIND=JPRB),INTENT(OUT)   :: PGLAT(KF_FS,D%NLENGTF)
INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP)
INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G
INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP(:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGPUV(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP3A(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP3B(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP2(:,:,:)

INTEGER(KIND=JPIM) :: ISENDCOUNT
INTEGER(KIND=JPIM) :: IRECVCOUNT
INTEGER(KIND=JPIM) :: INSEND
INTEGER(KIND=JPIM) :: INRECV
INTEGER(KIND=JPIM) :: ISENDTOT (NPROC)
INTEGER(KIND=JPIM) :: IRECVTOT (NPROC)
INTEGER(KIND=JPIM) :: ISEND    (NPROC)
INTEGER(KIND=JPIM) :: IRECV    (NPROC)
INTEGER(KIND=JPIM) :: IINDEX(D%NLENGTF)
INTEGER(KIND=JPIM) :: INDOFF(NPROC)
INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS)

REAL(KIND=JPHOOK) :: ZHOOK_HANDLE

!     ------------------------------------------------------------------

IF (LHOOK) CALL DR_HOOK('TRGTOL',0,ZHOOK_HANDLE)

CALL TRGTOL_PROLOG(KF_FS,KF_GP,KVSET,&
 & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND)
IF (NSTACK_MEMORY_TR==0) THEN
  CALL TRGTOL_COMM_HEAP(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET, &
   & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, &
   & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2)
ELSE
  CALL TRGTOL_COMM_STACK(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET, &
   & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, &
   & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2)
ENDIF

IF (LHOOK) CALL DR_HOOK('TRGTOL',1,ZHOOK_HANDLE)

!     ------------------------------------------------------------------

END SUBROUTINE TRGTOL

SUBROUTINE TRGTOL_PROLOG(KF_FS,KF_GP,KVSET,&
 & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND)

!**** *TRGTOL_PROLOG * - prolog for transposition of grid point data from column
!                 structure to latitudinal. Reorganize data between
!                 grid point calculations and direct Fourier Transform
!                 the purpose is essentially 
!                 to compute the size of communication buffers in order to enable
!                 the use of automatic arrays later.


!     Purpose.
!     --------


!**   Interface.
!     ----------
!        *call* *trgtol_prolog(...)

!        Explicit arguments :
!        --------------------

!        Implicit arguments :
!        --------------------

!     Method.
!     -------
!        See documentation

!     Externals.
!     ----------

!     Reference.
!     ----------
!        ECMWF Research Department documentation of the IFS

!     Author.
!     -------
!        R. El Khatib *Meteo-France*

!     Modifications.
!     --------------
!        Original  : 18-Aug-2014 from trgtol
!     ------------------------------------------------------------------



USE PARKIND1  ,ONLY : JPIM 

USE TPM_DISTR       ,ONLY : D, NPRTRNS, MYSETW, MYPROC, NPROC
USE TPM_TRANS       ,ONLY : NGPBLKS

USE INIGPTR_MOD     ,ONLY : INIGPTR
USE PE2SET_MOD      ,ONLY : PE2SET
!

IMPLICIT NONE

INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP
INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP)

INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDCOUNT
INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVCOUNT
INTEGER(KIND=JPIM), INTENT(OUT) :: KNSEND
INTEGER(KIND=JPIM), INTENT(OUT) :: KNRECV
INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDTOT (NPROC)
INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVTOT (NPROC)
INTEGER(KIND=JPIM), INTENT(OUT) :: KSEND    (NPROC)
INTEGER(KIND=JPIM), INTENT(OUT) :: KRECV    (NPROC)
INTEGER(KIND=JPIM), INTENT(OUT) :: KINDEX(D%NLENGTF)
INTEGER(KIND=JPIM), INTENT(OUT) :: KNDOFF(NPROC)
INTEGER(KIND=JPIM), INTENT(OUT) :: KGPTRSEND(2,NGPBLKS,NPRTRNS)

INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS)
INTEGER(KIND=JPIM) :: IFIRSTLAT, IGL, IGLL, ILASTLAT, JROC, IPOS, ISETB, ISETA
INTEGER(KIND=JPIM) :: ISETV, J, JFLD, JGL, JL, ISETW,  INDOFFX

!     ------------------------------------------------------------------

!*       0.    Some initializations
!              --------------------

CALL GSTATS(1805,0)

CALL INIGPTR(KGPTRSEND,IGPTRRECV)

INDOFFX  = 0
KNRECV   = 0
KNSEND   = 0

DO JROC=1,NPROC

  CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV)

!             count up expected number of fields
  IPOS = 0
  DO JFLD=1,KF_GP
    IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1) IPOS = IPOS+1
  ENDDO
  KSENDTOT(JROC) = IGPTRRECV(ISETW)*IPOS

  IF( JROC /= MYPROC) THEN
    IF(KSENDTOT(JROC) > 0) THEN
      KNSEND = KNSEND+1
      KSEND(KNSEND)=JROC
    ENDIF
  ENDIF

  IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA))
  ILASTLAT  = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA))

  IPOS = 0
  DO JGL=IFIRSTLAT,ILASTLAT
    IGL  = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA)
    IPOS = IPOS+D%NONL(IGL,ISETB)
  ENDDO

  KRECVTOT(JROC) = IPOS*KF_FS
  IF(KRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN
    KNRECV = KNRECV + 1
    KRECV(KNRECV)=JROC
  ENDIF

  IF(IPOS > 0) THEN
    KNDOFF(JROC) = INDOFFX
    INDOFFX = INDOFFX+IPOS
    IPOS = 0
    DO JGL=IFIRSTLAT,ILASTLAT
      IGL  = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA)
      IGLL = JGL-D%NPTRLS(MYSETW)+1
      DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),&
       &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1
        IPOS = IPOS+1
        KINDEX(IPOS+KNDOFF(JROC)) = JL
      ENDDO
    ENDDO
  ENDIF

ENDDO

KSENDCOUNT=0
KRECVCOUNT=0
DO J=1,NPROC
  KSENDCOUNT=MAX(KSENDCOUNT,KSENDTOT(J))
  KRECVCOUNT=MAX(KRECVCOUNT,KRECVTOT(J))
ENDDO

CALL GSTATS(1805,1)

END SUBROUTINE TRGTOL_PROLOG

SUBROUTINE TRGTOL_COMM_HEAP(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,&
 & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,&
 & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2)

USE PARKIND1  ,ONLY : JPIM     ,JPRB
USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC
USE TPM_TRANS ,ONLY : NGPBLKS

IMPLICIT NONE

INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP
REAL(KIND=JPRB),INTENT(OUT)   :: PGLAT(KF_FS,D%NLENGTF)
INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP)
INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G
INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT
INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT
INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND
INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV
INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KSEND    (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KRECV    (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF)
INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS)
INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP(:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGPUV(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP3A(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP3B(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP2(:,:,:)

REAL(KIND=JPRB), ALLOCATABLE, SAVE :: ZCOMBUFS_HEAP(:,:)
REAL(KIND=JPRB), ALLOCATABLE, SAVE :: ZCOMBUFR_HEAP(:,:)
INTEGER(KIND=JPIM), SAVE :: INRECV_PREV = -1
INTEGER(KIND=JPIM), SAVE :: INSEND_PREV = -1
INTEGER(KIND=JPIM), SAVE :: IRECVCOUNT_PREV = -1
INTEGER(KIND=JPIM), SAVE :: ISENDCOUNT_PREV = -1

IF ( .NOT. ALLOCATED(ZCOMBUFS_HEAP) ) THEN
  ALLOCATE(ZCOMBUFS_HEAP(-1:KSENDCOUNT,KNSEND))
  ISENDCOUNT_PREV = KSENDCOUNT
  INSEND_PREV = KNSEND
ELSEIF ( KSENDCOUNT .NE. ISENDCOUNT_PREV .OR. KNSEND .NE. INSEND_PREV ) THEN
  DEALLOCATE(ZCOMBUFS_HEAP)
  ALLOCATE(ZCOMBUFS_HEAP(-1:KSENDCOUNT,KNSEND))
  ISENDCOUNT_PREV = KSENDCOUNT
  INSEND_PREV = KNSEND
ENDIF

! Now, force the OS to allocate this shared array right now, not when it starts to be used which is
! an OPEN-MP loop, that would cause a threads synchronization lock :
IF (KNSEND > 0 .AND. KSENDCOUNT >= -1) ZCOMBUFS_HEAP(-1,1) = HUGE(1._JPRB)

IF ( .NOT. ALLOCATED(ZCOMBUFR_HEAP) ) THEN
  ALLOCATE(ZCOMBUFR_HEAP(-1:KRECVCOUNT,KNRECV))
  IRECVCOUNT_PREV = KRECVCOUNT
  INRECV_PREV = KNRECV
ELSEIF ( KRECVCOUNT .NE. IRECVCOUNT_PREV .OR. KNRECV .NE. INRECV_PREV ) THEN
  DEALLOCATE(ZCOMBUFR_HEAP)
  ALLOCATE(ZCOMBUFR_HEAP(-1:KRECVCOUNT,KNRECV))
  IRECVCOUNT_PREV = KRECVCOUNT
  INRECV_PREV = KNRECV
ENDIF

CALL TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,&
 & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,&
 & ZCOMBUFS_HEAP,ZCOMBUFR_HEAP, &
 & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2)

END SUBROUTINE TRGTOL_COMM_HEAP

SUBROUTINE TRGTOL_COMM_STACK(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,&
 & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,&
 & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2)

USE PARKIND1  ,ONLY : JPIM     ,JPRB
USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC
USE TPM_TRANS ,ONLY : NGPBLKS

IMPLICIT NONE

INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP
REAL(KIND=JPRB),INTENT(OUT)   :: PGLAT(KF_FS,D%NLENGTF)
INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP)
INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G
INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT
INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT
INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND
INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV
INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KSEND    (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KRECV    (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF)
INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS)
INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP(:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGPUV(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP3A(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP3B(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP2(:,:,:)

REAL(KIND=JPRB) :: ZCOMBUFS_STACK(-1:KSENDCOUNT,KNSEND)
REAL(KIND=JPRB) :: ZCOMBUFR_STACK(-1:KRECVCOUNT,KNRECV)

CALL TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,&
 & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,&
 & ZCOMBUFS_STACK,ZCOMBUFR_STACK, &
 & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2)

END SUBROUTINE TRGTOL_COMM_STACK

SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,&
 & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,&
 & PCOMBUFS,PCOMBUFR, &
 & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2)

!**** *TRGTOL_COMM * - transposition of grid point data from column
!                 structure to latitudinal. Reorganize data between
!                 grid point calculations and direct Fourier Transform

!     Purpose.
!     --------


!**   Interface.
!     ----------
!        *call* *trgtol(...)

!        Explicit arguments :
!        --------------------
!           PGLAT    -  Latitudinal data ready for direct FFT (output)
!           PGP    -  Blocked grid point data    (input)

!        Implicit arguments :
!        --------------------

!     Method.
!     -------
!        See documentation

!     Externals.
!     ----------

!     Reference.
!     ----------
!        ECMWF Research Department documentation of the IFS

!     Author.
!     -------
!        MPP Group *ECMWF*

!     Modifications.
!     --------------
!        Original: 95-10-01
!        D.Dent  : 97-08-04   Reorganisation to allow
!                             NPRTRV to differ from NPRGPEW
!                : 98-06-17   add mailbox control logic (from TRLTOM)
!        =99-03-29= Mats Hamrud and Deborah Salmond
!                   JUMP in FFT's changed to 1
!                   KINDEX introduced and PCOMBUF not used for same PE
!         01-11-23  Deborah Salmond and John Hague
!                    LIMP_NOOLAP Option for non-overlapping message passing
!                    and buffer packing
!         01-12-18  Peter Towers
!                   Improved vector performance of GTOL_PACK,GTOL_UNPACK
!         03-04-02  G. Radnoti: call barrier always when nproc>1
!         08-01-01  G.Mozdzynski: cleanup
!         09-01-02  G.Mozdzynski: use non-blocking recv and send
!        R. El Khatib 09-Sep-2020 64 bits addressing for PGLAT
!     ------------------------------------------------------------------



USE PARKIND1  ,ONLY : JPIM     ,JPRB    ,JPIB
USE YOMHOOK   ,ONLY : LHOOK,   DR_HOOK, JPHOOK

USE MPL_MODULE  ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_WAITANY, &
     & JP_BLOCKING_STANDARD, MPL_BARRIER, JP_BLOCKING_BUFFERED

USE TPM_GEN         ,ONLY : NOUT, NTRANS_SYNC_LEVEL
USE TPM_DISTR       ,ONLY : D, NPRCIDS, NPRTRNS, MTAGGL,  &
     &                      MYSETV, MYSETW, MYPROC, NPROC
USE TPM_TRANS       ,ONLY : LDIVGP, LGPNORM, LSCDERS, LUVDER, LVORGP, NGPBLKS

USE PE2SET_MOD      ,ONLY : PE2SET
USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS
!

IMPLICIT NONE

INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP
REAL(KIND=JPRB),INTENT(OUT)   :: PGLAT(KF_FS,D%NLENGTF)
INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP)
INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G
INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT
INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT
INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND
INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV
INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KSEND    (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KRECV    (NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF)
INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC)
INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS)
REAL(KIND=JPRB), INTENT(INOUT) :: PCOMBUFS(-1:KSENDCOUNT,KNSEND)
REAL(KIND=JPRB), INTENT(INOUT) :: PCOMBUFR(-1:KRECVCOUNT,KNRECV)
INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP(:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGPUV(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP3A(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP3B(:,:,:,:)
REAL(KIND=JPRB),OPTIONAL,INTENT(IN)     :: PGP2(:,:,:)

INTEGER(KIND=JPIM) :: IPOSPLUS(KNSEND)
INTEGER(KIND=JPIM) :: ISETW(KNSEND)
INTEGER(KIND=JPIM) :: IJPOS(NGPBLKS,KNSEND)
INTEGER(KIND=JPIM) :: IFLDA(KF_GP,KNSEND)
INTEGER(KIND=JPIM) :: IREQ_SEND(NPROC)
INTEGER(KIND=JPIM) :: IREQ_RECV(NPROC)

!     LOCAL LOGICAL SCALARS
LOGICAL   :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY, LLINDER
LOGICAL   :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP)

!     LOCAL INTEGER SCALARS
INTEGER(KIND=JPIM) :: IFIRST, ILAST, ILEN, IPOS, ISETA, ISETB, IRECV, ISETV
INTEGER(KIND=JPIM) :: ISEND, ITAG, JBLK, JFLD, JK, JL, IFLD, II, IFLDS, INS, INR
INTEGER(KIND=JPIM) :: JJ,JI,IFLDT, J

INTEGER(KIND=JPIB) :: JFLD64

INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP)
INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP)
INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF
INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2, JNR
INTEGER(KIND=JPIM) :: IFLDOFF(KF_FS)
INTEGER(KIND=JPIM) :: ISEND_FLD_START,ISEND_FLD_END
INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END
INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS)

REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR

!     ------------------------------------------------------------------

!*       0.    Some initializations
!              --------------------

ITAG = MTAGGL

IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',0,ZHOOK_HANDLE_BAR)
CALL GSTATS_BARRIER(761)
IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',1,ZHOOK_HANDLE_BAR)

IF(.NOT.LGPNORM)THEN
   CALL GSTATS(803,0)
ELSE
   CALL GSTATS(804,0)
ENDIF

IF (NTRANS_SYNC_LEVEL <= 0) THEN
   !  Receive loop.........................................................
   DO INR=1,KNRECV
      IRECV=KRECV(INR)
      CALL MPL_RECV(PCOMBUFR(-1:KRECVTOT(IRECV),INR), &
           & KSOURCE=NPRCIDS(IRECV), &
           & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_RECV(INR), &
           & KTAG=ITAG,CDSTRING='TRGTOL_COMM: NON-BLOCKING IRECV' )
   ENDDO
ENDIF

IF(.NOT.LGPNORM)THEN
   CALL GSTATS(803,1)
ELSE
   CALL GSTATS(804,1)
ENDIF

CALL GSTATS(1805,0)
LLINDER = .FALSE.
LLPGPUV = .FALSE.
LLPGP3A = .FALSE.
LLPGP3B = .FALSE.
LLPGP2  = .FALSE.
LLPGPONLY = .FALSE.
IF(PRESENT(KPTRGP))  LLINDER = .TRUE.
IF(PRESENT(PGP))     LLPGPONLY = .TRUE.
IF(PRESENT(PGPUV))   LLPGPUV = .TRUE.
IF(PRESENT(PGP3A))   LLPGP3A = .TRUE.
IF(PRESENT(PGP3B))   LLPGP3B = .TRUE.
IF(PRESENT(PGP2))    LLPGP2 = .TRUE.

IUVPAR=0
IUVLEV=0
IOFF1=0
IOFFNS=KF_SCALARS_G
IOFFEW=2*KF_SCALARS_G

LLUV(:) = .FALSE.
IUVPARS(:) = -99
IUVLEVS(:) = -99
IF (LLPGPUV) THEN
  IOFF=0
  IUVLEV=UBOUND(PGPUV,2)
  IF(LVORGP) THEN
    IUVPAR=IUVPAR+1
    DO J=1,IUVLEV
      IUVLEVS(IOFF+J)=J
      IUVPARS(IOFF+J)=IUVPAR
      LLUV(IOFF+J)=.TRUE.
    ENDDO
    IOFF=IOFF+IUVLEV
  ENDIF
  IF(LDIVGP) THEN
    IUVPAR=IUVPAR+1
    DO J=1,IUVLEV
      IUVLEVS(IOFF+J)=J
      IUVPARS(IOFF+J)=IUVPAR
      LLUV(IOFF+J)=.TRUE.
    ENDDO
    IOFF=IOFF+IUVLEV
  ENDIF
  DO J=1,IUVLEV
    IUVLEVS(IOFF+J)=J
    IUVPARS(IOFF+J)=IUVPAR+1
    IUVLEVS(IOFF+J+IUVLEV)=J
    IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2
  ENDDO
  IUVPAR=IUVPAR+2
  LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE.
  IOFF=IOFF+2*IUVLEV
  IOFF1=IOFF
  IOFFNS=IOFFNS+IOFF
  IOFFEW=IOFFEW+IOFF

  IOFF=IUVPAR*IUVLEV+KF_SCALARS_G
  IF(LUVDER) THEN
    IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G
    DO J=1,IUVLEV
      IUVLEVS(IOFF+J)=J
      IUVPARS(IOFF+J)=IUVPAR+1
      LLUV(IOFF+J)=.TRUE.
      IUVLEVS(IOFF+J+IUVLEV)=J
      IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2
      LLUV(IOFF+J+IUVLEV)=.TRUE.
    ENDDO
    IUVPAR=IUVPAR+2
    IOFF=IOFF+2*IUVLEV
    IOFFEW=IOFFEW+2*IUVLEV
  ENDIF
ENDIF

LLGP2(:)=.FALSE.
IF(LLPGP2) THEN
  IOFF=IOFF1
  IGP2PAR=UBOUND(PGP2,2)
  IF(LSCDERS) IGP2PAR=IGP2PAR/3
  DO J=1,IGP2PAR
    LLGP2(J+IOFF) = .TRUE.
    IGP2PARS(J+IOFF)=J
  ENDDO
  IOFF1=IOFF1+IGP2PAR
  IF(LSCDERS) THEN
    IOFF=IOFFNS
    DO J=1,IGP2PAR
      LLGP2(J+IOFF) = .TRUE.
      IGP2PARS(J+IOFF)=J+IGP2PAR
    ENDDO
    IOFFNS=IOFF+IGP2PAR
    IOFF=IOFFEW
    DO J=1,IGP2PAR
      LLGP2(J+IOFF) = .TRUE.
      IGP2PARS(J+IOFF)=J+2*IGP2PAR
    ENDDO
    IOFFEW=IOFF+IGP2PAR
  ENDIF
ENDIF

LLGP3A(:) = .FALSE.
IF(LLPGP3A) THEN
  IGP3ALEV=UBOUND(PGP3A,2)
  IGP3APAR=UBOUND(PGP3A,3)
  IF(LSCDERS) IGP3APAR=IGP3APAR/3
  IOFF=IOFF1
  DO J1=1,IGP3APAR
    DO J2=1,IGP3ALEV
      LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE.
      IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1
      IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2
    ENDDO
  ENDDO
  IPAROFF=IGP3APAR
  IOFF1=IOFF1+IGP3APAR*IGP3ALEV
  IF(LSCDERS) THEN
    IOFF=IOFFNS
    DO J1=1,IGP3APAR
      DO J2=1,IGP3ALEV
        LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE.
        IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF
        IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2
      ENDDO
    ENDDO
    IPAROFF=IPAROFF+IGP3APAR
    IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV
    IOFF=IOFFEW
    DO J1=1,IGP3APAR
      DO J2=1,IGP3ALEV
        LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE.
        IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF
        IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2
      ENDDO
    ENDDO
    IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV
  ENDIF
ENDIF

LLGP3B(:) = .FALSE.
IF(LLPGP3B) THEN
  IGP3BLEV=UBOUND(PGP3B,2)
  IGP3BPAR=UBOUND(PGP3B,3)
  IF(LSCDERS) IGP3BPAR=IGP3BPAR/3
  IOFF=IOFF1
  DO J1=1,IGP3BPAR
    DO J2=1,IGP3BLEV
      LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE.
      IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1
      IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2
    ENDDO
  ENDDO
  IPAROFF=IGP3BPAR
  IOFF1=IOFF1+IGP3BPAR*IGP3BLEV
  IF(LSCDERS) THEN
    IOFF=IOFFNS
    DO J1=1,IGP3BPAR
      DO J2=1,IGP3BLEV
        LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE.
        IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF
        IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2
      ENDDO
    ENDDO
    IPAROFF=IPAROFF+IGP3BPAR
    IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV
    IOFF=IOFFEW
    DO J1=1,IGP3BPAR
      DO J2=1,IGP3BLEV
        LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE.
        IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF
        IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2
      ENDDO
    ENDDO
    IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV
  ENDIF
ENDIF

CALL GSTATS(1805,1)


! Copy local contribution

IF(KSENDTOT(MYPROC) > 0 )THEN
  IFLDS = 0
  DO JFLD=1,KF_GP
    IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN
      IFLDS = IFLDS+1
      IF(LLINDER) THEN
        IFLDOFF(IFLDS) = KPTRGP(JFLD)
      ELSE
        IFLDOFF(IFLDS) = JFLD
      ENDIF
    ENDIF
  ENDDO

  IPOS=0
  DO JBLK=1,NGPBLKS
    IGPTROFF(JBLK)=IPOS
    IFIRST = KGPTRSEND(1,JBLK,MYSETW)
    IF(IFIRST > 0) THEN
      ILAST = KGPTRSEND(2,JBLK,MYSETW)
      IPOS=IPOS+ILAST-IFIRST+1
    ENDIF
  ENDDO
  CALL GSTATS(1601,0)
#ifdef __NEC__
! Loops inversion is still better on Aurora machines, according to CHMI. REK.
!$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD64,JBLK,JK,IFLD,IPOS,IFIRST,ILAST)
#else
!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD64,JBLK,JK,IFLD,IPOS,IFIRST,ILAST)
#endif
  DO JBLK=1,NGPBLKS
    IFIRST = KGPTRSEND(1,JBLK,MYSETW)
    IF(IFIRST > 0) THEN
      ILAST = KGPTRSEND(2,JBLK,MYSETW)
! Address PGLAT over 64 bits because its size may exceed 2 GB for big data and
! small number of tasks.
      IF(LLPGPONLY) THEN
        DO JFLD64=1,IFLDS
          IFLD = IFLDOFF(JFLD64)
!DIR$ VECTOR ALWAYS
          DO JK=IFIRST,ILAST
            IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1
            PGLAT(JFLD64,KINDEX(IPOS)) = PGP(JK,IFLD,JBLK)
          ENDDO
        ENDDO
      ELSE
        DO JFLD64=1,IFLDS
          IFLD = IFLDOFF(JFLD64)
          IF(LLUV(IFLD)) THEN
!DIR$ VECTOR ALWAYS
            DO JK=IFIRST,ILAST
              IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1
              PGLAT(JFLD64,KINDEX(IPOS)) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK)
            ENDDO
          ELSEIF(LLGP2(IFLD)) THEN
!DIR$ VECTOR ALWAYS
            DO JK=IFIRST,ILAST
              IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1
              PGLAT(JFLD64,KINDEX(IPOS)) = PGP2(JK,IGP2PARS(IFLD),JBLK)
            ENDDO
          ELSEIF(LLGP3A(IFLD)) THEN
!DIR$ VECTOR ALWAYS
            DO JK=IFIRST,ILAST
              IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1
              PGLAT(JFLD64,KINDEX(IPOS)) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)
            ENDDO
          ELSEIF(LLGP3B(IFLD)) THEN
!DIR$ VECTOR ALWAYS
            DO JK=IFIRST,ILAST
              IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1
              PGLAT(JFLD64,KINDEX(IPOS)) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)
            ENDDO
          ELSE
            WRITE(NOUT,*)'TRGTOL_MOD: ERROR',JFLD64,IFLD
            CALL ABORT_TRANS('TRGTOL_MOD: ERROR')
          ENDIF
        ENDDO
      ENDIF
    ENDIF
  ENDDO
!$OMP END PARALLEL DO
  CALL GSTATS(1601,1)

ENDIF


! Now overlapping buffer packing/unpacking with sends/waits
! Time as if all communications to avoid double accounting

IF(.NOT.LGPNORM)THEN
   CALL GSTATS(803,0)
ELSE
   CALL GSTATS(804,0)
ENDIF

!....Pack+send loop.........................................................

!$OMP PARALLEL PRIVATE(JBLK,IFIRST,ILAST,ISEND_FLD_START,ISEND_FLD_END,INS,ISEND,ISETA,ISETB,&
!$OMP&                 ISETV,IFLD,IFLDT,IPOS,JFLD,JK,JJ,JI)
!$OMP DO SCHEDULE(STATIC)
DO INS=1,KNSEND
  ISEND=KSEND(INS)
  CALL PE2SET(ISEND,ISETA,ISETB,ISETW(INS),ISETV)
  IFLD = 0
  IPOS = 0
  IPOSPLUS(INS)=0
  DO JFLD=1,KF_GP
    IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1 ) THEN
      IFLD = IFLD+1
      IFLDA(IFLD,INS)=JFLD
    ENDIF
  ENDDO

  DO JBLK=1,NGPBLKS
    IFIRST = KGPTRSEND(1,JBLK,ISETW(INS))
    IF(IFIRST > 0) THEN
      ILAST = KGPTRSEND(2,JBLK,ISETW(INS))
      IJPOS(JBLK,INS)=IPOS
      IPOSPLUS(INS)=IPOSPLUS(INS)+(ILAST-IFIRST+1)
      IPOS=IPOS+(ILAST-IFIRST+1)
    ENDIF
  ENDDO

  PCOMBUFS(-1,INS) = 1
  PCOMBUFS(0,INS) = IFLD

ENDDO
!$OMP END DO
DO INS=1,KNSEND
  ISEND=KSEND(INS)
  IPOS=IPOSPLUS(INS)

  ISEND_FLD_START=PCOMBUFS(-1,INS)
  ISEND_FLD_END = PCOMBUFS(0,INS)

!$OMP DO SCHEDULE(STATIC)
  DO JJ=ISEND_FLD_START,ISEND_FLD_END
    IFLDT=IFLDA(JJ,INS)
    DO JBLK=1,NGPBLKS
      IFIRST = KGPTRSEND(1,JBLK,ISETW(INS))
      IF(IFIRST > 0) THEN
        ILAST = KGPTRSEND(2,JBLK,ISETW(INS))
        IF(LLINDER) THEN
          DO JK=IFIRST,ILAST
            JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1
            PCOMBUFS(JI,INS) = PGP(JK,KPTRGP(IFLDT),JBLK)
          ENDDO
        ELSE
          IF(LLPGPONLY) THEN
            DO JK=IFIRST,ILAST
              JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1
              PCOMBUFS(JI,INS) = PGP(JK,IFLDT,JBLK)
            ENDDO
          ELSEIF(LLUV(IFLDT)) THEN
            DO JK=IFIRST,ILAST
              JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1
              PCOMBUFS(JI,INS) = PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK)
            ENDDO
          ELSEIF(LLGP2(IFLDT)) THEN
            DO JK=IFIRST,ILAST
              JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1
              PCOMBUFS(JI,INS) = PGP2(JK,IGP2PARS(IFLDT),JBLK)
            ENDDO
          ELSEIF(LLGP3A(IFLDT)) THEN
            DO JK=IFIRST,ILAST
              JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1
              PCOMBUFS(JI,INS) = PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK)
            ENDDO
          ELSEIF(LLGP3B(IFLDT)) THEN
            DO JK=IFIRST,ILAST
              JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1
              PCOMBUFS(JI,INS) = PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK)
            ENDDO
          ENDIF
        ENDIF
      ENDIF
    ENDDO
  ENDDO
!$OMP END DO
ENDDO
!$OMP END PARALLEL

DO INS=1,KNSEND
  ISEND=KSEND(INS)
  IF (NTRANS_SYNC_LEVEL <= 1) THEN
     CALL MPL_SEND(PCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), &
          & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_SEND(INS), &
          & KTAG=ITAG,CDSTRING='TRGTOL_COMM: NON-BLOCKING ISEND' )
  ELSE
     CALL MPL_SEND(PCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), &
          & KMP_TYPE=JP_BLOCKING_BUFFERED, &
          & KTAG=ITAG,CDSTRING='TRGTOL_COMM: BLOCKING BUFFERED BSEND' )
  ENDIF

ENDDO

!  Unpack loop.........................................................

DO JNR=1,KNRECV

  IF (NTRANS_SYNC_LEVEL <= 0) THEN
     CALL MPL_WAITANY(KREQUEST=IREQ_RECV(1:KNRECV),KINDEX=INR,&
          & CDSTRING='TRGTOL_COMM: WAIT FOR ANY RECEIVES')
  ELSE
     INR = JNR
     IRECV=KRECV(INR)
     CALL MPL_RECV(PCOMBUFR(-1:KRECVTOT(IRECV),INR), &
          & KSOURCE=NPRCIDS(IRECV), &
          & KMP_TYPE=JP_BLOCKING_STANDARD, &
          & KTAG=ITAG,CDSTRING='TRGTOL_COMM: BLOCKING RECV' )
  ENDIF

  IRECV=KRECV(INR)
  ILEN = KRECVTOT(IRECV)/KF_FS
  IRECV_FLD_START = PCOMBUFR(-1,INR)
  IRECV_FLD_END   = PCOMBUFR(0,INR)
!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JL,II,JFLD)
  DO JL=1,ILEN
    II = KINDEX(KNDOFF(IRECV)+JL)
    DO JFLD=IRECV_FLD_START,IRECV_FLD_END
      PGLAT(JFLD,II) = PCOMBUFR(JL+(JFLD-IRECV_FLD_START)*ILEN,INR)
    ENDDO
  ENDDO
!$OMP END PARALLEL DO
  IPOS = ILEN*(IRECV_FLD_END-IRECV_FLD_START+1)
ENDDO

IF (NTRANS_SYNC_LEVEL <= 1) THEN
   IF(KNSEND > 0) THEN
      CALL MPL_WAIT(KREQUEST=IREQ_SEND(1:KNSEND),CDSTRING='TRGTOL_COMM: WAIT FOR ISENDS')
   ENDIF
ENDIF

IF (NTRANS_SYNC_LEVEL >= 1) THEN
   CALL MPL_BARRIER(CDSTRING='TRGTOL_COMM: BARRIER AT END')
ENDIF

IF(.NOT.LGPNORM)THEN
  CALL GSTATS(803,1)
ELSE
  CALL GSTATS(804,1)
ENDIF

CALL GSTATS_BARRIER2(761)

END SUBROUTINE TRGTOL_COMM

END MODULE TRGTOL_MOD