#define ALIGN(I, A) (((I)+(A)-1)/(A)*(A)) ! (C) Copyright 2000- ECMWF. ! (C) Copyright 2000- Meteo-France. ! (C) Copyright 2022- NVIDIA. ! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. ! In applying this licence, ECMWF does not waive the privileges and immunities ! granted to it by virtue of its status as an intergovernmental organisation ! nor does it submit to any jurisdiction. ! MODULE SUMP_TRANS_MOD CONTAINS SUBROUTINE SUMP_TRANS ! Set up distributed environment for the transform package (part 2) ! Modifications : ! P.Marguinaud : 11-Sep-2012 : Fix twice allocated pointer USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT, JPRD USE TPM_GEN ,ONLY : NOUT, NPRINTLEV USE TPM_DIM ,ONLY : R USE TPM_GEOMETRY ,ONLY : G USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MYSETW, NPRTRNS, NPRTRW, NPROC, MYPROC !USE SUWAVEDI_MOD !USE PE2SET_MOD USE SUMPLATF_MOD ,ONLY : SUMPLATF USE SUMPLAT_MOD ,ONLY : SUMPLAT USE SUSTAONL_MOD ,ONLY : SUSTAONL USE MYSENDSET_MOD ,ONLY : MYSENDSET USE MYRECVSET_MOD ,ONLY : MYRECVSET USE EQ_REGIONS_MOD ,ONLY : MY_REGION_NS, MY_REGION_EW, & & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS ! IMPLICIT NONE INTEGER(KIND=JPIM) :: JM INTEGER(KIND=JPIM) :: JGL,IGL,IPLAT,ISENDSET,IRECVSET,JML,IPOS,IM INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF,OFFSET1,OFFSET2,KMLOC,KM INTEGER(KIND=JPIM),ALLOCATABLE :: IGPTOTL(:,:) REAL(KIND=JPRBT),ALLOCATABLE :: ZDUM(:) REAL(KIND=JPRBT) :: ZMEDIAP REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 LOGICAL :: LLP1,LLP2 ! ------------------------------------------------------------------ LLP1 = NPRINTLEV>0 LLP2 = NPRINTLEV>1 IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUMP_TRANS ===' IF(.NOT.D%LGRIDONLY) THEN ALLOCATE(D%NULTPP(NPRTRNS)) IF(LLP2)WRITE(NOUT,9) 'D%NULTPP ',SIZE(D%NULTPP ),SHAPE(D%NULTPP ) ALLOCATE(D%NPTRLS(NPRTRNS)) IF(LLP2)WRITE(NOUT,9) 'D%NPTRLS ',SIZE(D%NPTRLS ),SHAPE(D%NPTRLS ) ALLOCATE(D%NPROCL(R%NDGL)) IF(LLP2)WRITE(NOUT,9) 'D%NPROCL ',SIZE(D%NPROCL ),SHAPE(D%NPROCL ) CALL SUMPLATF(R%NDGL,NPRTRNS,MYSETW,D%NULTPP,D%NPROCL,D%NPTRLS) D%NDGL_FS = D%NULTPP(MYSETW) ! Help arrays for spectral to fourier space transposition ALLOCATE(D%NLTSGTB (NPRTRNS+1)) IF(LLP2)WRITE(NOUT,9) 'D%NLTSGTB ',SIZE(D%NLTSGTB),SHAPE(D%NLTSGTB) ALLOCATE(D%NLTSFTB (NPRTRNS+1)) IF(LLP2)WRITE(NOUT,9) 'D%NLTSFTB ',SIZE(D%NLTSFTB),SHAPE(D%NLTSFTB) ALLOCATE(D%NSTAGT0B(NPRTRNS+1)) IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT0B ',SIZE(D%NSTAGT0B),SHAPE(D%NSTAGT0B) ALLOCATE(D%NSTAGT1B(NPRTRNS+1)) IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT1B ',SIZE(D%NSTAGT1B),SHAPE(D%NSTAGT1B) ALLOCATE(D%MSTABF (NPRTRNS+1)) IF(LLP2)WRITE(NOUT,9) 'D%MSTABF ',SIZE(D%MSTABF),SHAPE(D%MSTABF) D%NLTSGTB(:) = 0 DO JGL=1,D%NDGL_FS IGL = D%NPTRLS(MYSETW)+JGL-1 DO JM=0,G%NMEN(IGL) D%NLTSGTB(D%NPROCM(JM)) = D%NLTSGTB(D%NPROCM(JM))+1 ENDDO ENDDO DO JA=1,NPRTRW IPLAT = 0 DO JGL=1,D%NULTPP(JA) IGL = D%NPTRLS(JA)+JGL-1 DO JM=1,D%NUMP IF(IGL > R%NDGNH-G%NDGLU(D%MYMS(JM)) .AND. IGL <= R%NDGNH+G%NDGLU(D%MYMS(JM))) THEN IPLAT = IPLAT + 1 ENDIF ENDDO ENDDO D%NLTSFTB(JA) = IPLAT ENDDO DO JA=1,NPRTRW-1 ISENDSET = MYSENDSET(NPRTRW,MYSETW,JA) IRECVSET = MYRECVSET(NPRTRW,MYSETW,JA) D%MSTABF(IRECVSET) = ISENDSET ENDDO D%MSTABF(MYSETW) = MYSETW ALLOCATE(D%NPNTGTB0(0:R%NSMAX,D%NDGL_FS)) IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB0 ',SIZE(D%NPNTGTB0 ),SHAPE(D%NPNTGTB0 ) ALLOCATE(D%NPNTGTB1(D%NUMP,R%NDGL)) IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB1 ',SIZE(D%NPNTGTB1 ),SHAPE(D%NPNTGTB1 ) ! Global offsets of processors D%NSTAGT0B(1) = 0 D%NSTAGT1B(1) = 0 DO JA=2,NPRTRNS D%NSTAGT0B(JA) = D%NSTAGT0B(JA-1)+D%NLTSGTB(JA-1) D%NSTAGT1B(JA) = D%NSTAGT1B(JA-1)+D%NLTSFTB(JA-1) ENDDO ! Global size of foubuf D%NLENGT0B = D%NSTAGT0B(NPRTRNS)+D%NLTSGTB(NPRTRNS) D%NLENGT1B = D%NSTAGT1B(NPRTRNS)+D%NLTSFTB(NPRTRNS) ! Global offsets of grid points DO JA=1,NPRTRW IPOS = 0 DO JGL=1,D%NULTPP(MYSETW) IGL = D%NPTRLS(MYSETW) + JGL - 1 DO JML=D%NPTRMS(JA),D%NPTRMS(JA)+D%NUMPP(JA)-1 IM = D%NALLMS(JML) IF (IM <= G%NMEN(IGL)) THEN D%NPNTGTB0(IM,JGL) = D%NSTAGT0B(D%NPROCM(IM)) + IPOS IPOS = IPOS+1 ELSE D%NPNTGTB0(IM,JGL) = -99 ENDIF ENDDO ENDDO ENDDO DO JA=1,NPRTRW IPOS = 0 DO JGL=1,D%NULTPP(JA) IGL = D%NPTRLS(JA) + JGL - 1 DO JM=1,D%NUMP IM = D%MYMS(JM) IF (IM <= G%NMEN(IGL)) THEN D%NPNTGTB1(JM,IGL) = D%NSTAGT1B(D%NPROCL(IGL)) + IPOS IPOS = IPOS+1 ELSE D%NPNTGTB1(JM,IGL) = -99 ENDIF ENDDO ENDDO ENDDO ! D%NSTAGT0B / D%NSTAGT1B: offset of peer rank in send/recv buffer ! D%NLTSGTB / D%NLTSFTB : size of peer rank in send/recv buffer ! D%NPNTGTB0 / D%NPNTGTB1: translation inp to global send buffer / recv to out buffer ENDIF ! GRIDPOINT SPACE ALLOCATE(D%NFRSTLAT(N_REGIONS_NS)) IF(LLP2)WRITE(NOUT,9) 'D%NFRSTLAT ',SIZE(D%NFRSTLAT ),SHAPE(D%NFRSTLAT ) ALLOCATE(D%NLSTLAT(N_REGIONS_NS)) IF(LLP2)WRITE(NOUT,9) 'D%NLSTLAT ',SIZE(D%NLSTLAT ),SHAPE(D%NLSTLAT ) ALLOCATE(D%NPTRLAT(R%NDGL)) IF(LLP2)WRITE(NOUT,9) 'D%NPTRLAT ',SIZE(D%NPTRLAT ),SHAPE(D%NPTRLAT ) ALLOCATE(D%NPTRFRSTLAT(N_REGIONS_NS)) IF(LLP2)WRITE(NOUT,9) 'D%NPTRFRSTLAT',SIZE(D%NPTRFRSTLAT),SHAPE(D%NPTRFRSTLAT) ALLOCATE(D%NPTRLSTLAT(N_REGIONS_NS)) IF(LLP2)WRITE(NOUT,9)'D%NPTRLSTLAT',SIZE(D%NPTRLSTLAT),SHAPE(D%NPTRLSTLAT) ALLOCATE(D%LSPLITLAT(R%NDGL)) IF(LLP2)WRITE(NOUT,9) 'D%LSPLITLAT',SIZE(D%LSPLITLAT),SHAPE(D%LSPLITLAT) ALLOCATE(D%NPROCA_GP(N_REGIONS_NS)) IF(LLP2)WRITE(NOUT,9) 'D%NPROCA_GP',SIZE(D%NPROCA_GP),SHAPE(D%NPROCA_GP) IF(.NOT.D%LWEIGHTED_DISTR) THEN ALLOCATE(ZDUM(1)) CALL SUMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT,LEQ_REGIONS,& &D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& &D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& &ZDUM,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& &IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN) ELSE CALL SUMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT,LEQ_REGIONS,& &D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& &D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& &D%RWEIGHT,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& &IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN) ENDIF D%NDGL_GP = D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF IF (LLP1) THEN IF(.NOT.D%LGRIDONLY) THEN WRITE(NOUT,FMT='(/'' OUTPUT FROM ROUTINE SUMPLAT: ''/)') WRITE(NOUT,FMT='('' D%NULTPP '')') WRITE(NOUT,FMT='(20(1X,I4))') D%NULTPP(1:NPRTRNS) WRITE(NOUT,FMT='('' D%NPROCL '')') WRITE(NOUT,FMT='(20(1X,I4))') D%NPROCL(1:R%NDGL) ENDIF WRITE(NOUT,FMT='('' D%NFRSTLAT '')') WRITE(NOUT,FMT='(20(1X,I4))') D%NFRSTLAT(1:N_REGIONS_NS) WRITE(NOUT,FMT='('' D%NLSTLAT '')') WRITE(NOUT,FMT='(20(1X,I4))') D%NLSTLAT(1:N_REGIONS_NS) WRITE(NOUT,FMT='('' D%NFRSTLOFF D%NPTRFLOFF '')') WRITE(NOUT,FMT='(2(1X,I6))') D%NFRSTLOFF, D%NPTRFLOFF WRITE(NOUT,FMT='('' D%NPTRLAT '')') WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLAT(1:R%NDGL) WRITE(NOUT,FMT='('' D%LSPLITLAT '')') WRITE(NOUT,FMT='(50(1X,L1))') D%LSPLITLAT(1:R%NDGL) WRITE(NOUT,FMT='('' D%NPTRFRSTLAT '')') WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRFRSTLAT(1:N_REGIONS_NS) WRITE(NOUT,FMT='('' D%NPTRLSTLAT '')') WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLSTLAT(1:N_REGIONS_NS) WRITE(NOUT,FMT='(/)') ENDIF ALLOCATE(D%NSTA(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) IF(LLP2)WRITE(NOUT,9) 'D%NSTA ',SIZE(D%NSTA ),SHAPE(D%NSTA ) ALLOCATE(D%NONL(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) IF(LLP2)WRITE(NOUT,9) 'D%NONL ',SIZE(D%NONL ),SHAPE(D%NONL ) IF(.NOT.D%LWEIGHTED_DISTR) THEN CALL SUSTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,ZDUM,ZMEDIAP,D%NPROCA_GP) ELSE CALL SUSTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,D%RWEIGHT,ZMEDIAP,D%NPROCA_GP) ENDIF ! IGPTOTL is the number of grid points in each individual processor ALLOCATE(IGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) IGPTOTL(:,:)=0 DO JA=1,N_REGIONS_NS DO JB=1,N_REGIONS(JA) IGPTOT = 0 DO JGL=D%NPTRFRSTLAT(JA),D%NPTRLSTLAT(JA) IGPTOT = IGPTOT+D%NONL(JGL,JB) ENDDO IGPTOTL(JA,JB) = IGPTOT ENDDO ENDDO D%NGPTOT = IGPTOTL(MY_REGION_NS,MY_REGION_EW) D%NGPTOTMX = MAXVAL(IGPTOTL) D%NGPTOTG = SUM(IGPTOTL) ALLOCATE(D%NGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) IF(LLP2)WRITE(NOUT,9) 'D%NGPTOTL ',SIZE(D%NGPTOTL ),SHAPE(D%NGPTOTL ) D%NGPTOTL(:,:) = IGPTOTL(:,:) IF(.NOT.D%LGRIDONLY) THEN ALLOCATE(D%NSTAGTF(D%NDGL_FS+1)) IF(LLP2)WRITE(NOUT,9) 'D%NSTAGTF ',SIZE(D%NSTAGTF ),SHAPE(D%NSTAGTF ) IOFF = 0 DO JGL=1,D%NDGL_FS D%NSTAGTF(JGL) = IOFF IGL = D%NPTRLS(MYSETW) + JGL - 1 ! Each latitude should be able to store NLON real values, or floor(NLON/2)+1 ! complex values. Note that IOFF should always be even, because we need to ! store complex values (i.e. 2 floats), but this is the case anyway. ! WARNING: Extra padding changes results, potentially, though it does not ! cause wrong results. IOFF = IOFF + (G%NLOEN(IGL)/2+1)*2 ENDDO D%NSTAGTF(D%NDGL_FS+1) = IOFF D%NLENGTF = IOFF ENDIF IF(ALLOCATED(ZDUM)) DEALLOCATE(ZDUM) DEALLOCATE(IGPTOTL) ALLOCATE(D%OFFSETS_GEMM1(D%NUMP+1)) ALLOCATE(D%OFFSETS_GEMM2(D%NUMP+1)) OFFSET1 = 0 OFFSET2 = 0 DO KMLOC=1,D%NUMP KM = D%MYMS(KMLOC) D%OFFSETS_GEMM1(KMLOC) = OFFSET1 D%OFFSETS_GEMM2(KMLOC) = OFFSET2 !KM=0 is transformed in double precision, no need to store here IF (KM /= 0) THEN OFFSET1 = OFFSET1 + ALIGN(G%NDGLU(KM),8) ! N_OFFSET takes the max of the two GEMMs OFFSET2 = OFFSET2 + ALIGN((R%NSMAX-KM+3)/2,8) ENDIF ENDDO D%OFFSETS_GEMM1(D%NUMP+1) = OFFSET1 D%OFFSETS_GEMM2(D%NUMP+1) = OFFSET2 ! ------------------------------------------------------------------ 9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) END SUBROUTINE SUMP_TRANS END MODULE SUMP_TRANS_MOD