! (C) Copyright 2008- ECMWF. ! (C) Copyright 2008- Meteo-France. ! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. ! In applying this licence, ECMWF does not waive the privileges and immunities ! granted to it by virtue of its status as an intergovernmental organisation ! nor does it submit to any jurisdiction. ! SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) !**** *GPNORM_TRANS_GPU* - calculate grid-point norms ! Purpose. ! -------- ! calculate grid-point norms using a 2 stage (NPRTRV,NPRTRW) communication rather ! than an approach using a more expensive global gather collective communication !** Interface. ! ---------- ! CALL GPNORM_TRANS(...) ! Explicit arguments : ! -------------------- ! PGP(:,:,:) - gridpoint fields (input) ! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where ! NPROMA is the blocking factor, KFIELDS the total number ! of fields and NGPBLKS the number of NPROMA blocks. ! KFIELDS - number of fields (input) ! (these do not have to be just levels) ! KPROMA - required blocking factor (input) ! PAVE - average (output) ! PMIN - minimum (input/output) ! PMAX - maximum (input/output) ! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX ! KRESOL - resolution tag (optional) ! default assumes first defined resolution ! ! Author. ! ------- ! George Mozdzynski *ECMWF* ! Modifications. ! -------------- ! Original : 19th Sept 2008 ! R. El Khatib 07-08-2009 Optimisation directive for NEC ! ------------------------------------------------------------------ USE PARKIND1 ,ONLY : JPIM ,JPRB , JPRD USE PARKIND_ECTRANS ,ONLY : JPRBT !ifndef INTERFACE USE TPM_GEN ,ONLY : NOUT USE TPM_DIM ,ONLY : R USE TPM_TRANS ,ONLY : LGPNORM, NGPBLKS, NPROMA USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF,D_NPTRLS, MYPROC USE TPM_GEOMETRY ,ONLY : G,G_NLOEN,G_NLOEN_MAX USE TPM_FIELDS ,ONLY : F_RW USE SET_RESOL_MOD ,ONLY : SET_RESOL USE TRGTOL_MOD ,ONLY : TRGTOL USE SET2PE_MOD ,ONLY : SET2PE USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, JP_BLOCKING_STANDARD USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK !endif INTERFACE IMPLICIT NONE ! Declaration of arguments REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) REAL(KIND=JPRB) ,INTENT(OUT) :: PAVE(:) REAL(KIND=JPRB) ,INTENT(INOUT) :: PMIN(:) REAL(KIND=JPRB) ,INTENT(INOUT) :: PMAX(:) INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA LOGICAL ,INTENT(IN) :: LDAVE_ONLY INTEGER(KIND=JPIM),OPTIONAL, INTENT(IN) :: KRESOL !ifndef INTERFACE ! Local variables REAL(KIND=JPHOOK) :: ZHOOK_HANDLE INTEGER(KIND=JPIM) :: IUBOUND(4) INTEGER(KIND=JPIM) :: IVSET(KFIELDS) INTEGER(KIND=JPIM),ALLOCATABLE :: IVSETS(:) INTEGER(KIND=JPIM),ALLOCATABLE :: IVSETG(:,:) !GPU REAL(KIND=JPRBT) :: V REAL(KIND=JPRBT),ALLOCATABLE,SAVE :: ZGTF(:) REAL(KIND=JPRD),ALLOCATABLE :: ZAVE(:,:) REAL(KIND=JPRBT),ALLOCATABLE :: ZMINGL(:,:) REAL(KIND=JPRBT),ALLOCATABLE :: ZMAXGL(:,:) REAL(KIND=JPRBT),ALLOCATABLE :: ZMIN(:) REAL(KIND=JPRBT),ALLOCATABLE :: ZMAX(:) REAL(KIND=JPRBT),ALLOCATABLE :: ZMINGPN(:) REAL(KIND=JPRBT),ALLOCATABLE :: ZMAXGPN(:) REAL(KIND=JPRD),ALLOCATABLE :: ZAVEG(:,:) REAL(KIND=JPRB),ALLOCATABLE :: ZMING(:) REAL(KIND=JPRB),ALLOCATABLE :: ZMAXG(:) REAL(KIND=JPRD),ALLOCATABLE :: ZSND(:) REAL(KIND=JPRD),ALLOCATABLE :: ZRCV(:) INTEGER(KIND=JPIM) :: J,JGL,IGL,JL,JF,IF_GP,IF_SCALARS_G,IF_FS,JSETV,JSETW,IWLATS,JMAX INTEGER(KIND=JPIM) :: IPROC,ITAG,ILEN,ILENR,IBEG,IEND,IND !INTEGER(KIND=JPIM) :: iunit ! ------------------------------------------------------------------ IF (LHOOK) CALL DR_HOOK('GPNORM_TRANS',0,ZHOOK_HANDLE) ! Set current resolution CALL SET_RESOL(KRESOL) ! Set defaults NPROMA = KPROMA NGPBLKS = (D%NGPTOT-1)/NPROMA+1 ! Consistency checks IUBOUND(1:3)=UBOUND(PGP) IF(IUBOUND(1) < NPROMA) THEN WRITE(NOUT,*)'GPNORM_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA CALL ABORT_TRANS('GPNORM_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') ENDIF IF(IUBOUND(2) < KFIELDS) THEN WRITE(NOUT,*)'GPNORM_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFIELDS CALL ABORT_TRANS('GPNORM_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') ENDIF IF(IUBOUND(3) < NGPBLKS) THEN WRITE(NOUT,*)'GPNORM_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS CALL ABORT_TRANS('GPNORM_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') ENDIF IF_GP=KFIELDS IF_SCALARS_G=0 IF_FS=0 DO J=1,KFIELDS IVSET(J)=MOD(J-1,NPRTRV)+1 IF(IVSET(J)==MYSETV)THEN IF_FS=IF_FS+1 ENDIF ENDDO IF (.NOT. ALLOCATED(ZAVE)) THEN ALLOCATE(ZAVE(IF_FS,R%NDGL)) ALLOCATE(ZMINGL(IF_FS,R%NDGL)) ALLOCATE(ZMAXGL(IF_FS,R%NDGL)) ALLOCATE(ZMINGPN(IF_FS)) ALLOCATE(ZMAXGPN(IF_FS)) ZAVE = 0._JPRBT ZMINGL = 0._JPRBT ZMAXGL = 0._JPRBT ZMINGPN = 0._JPRBT ZMAXGPN = 0._JPRBT #ifdef ACCGPU !$ACC ENTER DATA COPYIN(ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) #endif IF (.NOT. ALLOCATED(ZGTF)) THEN ALLOCATE(ZGTF(IF_FS*D%NLENGTF)) WRITE(NOUT,*)'ZGTF :',SIZE(ZGTF) #ifdef ACCGPU !$ACC ENTER DATA CREATE(ZGTF) #endif ENDIF ENDIF ALLOCATE(IVSETS(NPRTRV)) IVSETS(:)=0 DO J=1,KFIELDS IVSETS(IVSET(J))=IVSETS(IVSET(J))+1 ENDDO ALLOCATE(IVSETG(NPRTRV,MAXVAL(IVSETS(:)))) IVSETG(:,:)=0 IVSETS(:)=0 DO J=1,KFIELDS IVSETS(IVSET(J))=IVSETS(IVSET(J))+1 IVSETG(IVSET(J),IVSETS(IVSET(J)))=J ENDDO ! done in setup_trans LGPNORM=.TRUE. !!FIXME !!CALL TRGTOL_CUDAAWARE(ZGTF,IF_FS,IF_GP,IVSET,PGP=PGP) LGPNORM=.FALSE. ! ZGTF is now on GPU IBEG=1 IEND=D%NDGL_FS CALL GSTATS(1429,0) IF( IF_FS > 0 )THEN #ifdef ACCGPU !$ACC DATA & !$ACC& COPY(F_RW) & !$ACC& COPY(D,D_NSTAGTF,D_NPTRLS,G_NLOEN,G_NLOEN_MAX) & !$ACC& PRESENT(ZGTF,ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) #endif #ifdef OMPGPU !$OMP TARGET DATA MAP(TO:F_RW,D,D_NSTAGTF,D_NPTRLS,G_NLOEN,G_NLOEN_MAX) & !$OMP& MAP(PRESENT,ALLOC:ZGTF,ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO #endif #ifdef ACCGPU !$ACC KERNELS #endif DO JF=1,IF_FS V = ZGTF(IF_FS*D_NSTAGTF(1)+(JF-1)*(D%NSTAGTF(2)-D%NSTAGTF(1))) ZMINGL(JF,IBEG:IEND)=HUGE(1_JPRBT) ZMAXGL(JF,IBEG:IEND)=-HUGE(1_JPRBT) ENDDO #ifdef ACCGPU !$ACC END KERNELS #endif ! FIRST DO SUMS IN EACH FULL LATITUDE #ifdef OMPGPU !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO #endif #ifdef ACCGPU !$ACC KERNELS #endif DO JGL=1,D%NDGL_FS IGL = D_NPTRLS(MYSETW) + JGL - 1 DO JF=1,IF_FS ZAVE(JF,JGL)=0.0_JPRBT #ifdef ACCGPU !$ACC LOOP #endif DO JL=1,G_NLOEN(IGL) V = ZGTF(IF_FS*D%NSTAGTF(JGL)+(JF-1)*(D%NSTAGTF(JGL+1)-D%NSTAGTF(JGL))+JL) ZAVE(JF,JGL)=ZAVE(JF,JGL)+V ZMINGL(JF,JGL)=MIN(ZMINGL(JF,JGL),V) ZMAXGL(JF,JGL)=MAX(ZMAXGL(JF,JGL),V) ENDDO ENDDO ENDDO #ifdef ACCGPU !$ACC END KERNELS #endif #ifdef OMPGPU !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO #endif #ifdef ACCGPU !$ACC KERNELS #endif DO JF=1,IF_FS ZMINGPN(JF)=MINVAL(ZMINGL(JF,IBEG:IEND)) ZMAXGPN(JF)=MAXVAL(ZMAXGL(JF,IBEG:IEND)) ENDDO #ifdef ACCGPU !$ACC END KERNELS #endif #ifdef OMPGPU !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO #endif #ifdef ACCGPU !$ACC KERNELS #endif DO JGL=IBEG,IEND IGL = D_NPTRLS(MYSETW) + JGL - 1 DO JF=1,IF_FS ZAVE(JF,JGL)=ZAVE(JF,JGL)*F_RW(IGL)/G_NLOEN(IGL) !write(iunit,*) 'aver inside ',JF,IF_FS,IGL,ZAVE(JF,JGL), F_RW(IGL), G_NLOEN(IGL),ZMINGPN(JF),ZMAXGPN(JF) ENDDO ENDDO #ifdef ACCGPU !$ACC END KERNELS #endif #ifdef OMPGPU !$OMP END TARGET DATA #endif #ifdef ACCGPU !$ACC END DATA #endif #ifdef ACCGPU !$ACC UPDATE HOST(ZAVE) #endif #ifdef OMPGPU !$OMP TARGET UPDATE FROM(ZAVE) #endif #ifdef ACCGPU !$ACC UPDATE HOST(ZMINGPN) #endif #ifdef OMPGPU !$OMP TARGET UPDATE FROM(ZMINGPN) #endif #ifdef ACCGPU !$ACC UPDATE HOST(ZMAXGPN) #endif #ifdef OMPGPU !$OMP TARGET UPDATE FROM(ZMAXGPN) #endif #ifdef ACCGPU !$ACC WAIT #endif #ifdef OMPGPU !$OMP BARRIER #endif ENDIF CALL GSTATS(1429,1) ! from here rest on CPU ! IT IS IMPORTANT THAT SUMS ARE NOW DONE IN LATITUDE ORDER ALLOCATE(ZAVEG(R%NDGL,KFIELDS)) ALLOCATE(ZMING(KFIELDS)) ALLOCATE(ZMAXG(KFIELDS)) ZAVEG(:,:)=0.0_JPRD DO JF=1,IF_FS DO JGL=IBEG,IEND IGL = D%NPTRLS(MYSETW) + JGL - 1 ZAVEG(IGL,IVSETG(MYSETV,JF))=ZAVEG(IGL,IVSETG(MYSETV,JF))+ZAVE(JF,JGL) ENDDO ENDDO IF(LDAVE_ONLY)THEN ZMING(:)=PMIN(:) ZMAXG(:)=PMAX(:) ELSE DO JF=1,IF_FS ZMING(IVSETG(MYSETV,JF))=ZMINGPN(JF) ZMAXG(IVSETG(MYSETV,JF))=ZMAXGPN(JF) ENDDO ENDIF ! RECEIVE ABOVE FROM OTHER NPRTRV SETS FOR SAME LATS BUT DIFFERENT FIELDS ITAG=123 CALL GSTATS(815,0) IF( MYSETV==1 )THEN DO JSETV=2,NPRTRV IF(LDAVE_ONLY)THEN ILEN=D%NDGL_FS*IVSETS(JSETV)+2*KFIELDS ELSE ILEN=(D%NDGL_FS+2)*IVSETS(JSETV) ENDIF IF(ILEN > 0)THEN ALLOCATE(ZRCV(ILEN)) CALL SET2PE(IPROC,0,0,MYSETW,JSETV) CALL MPL_RECV(ZRCV(:),KSOURCE=NPRCIDS(IPROC),KTAG=ITAG,& &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='GPNORM_TRANS:V') IF(ILENR /= ILEN)THEN CALL ABOR1('GPNORM_TRANS:ILENR /= ILEN') ENDIF IND=0 DO JF=1,IVSETS(JSETV) DO JGL=IBEG,IEND IGL = D%NPTRLS(MYSETW) + JGL - 1 IND=IND+1 ZAVEG(IGL,IVSETG(JSETV,JF))=ZRCV(IND) ENDDO IF(.NOT.LDAVE_ONLY)THEN IND=IND+1 ZMING(IVSETG(JSETV,JF))=ZRCV(IND) IND=IND+1 ZMAXG(IVSETG(JSETV,JF))=ZRCV(IND) ENDIF ENDDO IF(LDAVE_ONLY)THEN DO JF=1,KFIELDS IND=IND+1 ZMING(JF)=MIN(ZMING(JF),REAL(ZRCV(IND),JPRB)) IND=IND+1 ZMAXG(JF)=MAX(ZMAXG(JF),REAL(ZRCV(IND),JPRB)) ENDDO ENDIF DEALLOCATE(ZRCV) ENDIF ENDDO ELSE IF(LDAVE_ONLY)THEN ILEN=D%NDGL_FS*IVSETS(MYSETV)+2*KFIELDS ELSE ILEN=(D%NDGL_FS+2)*IVSETS(MYSETV) ENDIF IF(ILEN > 0)THEN CALL SET2PE(IPROC,0,0,MYSETW,1) ALLOCATE(ZSND(ILEN)) IND=0 DO JF=1,IF_FS DO JGL=IBEG,IEND IGL = D%NPTRLS(MYSETW) + JGL - 1 IND=IND+1 ZSND(IND)=ZAVEG(IGL,IVSETG(MYSETV,JF)) ENDDO IF(.NOT.LDAVE_ONLY)THEN IND=IND+1 ZSND(IND)=ZMING(IVSETG(MYSETV,JF)) IND=IND+1 ZSND(IND)=ZMAXG(IVSETG(MYSETV,JF)) ENDIF ENDDO IF(LDAVE_ONLY)THEN DO JF=1,KFIELDS IND=IND+1 ZSND(IND)=PMIN(JF) IND=IND+1 ZSND(IND)=PMAX(JF) ENDDO ENDIF CALL MPL_SEND(ZSND(:),KDEST=NPRCIDS(IPROC),KTAG=ITAG,& &KMP_TYPE=JP_BLOCKING_STANDARD,CDSTRING='GPNORM_TRANS:V') DEALLOCATE(ZSND) ENDIF ENDIF ! FINALLY RECEIVE CONTRIBUTIONS FROM OTHER NPRTRW SETS IF( MYSETV == 1 )THEN IF( MYSETW == 1 )THEN DO JSETW=2,NPRTRW IWLATS=D%NULTPP(JSETW) IBEG=1 IEND=IWLATS IF(LDAVE_ONLY)THEN ILEN=IWLATS*KFIELDS+2*KFIELDS ELSE ILEN=(IWLATS+2)*KFIELDS ENDIF IF(ILEN > 0 )THEN ALLOCATE(ZRCV(ILEN)) CALL SET2PE(IPROC,0,0,JSETW,1) CALL MPL_RECV(ZRCV(:),KSOURCE=NPRCIDS(IPROC),KTAG=ITAG,& &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='GPNORM_TRANS:W') IF(ILENR /= ILEN)THEN CALL ABOR1('GPNORM_TRANS:ILENR /= ILEN') ENDIF IND=0 DO JF=1,KFIELDS DO JGL=IBEG,IEND IGL = D%NPTRLS(JSETW) + JGL - 1 IND=IND+1 ZAVEG(IGL,JF)=ZRCV(IND) ENDDO IF(.NOT.LDAVE_ONLY)THEN IND=IND+1 ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) IND=IND+1 ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) ENDIF ENDDO IF(LDAVE_ONLY)THEN DO JF=1,KFIELDS IND=IND+1 ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) IND=IND+1 ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) ENDDO ENDIF DEALLOCATE(ZRCV) ENDIF ENDDO ELSE IF(LDAVE_ONLY)THEN ILEN=D%NDGL_FS*KFIELDS+2*KFIELDS ELSE ILEN=(D%NDGL_FS+2)*KFIELDS ENDIF IF(ILEN > 0)THEN CALL SET2PE(IPROC,0,0,1,1) ALLOCATE(ZSND(ILEN)) IND=0 DO JF=1,KFIELDS DO JGL=IBEG,IEND IGL = D%NPTRLS(MYSETW) + JGL - 1 IND=IND+1 ZSND(IND)=ZAVEG(IGL,JF) ENDDO IF(.NOT.LDAVE_ONLY)THEN IND=IND+1 ZSND(IND)=ZMING(JF) IND=IND+1 ZSND(IND)=ZMAXG(JF) ENDIF ENDDO IF(LDAVE_ONLY)THEN DO JF=1,KFIELDS IND=IND+1 ZSND(IND)=ZMING(JF) IND=IND+1 ZSND(IND)=ZMAXG(JF) ENDDO ENDIF CALL MPL_SEND(ZSND(:),KDEST=NPRCIDS(IPROC),KTAG=ITAG,& &KMP_TYPE=JP_BLOCKING_STANDARD,CDSTRING='GPNORM_TRANS:V') DEALLOCATE(ZSND) ENDIF ENDIF ENDIF CALL GSTATS(815,1) IF( MYSETW == 1 .AND. MYSETV == 1 )THEN PAVE(:)=0.0_JPRB DO JGL=1,R%NDGL PAVE(:)=PAVE(:)+REAL(ZAVEG(JGL,:),JPRB) ENDDO PMIN(:)=ZMING(:) PMAX(:)=ZMAXG(:) ENDIF !DEALLOCATE(ZGTF) !DEALLOCATE(ZAVE) !DEALLOCATE(ZMIN) !DEALLOCATE(ZMAX) DEALLOCATE(ZAVEG) DEALLOCATE(ZMING) DEALLOCATE(ZMAXG) DEALLOCATE(IVSETS) DEALLOCATE(IVSETG) IF (LHOOK) CALL DR_HOOK('GPNORM_TRANS',1,ZHOOK_HANDLE) ! ------------------------------------------------------------------ !endif INTERFACE END SUBROUTINE GPNORM_TRANS_GPU