! (C) Copyright 1987- ECMWF. ! (C) Copyright 1987- Meteo-France. ! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. ! In applying this licence, ECMWF does not waive the privileges and immunities ! granted to it by virtue of its status as an intergovernmental organisation ! nor does it submit to any jurisdiction. ! MODULE SULEG_MOD #ifdef __NEC__ #define SIZEOF(x) STORAGE_SIZE(x)/KIND(x) #endif CONTAINS SUBROUTINE SULEG !DEC$ OPTIMIZE:1 USE PARKIND_ECTRANS ,ONLY : JPRD, JPIM, JPRBT USE PARKIND2 ,ONLY : JPRH USE MPL_MODULE USE TPM_GEN USE TPM_DIM USE TPM_CONSTANTS USE TPM_DISTR USE TPM_FIELDS ,ONLY : FIELDS_RESOL, F USE TPM_FLT USE TPM_GEOMETRY USE TPM_CTL USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS USE PRE_SULEG_MOD USE SUGAW_MOD USE SUPOL_MOD USE SUPOLF_MOD USE TPM_POL USE SUTRLE_MOD USE SETUP_GEOM_MOD USE SEEFMM_MIX USE SET2PE_MOD USE ABORT_TRANS_MOD USE PREPSNM_MOD ,ONLY : PREPSNM USE WRITE_LEGPOL_MOD USE READ_LEGPOL_MOD !**** *SULEG * - initialize the Legendre polynomials ! Purpose. ! -------- ! Initialize COMMON YOMLEG !** Interface. ! ---------- ! *CALL* *SULEG* ! Explicit arguments : ! -------------------- ! Implicit arguments : ! -------------------- ! COMMON YOMLEG ! Method. ! ------- ! See documentation ! Externals. ! ---------- ! SUGAW (Gaussian latitudes) ! SUPOLM (polynomials) ! LFI routines for external IO's ! Called by SUGEM. ! Reference. ! ---------- ! ECMWF Research Department documentation of the IFS ! ! S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon Press (1962) ! P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre quadrature, ! SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) ! Author. ! ------- ! Mats Hamrud and Philippe Courtier *ECMWF* ! Modifications. ! -------------- ! Original : 87-10-15 ! MODIFICATION : 91-04 J.M. Piriou: ! - Read gaussian latitudes and PNM on LFI ! - If file missing, computes ! 91-04 M.Hamrud: ! - IO Scheme introduced ! MODIFICATION : 91-07-03 P.Courtier suppress derivatives ! MODIFICATION : 91-07-03 P.Courtier computes RATATH and RACTHE ! MODIFICATION : 91-07-03 P.Courtier change upper limit (NSMAX+1) ! MODIFICATION : 91-07-03 P.Courtier change ordering ! Order of the PNM in the file, as in the model : ! - increasing wave numbers m ! - for a given m, from n=NSMAX+1 to m ! MODIFICATION : 92-07-02 R. Bubnova: shift RATATH calculation ! to SUGEM1 ! MODIFICATION : 92-12-17 P.Courtier multitask computations ! Modified by R. EL Khatib : 93-04-02 Set-up defaults controled by LECMWF ! MODIFICATION : 93-03-19 D.Giard : n <= NTMAX ! K. YESSAD : 93-05-11 : DLMU --> global array DRMU(NDGSA:NDGEN). ! (not stored currently on LFI files). ! MODIFICATION : 94-02-03 R. El Khatib : subroutine SULEG2 to write out ! the Leg. polynomials on workfile or LFI file ! Modification : 94-08-31 M. Tolstykh: Setup for CUD interpolation ! Modified by K. YESSAD (MARCH 1995): Extra-latitudes computations ! according to value of NDGSUR and LRPOLE only. ! + change fancy loop numbering. ! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. ! - removal of LRPOLE in YOMCT0. ! - removal of code under LRPOLE. ! R. El Khatib 11-Apr-2007 Emulation of vectorized quadruple precision ! on NEC ! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 ! G.Mozdzynski: March 2011 Support 2D (RW,RV) initialisation of legendre coeffs ! G.Mozdzynski: July 2012 distribute FLT initialisation over NPRTRV ! R. El Khatib 14-Jun-2013 optional computation on the stretched latitudes ! F. Vana 05-Mar-2015 Support for single precision ! Nils Wedi, 20-Apr-2015 Support dual latitude/longitude set ! T. Wilhelmsson, 22-Sep-2016 Support single precision for dual too ! ------------------------------------------------------------------ IMPLICIT NONE ! LOCAL ! ------------------------------------------------------------------ REAL(KIND=JPRD),ALLOCATABLE :: ZPNMG(:) REAL(KIND=JPRD),ALLOCATABLE :: ZFN(:,:) REAL(KIND=JPRD),ALLOCATABLE :: ZLRMUZ2(:) REAL(KIND=JPRBT) :: ZEPSNM(0:R%NTMAX+2) REAL(KIND=JPRD) :: ZLRMUZ(R%NDGL) REAL(KIND=JPRD) :: ZW(R%NDGL) REAL(KIND=JPRD) :: ZANM REAL(KIND=JPRD) :: ZFNN REAL(KIND=JPRD) :: ZPI, ZINC, ZOFF, ZTEMP, ZORIG, ZTHETA, ZCOS REAL(KIND=JPRD), ALLOCATABLE :: ZSNDBUFV(:),ZRCVBUFV(:,:) REAL(KIND=JPRD), ALLOCATABLE :: ZPNMCDO(:,:),ZPNMCDD(:,:) REAL(KIND=JPRBT), ALLOCATABLE :: ZRCVBUTFV(:,:) REAL(KIND=KIND(ZRCVBUTFV)) :: ZBYTES INTEGER(KIND=JPIM) :: IBYTES INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRV) INTEGER(KIND=JPIM) :: IRECVREQ(NPRTRV) INTEGER(KIND=JPIM) :: INM, IM, IRECV, ISEND, ISREQ, IRREQ, & &JGL, JM, JMLOC, IMLOC, JN, JNM, IODD, INN, INMAX, JI, IMAXN, ITAG, ITAG1, & &INX, ISL, ISTART, ITHRESHOLD, INSMAX, IMAXCOLS,ILATSMAX,JW,JV,J, & &IDGLU, ILA, ILS, IA, IS, I, ILATS, ILOOP, IPRTRV, JSETV, JH, & &IMAXRECVA, IMAXRECVS, IHEMIS, INNH, IGL, IGL1, IGL2, & &IDGLU2, ISYM, INZ REAL(KIND=JPRD) :: ZEPS_INT_DEC REAL(KIND=JPRD) :: ZEPS REAL(KIND=JPRD),ALLOCATABLE :: ZLFPOL(:,:) REAL(KIND=JPRD),ALLOCATABLE :: ZLPOL(:) LOGICAL :: LLP1,LLP2 ! For latitudes on the stretched geometry REAL(KIND=JPRH) :: ZTAN REAL(KIND=JPRH) :: ZSTRETMU(R%NDGL) ! ------------------------------------------------------------------ !* 0. Some initializations. ! --------------------- IBYTES = MPL_BYTES(ZBYTES) ZEPS = 1000._JPRD*EPSILON(ZEPS) !ZEPS_INT_DEC = EPSILON(ZEPS) ZEPS_INT_DEC = 1.0E-7_JPRD !ZEPS_INT_DEC = 1.0E-5_JPRD IHEMIS=1 IF (S%LSOUTHPNM) IHEMIS=2 LLP1 = NPRINTLEV>0 LLP2 = NPRINTLEV>1 IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SULEG ===' IF( NPROC > 1 )THEN CALL GSTATS(798,0) CALL MPL_BARRIER(CDSTRING='SULEG:') CALL GSTATS(798,1) ENDIF CALL GSTATS(140,0) CALL GSTATS(1801,0) IF(.NOT.D%LGRIDONLY) THEN CALL PRE_SULEG ENDIF ALLOCATE(F%RMU(R%NDGL)) IF (LLP2) WRITE(NOUT,9) 'F%RMU ',SIZE(F%RMU ),SHAPE(F%RMU ) ALLOCATE(F%RW(R%NDGL)) IF (LLP2) WRITE(NOUT,9) 'F%RW ',SIZE(F%RW ),SHAPE(F%RW ) !* 1.0 Initialize Fourier coefficients for ordinary Legendre polynomials ! ------------------------------------------------------------------------ ALLOCATE(ZFN(0:R%NDGL,0:R%NDGL)) IF (LLP2) WRITE(NOUT,9) 'ZFN ',SIZE(ZFN ),SHAPE(ZFN ) ! determines the number of stripes in butterfly NSMAX/IMAXCOLS ! IMAXCOLS = (R%NSMAX - 1)/4 + 1 ! IMAXCOLS=64 (min flops) IMAXCOLS=64 ! the threshold of efficiency IF(NPROC == 1 .OR. R%NDGNH <= 2560) THEN ITHRESHOLD = R%NDGNH/4 DO IF(ITHRESHOLD >= IMAXCOLS*4) EXIT IMAXCOLS = IMAXCOLS/2 ENDDO ELSE ITHRESHOLD = 900 ENDIF ITHRESHOLD = MAX(ITHRESHOLD,IMAXCOLS+1) S%ITHRESHOLD = ITHRESHOLD !* 3.1 Gaussian latitudes and weights ! --------------------------------------- CALL INI_POL(R%NTMAX+3) IF(.NOT.D%LGRIDONLY) THEN ISTART=1 ELSE ISTART=R%NDGL ENDIF INMAX=R%NDGL ! Belousov, Swarztrauber use ZFN(0,0)=SQRT(2._JPRD) ! IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 ZFN(0,0)=2._JPRD DO JN=ISTART,R%NDGL ZFNN=ZFN(0,0) DO JGL=1,JN ZFNN=ZFNN*SQRT(1._JPRD-0.25_JPRD/REAL(JGL**2,JPRD)) ENDDO IODD=MOD(JN,2) ZFN(JN,JN)=ZFNN DO JGL=2,JN-IODD,2 ZFN(JN,JN-JGL)=ZFN(JN,JN-JGL+2)*REAL((JGL-1)*(2*JN-JGL+2),JPRD)/REAL(JGL*(2*JN-JGL+1),JPRD) ENDDO ENDDO ! compute latitudes and weights for original Gaussian latitudes ZANM=SQRT(REAL(2*INMAX+1,JPRD)*REAL(INMAX**2,JPRD)/REAL(2*INMAX-1,JPRD)) INN=R%NDGL CALL GSTATS(1801,2) CALL SUGAW(INN,0,INMAX,ZLRMUZ(1:INN),ZW(1:INN),ZANM,ZFN) CALL GSTATS(1801,3) IF (ABS(G%RSTRET-1.0_JPRD)>100._JPRD*EPSILON(1._JPRD)) THEN WRITE(NOUT,*) '=== SULEG: Change Gaussian latitudes to the transformed sphere ===' INNH=(INN+1)/2 ZTAN=(1.0_JPRD-G%RSTRET**2)/(1.0_JPRD+G%RSTRET**2) ! North hemisphere DO JGL=1,INNH ZSTRETMU(JGL)=(ZTAN+REAL(ZLRMUZ(JGL),JPRH))/(1.0_JPRD+ZTAN*REAL(ZLRMUZ(JGL),JPRH)) ENDDO ! South hemisphere DO JGL=1,INNH IGL=2*INNH-JGL+1 ZSTRETMU(IGL)=(ZTAN-REAL(ZLRMUZ(JGL),JPRH))/(1.0_JPRD-ZTAN*REAL(ZLRMUZ(JGL),JPRH)) ENDDO DO JGL=1,INN ZLRMUZ(JGL)=REAL(ZSTRETMU(JGL),JPRD) ENDDO ENDIF DO JGL=1,R%NDGL F%RW(JGL) = ZW(JGL) F%RMU(JGL) = ZLRMUZ(JGL) ENDDO IF (LLP1) WRITE(NOUT,*) '=== SULEG: Finished Gaussian latitudes ===' !* 3.1.1 specify a dual set of output (inv_trans) or input (dir_trans) latitudes / longitudes IF( S%LDLL ) THEN INMAX = S%NDGL INN= S%NDGL S%NDGNHD = (INMAX+1)/2 ALLOCATE(ZLRMUZ2(INN)) ! here we want to use the positions of the specified dual grid ! accuracy requirement is ZLRMUZ2(JGL) < F%RMU(1) ! so we use approximations for the remaining latitudes outside this range ! we approximate the vicinity to the pole/equator ZPI = 2.0_JPRD*ASIN(1.0_JPRD) ZORIG = ASIN(F%RMU(1)) IF( S%LSHIFTLL ) THEN ZINC = ZPI/REAL(INN,JPRD) ZOFF = 0.5_JPRD*ZINC ZTEMP = ZOFF + ZINC*REAL(S%NDGNHD-1,JPRD) ZLRMUZ2(1) = SIN(MIN(ZTEMP,ZORIG) - 0.5_JPRD*MAX(0._JPRD,ZTEMP - ZORIG)) ZLRMUZ2(S%NDGNHD) = SIN(ZOFF) ELSE ZINC = ZPI/REAL(INN-2,JPRD) ZOFF=-0.5_JPRD*ZINC ZTEMP = ZOFF + ZINC*REAL(S%NDGNHD-1,JPRD) ZLRMUZ2(1) = SIN(MIN(ZTEMP,ZORIG) - 0.5_JPRD*MAX(0._JPRD,ZTEMP - ZORIG)) ZOFF=0.01_JPRD*ZINC ZLRMUZ2(S%NDGNHD) = SIN(ZOFF) ZOFF=0._JPRD ENDIF DO JGL=2, S%NDGNHD-1 ZLRMUZ2(JGL) = SIN(ZOFF + ZINC*REAL(S%NDGNHD-JGL,JPRD)) ENDDO DO JGL=1, S%NDGNHD ISYM = INN-JGL+1 ZLRMUZ2(ISYM) = -ZLRMUZ2(JGL) ENDDO IF( LLP2 ) THEN WRITE(NOUT,*) 'dual latitudes' DO JGL= 1, INN WRITE(NOUT,*) 'dual JGL=',JGL,(180._JPRD/ZPI)*ZINC,(180._JPRD/ZPI)*ASIN(ZLRMUZ2(JGL)),(180._JPRD/ZPI)*ASIN(F%RMU(JGL)) ENDDO ENDIF ALLOCATE(F%RMU2(INMAX)) IF (LLP2) WRITE(NOUT,9) 'F%RMU2 ',SIZE(F%RMU2 ),SHAPE(F%RMU2 ) ALLOCATE(F%RACTHE2(INMAX)) IF (LLP2) WRITE(NOUT,9) 'F%RACTHE2 ',SIZE(F%RACTHE2),SHAPE(F%RACTHE2 ) DO JGL=1,INN F%RMU2(JGL) = ZLRMUZ2(JGL) F%RACTHE2(JGL) = 1.0_JPRD/(SQRT(1.0_JPRD-ZLRMUZ2(JGL)*ZLRMUZ2(JGL))+ZEPS)/REAL(RA,JPRD) ENDDO IF (LLP1) WRITE(NOUT,*) '=== SULEG: Finished dual Gaussian latitudes ===' ! inverse + direct map for FMM INX=2*R%NDGNH INZ=2*S%NDGNHD ALLOCATE(S%FMM_INTI) CALL SETUP_SEEFMM(INX,F%RMU,INZ,F%RMU2,S%FMM_INTI) ENDIF !* 3.2 Computes related arrays IF(.NOT.D%LGRIDONLY) THEN ALLOCATE(S%FA(D%NUMP)) ALLOCATE(F%R1MU2(R%NDGL)) IF (LLP2) WRITE(NOUT,9) 'F%R1MU2 ',SIZE(F%R1MU2),SHAPE(F%R1MU2 ) ALLOCATE(F%RACTHE(R%NDGL)) IF (LLP2) WRITE(NOUT,9) 'F%RACTHE ',SIZE(F%RACTHE),SHAPE(F%RACTHE ) IF( S%LUSE_BELUSOV) THEN ALLOCATE(F%RPNM(R%NLEI3,D%NSPOLEGL)) IF (LLP2) WRITE(NOUT,9) 'F%RPNM ',SIZE(F%RPNM),SHAPE(F%RPNM) DO JNM=1,D%NSPOLEGL F%RPNM(R%NLEI3,JNM) = 0.0_JPRD ENDDO ENDIF !* 3.2 Computes related arrays DO JGL=1,R%NDGL ! test cosine differently ZTHETA = ASIN(ZLRMUZ(JGL)) ZCOS = COS(ZTHETA) F%R1MU2(JGL) = ZCOS**2 F%RACTHE(JGL) = 1.0_JPRD/ZCOS/REAL(RA,JPRD) ! F%R1MU2(JGL) = 1.0_JPRD-ZLRMUZ(JGL)*ZLRMUZ(JGL) ! F%RACTHE(JGL) = 1.0_JPRD/SQRT(1.0_JPRD-ZLRMUZ(JGL)*ZLRMUZ(JGL))/REAL(RA,JPRD) ENDDO !* 3.3 Working arrays ! compute the Legendre polynomials as a function of the z_k (Gaussian Latitudes) ! this may be faster than calling supolf for each m but uses extra communication ! and the parallelism is more limited ? Nils IF( S%LUSE_BELUSOV .AND. .NOT. C%LREAD_LEGPOL ) THEN INSMAX = R%NTMAX+1 IF( INSMAX /= R%NDGL) THEN DEALLOCATE(ZFN) ALLOCATE(ZFN(0:INSMAX,0:INSMAX)) ! Belousov, Swarztrauber use ZFN(0,0)=SQRT(2._JPRD) ! IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 ZFN(0,0)=2._JPRD DO JN=1,INSMAX ZFNN=ZFN(0,0) DO JGL=1,JN ZFNN=ZFNN*SQRT(1._JPRD-0.25_JPRD/REAL(JGL**2,JPRD)) ENDDO IODD=MOD(JN,2) ZFN(JN,JN)=ZFNN DO JGL=2,JN-IODD,2 ZFN(JN,JN-JGL)=ZFN(JN,JN-JGL+2)*REAL((JGL-1)*(2*JN-JGL+2),JPRD)/REAL(JGL*(2*JN-JGL+1),JPRD) ENDDO ENDDO ENDIF ALLOCATE(ZLFPOL(0:INSMAX,0:INSMAX)) ALLOCATE(ZPNMG(R%NSPOLEG)) DO JH=1,IHEMIS IF (JH==1) THEN IGL1=D%NLATLS(MYSETW,MYSETV) IGL2=D%NLATLE(MYSETW,MYSETV) ELSE IGL1=R%NDGL-D%NLATLE(MYSETW,MYSETV)+1 IGL2=R%NDGL-D%NLATLS(MYSETW,MYSETV)+1 ENDIF ILOOP=0 DO JGL=IGL1,IGL2 INM = 0 CALL SUPOL(INSMAX,ZLRMUZ(JGL),ZFN,ZLFPOL) DO JM=0,R%NSMAX DO JN=INSMAX,JM,-1 INM = INM+1 ZPNMG(INM) = ZLFPOL(JM,JN) ENDDO ENDDO CALL GSTATS(1801,2) ILOOP = JGL-IGL1+1 CALL SUTRLE(ZPNMG,JGL,ILOOP) CALL GSTATS(1801,3) ENDDO ILATSMAX=0 DO JW=1,NPRTRW DO JV=1,NPRTRV ILATSMAX=MAX(ILATSMAX,D%NLATLE(JW,JV)-D%NLATLS(JW,JV)+1) ENDDO ENDDO ILATS=IGL2-IGL1+1 IF (S%LSOUTHPNM .AND. IHEMIS==1 .AND. ILATSMAX-1 >= ILATS) THEN ! I don't know what to do for south pole. But isn't this piece of code ! a dead stuff for poles rows ? CALL ABORT_TRANS('SULEG: WILL BE BROKEN FOR SOUTH HEMISPHERE') ENDIF DO J=ILATS,ILATSMAX-1 ILOOP=ILOOP+1 CALL GSTATS(1801,2) CALL SUTRLE(ZPNMG,-1,ILOOP) CALL GSTATS(1801,3) ENDDO ENDDO DEALLOCATE(ZLFPOL) IF( ALLOCATED(ZFN) ) DEALLOCATE(ZFN) DEALLOCATE(ZPNMG) IF(LLP1) WRITE(NOUT,*) '=== SULEG: Finished RPNM ===' ENDIF CALL SETUP_GEOM IMAXN=R%NTMAX+1 ITAG=MTAGLETR ITAG1=MTAGLETR+1 IMAXRECVA=0 IMAXRECVS=0 DO JMLOC=1,D%NUMP IM = D%MYMS(JMLOC) ILA = (R%NSMAX-IM+2)/2 ILS = (R%NSMAX-IM+3)/2 IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) IMAXRECVA = MAX(IDGLU*ILA,IMAXRECVA) IMAXRECVS = MAX(IDGLU*ILS,IMAXRECVS) !find nearest starting latitude of the dual set IF( S%LDLL ) THEN INMAX=MIN(R%NTMAX+1,S%NDGL) IDGLU2=S%NDGNHD S%FA(JMLOC)%ISLD = 1 LLA:DO JGL=1,S%NDGNHD-1 IF( (ZLRMUZ2(JGL) < ZLRMUZ(ISL)) ) THEN S%FA(JMLOC)%ISLD = JGL IDGLU2 = S%NDGNHD-S%FA(JMLOC)%ISLD+1 EXIT LLA ENDIF ENDDO LLA IF( .NOT. C%LREAD_LEGPOL ) THEN ! compute auxiliary quantities for the dual mapping ! output data latitudes ALLOCATE(ZPNMCDO(2*IDGLU2,2)) !$OMP PARALLEL PRIVATE(JGL,ZLPOL) IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) !$OMP DO SCHEDULE(DYNAMIC,1) DO JGL=1,2*IDGLU2 CALL SUPOLF(IM,INMAX,ZLRMUZ2(S%FA(JMLOC)%ISLD+JGL-1),ZLPOL(0:INMAX)) ZPNMCDO(JGL,1)=ZLPOL(INMAX-1) ZPNMCDO(JGL,2)=ZLPOL(INMAX) ENDDO !$OMP END DO IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) !$OMP END PARALLEL ! internal (gg-roots) latitudes ALLOCATE(ZPNMCDD(2*IDGLU,2)) !$OMP PARALLEL PRIVATE(JGL,ZLPOL,JI,JN) IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) !$OMP DO SCHEDULE(DYNAMIC,1) DO JGL=1,2*IDGLU CALL SUPOLF(IM,INMAX,ZLRMUZ(ISL+JGL-1),ZLPOL(0:INMAX)) ZPNMCDD(JGL,1)=ZLPOL(INMAX-1) ZPNMCDD(JGL,2)=ZLPOL(INMAX) ENDDO !$OMP END DO IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) !$OMP END PARALLEL stop 'Error: code path not (yet) supported in GPU version' !CALL PREPSNM(IM,JMLOC,ZEPSNM) ALLOCATE(S%FA(JMLOC)%RPNMWI(2*IDGLU,1:2)) DO JGL=1,2*IDGLU ! inverse trafo S%FA(JMLOC)%RPNMWI(JGL,1) = F%RW(ISL+JGL-1)*ZPNMCDD(JGL,1) S%FA(JMLOC)%RPNMWI(JGL,2) = F%RW(ISL+JGL-1)*ZPNMCDD(JGL,2) ! direct trafo needed if mapping to another set of gg roots !S%FA(JMLOC)%RPNMWI(JGL,3) = -ZEPSNM(IMAXN)*ZPNMCDD(JGL,2) !S%FA(JMLOC)%RPNMWI(JGL,4) = -ZEPSNM(IMAXN)*ZPNMCDD(JGL,1) ENDDO DEALLOCATE(ZPNMCDD) ALLOCATE(S%FA(JMLOC)%RPNMWO(2*IDGLU2,1:2)) DO JGL=1,2*IDGLU2 ! inverse trafo S%FA(JMLOC)%RPNMWO(JGL,1) = -ZEPSNM(IMAXN)*ZPNMCDO(JGL,2) S%FA(JMLOC)%RPNMWO(JGL,2) = -ZEPSNM(IMAXN)*ZPNMCDO(JGL,1) ! only needed in direct trafo, need if mapping to another set of roots !S%FA(JMLOC)%RPNMWO(JGL,3) = F%RW2(S%FA(JMLOC)%ISLD+JGL-1)*ZPNMCDO(JGL,1) !S%FA(JMLOC)%RPNMWO(JGL,4) = F%RW2(S%FA(JMLOC)%ISLD+JGL-1)*ZPNMCDO(JGL,2) ENDDO DEALLOCATE(ZPNMCDO) ENDIF ! LREAD_LEGPOL ENDIF ! LDLL ENDDO IF( S%LDLL ) THEN DEALLOCATE(ZLRMUZ2) ENDIF CALL GSTATS(1801,2) IF(.NOT.C%LREAD_LEGPOL) THEN ! not correct logic DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP +++++++++++++++++++++++ IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) ! --------------------anti-symmetric----------------------- ! --------------------anti-symmetric----------------------- ! --------------------anti-symmetric----------------------- DO JSETV=1,IPRTRV IMLOC=JMLOC+JSETV-1 IM = D%MYMS(IMLOC) ILA = (R%NSMAX-IM+2)/2 IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) ALLOCATE(S%FA(IMLOC)%RPNMA(IDGLU,ILA)) ENDDO IF( .NOT. S%LUSE_BELUSOV ) THEN ISREQ = 0 IRREQ = 0 ALLOCATE (ZRCVBUFV(IMAXRECVA,IPRTRV)) CALL GSTATS(851,0) DO JSETV=1,IPRTRV CALL SET2PE(IRECV,0,0,MYSETW,JSETV) IF( .NOT.LMPOFF )THEN IRREQ = IRREQ+1 CALL MPL_RECV(ZRCVBUFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& & KTAG=ITAG,CDSTRING='SULEG:') ENDIF ENDDO CALL GSTATS(851,1) IF( JMLOC+MYSETV-1 <= D%NUMP )THEN IMLOC=JMLOC+MYSETV-1 IM = D%MYMS(IMLOC) ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) IA = 1+MOD(R%NSMAX-IM+2,2) ILA = (R%NSMAX-IM+2)/2 IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) ALLOCATE(ZSNDBUFV(IDGLU*ILA)) IF(MOD(IMAXN-IM,2) == 0) THEN INMAX=IMAXN+1 ELSE INMAX=IMAXN ENDIF CALL GSTATS(1251,0) IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,ZLPOL,JI,JN) DO JGL=1,IDGLU CALL SUPOLF(IM,INMAX,ZLRMUZ(ISL+JGL-1),ZLPOL(0:INMAX),KCHEAP=3) DO JI=1,ILA JN=IM+2*(JI-1)+1 ZSNDBUFV((JGL-1)*ILA+JI)=ZLPOL(JN) ENDDO ENDDO !$OMP END PARALLEL DO IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) CALL GSTATS(1251,1) CALL GSTATS(851,0) DO JSETV=1,NPRTRV CALL SET2PE(ISEND,0,0,MYSETW,JSETV) IF( .NOT.LMPOFF )THEN ISREQ = ISREQ+1 CALL MPL_SEND(ZSNDBUFV(:),KDEST=NPRCIDS(ISEND), & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& & KTAG=ITAG,CDSTRING='SULEG:') ENDIF ENDDO CALL GSTATS(851,1) ENDIF CALL GSTATS(851,0) IF(IRREQ > 0) THEN CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & & CDSTRING='SUTRLE: SULEG') ENDIF IF(ISREQ > 0) THEN CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & & CDSTRING='SUTRLE: SULEG') ENDIF IF( NPROC==1.AND.LMPOFF )THEN ZRCVBUFV(1:SIZE(ZSNDBUFV(:)),1)=ZSNDBUFV(:) ENDIF CALL GSTATS(851,1) CALL GSTATS(1251,0) !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IA,ILA,IDGLU,JGL,JI) DO JSETV=1,IPRTRV IMLOC=JMLOC+JSETV-1 IM = D%MYMS(IMLOC) ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) IA = 1+MOD(R%NSMAX-IM+2,2) ILA = (R%NSMAX-IM+2)/2 IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) DO JGL=1,IDGLU DO JI=1,ILA S%FA(IMLOC)%RPNMA(JGL,ILA-JI+1)=ZRCVBUFV((JGL-1)*ILA+JI,JSETV) ENDDO ENDDO ENDDO !$OMP END PARALLEL DO CALL GSTATS(1251,1) IF( ALLOCATED(ZSNDBUFV) ) DEALLOCATE(ZSNDBUFV) IF( ALLOCATED(ZRCVBUFV) ) DEALLOCATE(ZRCVBUFV) ELSE CALL GSTATS(1251,0) !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IA,ILA,IDGLU,JGL,JI) DO JSETV=1,IPRTRV IMLOC=JMLOC+JSETV-1 IM = D%MYMS(IMLOC) ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) IA = 1+MOD(R%NSMAX-IM+2,2) ILA = (R%NSMAX-IM+2)/2 IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) DO JI=1,ILA DO JGL=1,IDGLU S%FA(IMLOC)%RPNMA(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IA+(JI-1)*2) ENDDO ENDDO ENDDO !$OMP END PARALLEL DO CALL GSTATS(1251,1) ENDIF ! --------------------symmetric----------------------- ! --------------------symmetric----------------------- ! --------------------symmetric----------------------- DO JSETV=1,IPRTRV IMLOC=JMLOC+JSETV-1 IM = D%MYMS(IMLOC) ILS = (R%NSMAX-IM+3)/2 IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) ALLOCATE(S%FA(IMLOC)%RPNMS(IDGLU,ILS)) ENDDO IF( .NOT. S%LUSE_BELUSOV ) THEN ISREQ = 0 IRREQ = 0 ALLOCATE (ZRCVBUFV(IMAXRECVS,IPRTRV)) CALL GSTATS(851,0) DO JSETV=1,IPRTRV CALL SET2PE(IRECV,0,0,MYSETW,JSETV) IF( .NOT.LMPOFF )THEN IRREQ = IRREQ+1 CALL MPL_RECV(ZRCVBUFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& & KTAG=ITAG,CDSTRING='SULEG:') ENDIF ENDDO CALL GSTATS(851,1) IF( JMLOC+MYSETV-1 <= D%NUMP )THEN IMLOC=JMLOC+MYSETV-1 IM = D%MYMS(IMLOC) ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) IS = 1+MOD(R%NSMAX-IM+1,2) ILS = (R%NSMAX-IM+3)/2 IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) ALLOCATE(ZSNDBUFV(IDGLU*ILS)) IF(MOD(IMAXN-IM,2) == 0) THEN INMAX=IMAXN ELSE INMAX=IMAXN+1 ENDIF IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) CALL GSTATS(1251,0) !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,ZLPOL,JI,JN) DO JGL=1,IDGLU CALL SUPOLF(IM,INMAX,ZLRMUZ(ISL+JGL-1),ZLPOL(0:INMAX),KCHEAP=2) DO JI=1,ILS JN=IM+2*(JI-1) ZSNDBUFV((JGL-1)*ILS+JI)=ZLPOL(JN) ENDDO ENDDO !$OMP END PARALLEL DO CALL GSTATS(1251,1) IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) CALL GSTATS(851,0) DO JSETV=1,NPRTRV CALL SET2PE(ISEND,0,0,MYSETW,JSETV) IF( .NOT.LMPOFF )THEN ISREQ = ISREQ+1 CALL MPL_SEND(ZSNDBUFV(:),KDEST=NPRCIDS(ISEND), & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& & KTAG=ITAG,CDSTRING='SULEG:') ENDIF ENDDO CALL GSTATS(851,1) ENDIF CALL GSTATS(851,0) IF(IRREQ > 0) THEN CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & & CDSTRING='SUTRLE: SULEG') ENDIF IF(ISREQ > 0) THEN CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & & CDSTRING='SUTRLE: SULEG') ENDIF IF( NPROC==1.AND.LMPOFF )THEN ZRCVBUFV(1:SIZE(ZSNDBUFV(:)),1)=ZSNDBUFV(:) ENDIF CALL GSTATS(851,1) CALL GSTATS(1251,0) !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IS,ILS,IDGLU,JGL,JI) DO JSETV=1,IPRTRV IMLOC=JMLOC+JSETV-1 IM = D%MYMS(IMLOC) ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) IS = 1+MOD(R%NSMAX-IM+1,2) ILS = (R%NSMAX-IM+3)/2 IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) DO JGL=1,IDGLU DO JI=1,ILS S%FA(IMLOC)%RPNMS(JGL,ILS-JI+1)=ZRCVBUFV((JGL-1)*ILS+JI,JSETV) ENDDO ENDDO ENDDO !$OMP END PARALLEL DO CALL GSTATS(1251,1) IF( ALLOCATED(ZSNDBUFV) ) DEALLOCATE(ZSNDBUFV) IF( ALLOCATED(ZRCVBUFV) ) DEALLOCATE(ZRCVBUFV) ELSE CALL GSTATS(1251,0) !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IS,ILS,IDGLU,JGL,JI) DO JSETV=1,IPRTRV IMLOC=JMLOC+JSETV-1 IM = D%MYMS(IMLOC) ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) IS = 1+MOD(R%NSMAX-IM+1,2) ILS = (R%NSMAX-IM+3)/2 IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) DO JI=1,ILS DO JGL=1,IDGLU S%FA(IMLOC)%RPNMS(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IS+(JI-1)*2) ENDDO ENDDO END DO !$OMP END PARALLEL DO CALL GSTATS(1251,1) ENDIF ENDDO ! +++++++++++++++++++++ END JMLOC LOOP +++++++++++++++++++++++ ENDIF CALL GSTATS(1801,3) IF(S%LUSE_BELUSOV) DEALLOCATE(F%RPNM) IF(C%LWRITE_LEGPOL) CALL WRITE_LEGPOL IF(C%LREAD_LEGPOL) CALL READ_LEGPOL ENDIF CALL GSTATS(1801,1) CALL GSTATS(140,1) ! ------------------------------------------------------------------ 9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) CALL END_POL END SUBROUTINE SULEG END MODULE SULEG_MOD