! (C) Copyright 2022- NVIDIA. ! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. ! In applying this licence, ECMWF does not waive the privileges and immunities ! granted to it by virtue of its status as an intergovernmental organisation ! nor does it submit to any jurisdiction. #define ALIGN(I, A) (((I)+(A)-1)/(A)*(A)) MODULE BUFFERED_ALLOCATOR_MOD USE PARKIND_ECTRANS ,ONLY : JPIM USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS USE ISO_C_BINDING ,ONLY : C_INT8_T, C_SIZE_T, C_LOC, C_F_POINTER USE GROWING_ALLOCATOR_MOD IMPLICIT NONE PRIVATE PUBLIC :: BUFFERED_ALLOCATOR, ALLOCATION_RESERVATION_HANDLE, RESERVE, ASSIGN_PTR, GET_ALLOCATION PUBLIC :: MAKE_BUFFERED_ALLOCATOR, INSTANTIATE_ALLOCATOR ! The buffered allocator uses double buffering. The idea is that the allocator ! iterates through its two buffers, and each allocate returns one or the other ! buffer. It is a two-step allocator - it expects you to create reservation ! handles first for all allocations. Then the allocator is instantiated (i.e. ! the buffers are actually allocated). Instantiation will do an allocation ! that is large enough two hold all consecutive allocations. Other allocations ! might be overwritten (like you can't access the allocation done two steps ! before). ! After instantiation, you can retrieve your buffers by passing the allocator ! and the handles to GET_ALLOCATION. Also, we provide helper function ! ASSIGN_PTR, because an allocation is often split among several "sub-buffers", ! so you can for example assign the first half of an allocation to one ! buffer, while the second half to another buffer. ! If you see "Logical errors" that usually means you try to retrieve a buffer ! that is not within the reserved allocation size. This might be a valid ! region in the sense that it is physically allocated, but it might be part of ! the double buffer. INTEGER(KIND=JPIM), PARAMETER :: NBUF = 2 TYPE BUFFERED_ALLOCATOR INTEGER(KIND=C_SIZE_T) :: BUFR_SZ(0:NBUF-1) INTEGER(KIND=JPIM) :: NEXT_BUF TYPE(GROWING_ALLOCATION_TYPE), POINTER :: PTR END TYPE TYPE ALLOCATION_RESERVATION_HANDLE INTEGER(KIND=C_SIZE_T) :: SZ INTEGER(KIND=JPIM) :: BUF END TYPE INTERFACE ASSIGN_PTR MODULE PROCEDURE ASSIGN_PTR_FLOAT, ASSIGN_PTR_DOUBLE END INTERFACE CONTAINS ! TODO This is not perfect yet. We will over-allocate up to 2X in theory. ! It would be better to always keep the previous allocation size and then ! have one allocation sitting at the the top, and the double-buffer at ! the bottom of the allocation. FUNCTION MAKE_BUFFERED_ALLOCATOR() IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR) :: MAKE_BUFFERED_ALLOCATOR MAKE_BUFFERED_ALLOCATOR%BUFR_SZ(:) = 0 MAKE_BUFFERED_ALLOCATOR%NEXT_BUF = 0 END FUNCTION MAKE_BUFFERED_ALLOCATOR FUNCTION RESERVE(ALLOCATOR, SZ) IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR INTEGER(KIND=C_SIZE_T), INTENT(IN) :: SZ TYPE(ALLOCATION_RESERVATION_HANDLE) :: RESERVE ALLOCATOR%BUFR_SZ(ALLOCATOR%NEXT_BUF) = MAX(ALLOCATOR%BUFR_SZ(ALLOCATOR%NEXT_BUF),SZ) RESERVE%BUF = ALLOCATOR%NEXT_BUF RESERVE%SZ = SZ ALLOCATOR%NEXT_BUF = MOD(ALLOCATOR%NEXT_BUF+1,NBUF) END FUNCTION RESERVE SUBROUTINE INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION) IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR !!TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: GROWING_ALLOCATION TYPE(GROWING_ALLOCATION_TYPE), target, INTENT(INout) :: GROWING_ALLOCATION INTEGER :: I DO I = 0, NBUF-1 ALLOCATOR%BUFR_SZ(I) = ALIGN(ALLOCATOR%BUFR_SZ(I),128) ENDDO ALLOCATOR%PTR => GROWING_ALLOCATION CALL REALLOCATE_GROWING_ALLOCATION(GROWING_ALLOCATION, SUM(ALLOCATOR%BUFR_SZ)) END SUBROUTINE FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION) IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR TYPE(ALLOCATION_RESERVATION_HANDLE), INTENT(IN) :: RESERVATION INTEGER(KIND=C_INT8_T), POINTER :: GET_ALLOCATION(:) IF (RESERVATION%SZ > ALLOCATOR%BUFR_SZ(RESERVATION%BUF)) THEN CALL ABORT_TRANS( "Logical Error in GET_ALLOCATION") ENDIF IF (RESERVATION%BUF == 0) THEN GET_ALLOCATION(1:) => ALLOCATOR%PTR%PTR(1:RESERVATION%SZ) ELSE GET_ALLOCATION(1:) => ALLOCATOR%PTR%PTR(SUM(ALLOCATOR%BUFR_SZ(0:RESERVATION%BUF-1))+1: & SUM(ALLOCATOR%BUFR_SZ(0:RESERVATION%BUF-1))+RESERVATION%SZ) ENDIF END FUNCTION GET_ALLOCATION SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) USE ISO_C_BINDING USE OPENACC, ONLY: ACC_ASYNC_SYNC IMPLICIT NONE INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:) REAL(KIND=C_FLOAT), POINTER, INTENT(OUT) :: DST(:) LOGICAL, INTENT(IN), OPTIONAL :: SET_VALUE INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM LOGICAL :: SET_VALUE_EFF INTEGER(KIND=4) :: SET_STREAM_EFF INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES IF (START_IN_BYTES + LENGTH_IN_BYTES - 1 > SIZE(SRC, KIND=C_SIZE_T)) THEN CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment") ENDIF IF (PRESENT(SET_VALUE)) THEN SET_VALUE_EFF = SET_VALUE ELSE SET_VALUE_EFF = .FALSE. ENDIF IF (PRESENT(SET_STREAM)) THEN SET_STREAM_EFF = SET_STREAM ELSE SET_STREAM_EFF = ACC_ASYNC_SYNC ENDIF IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN ! This option is turned off by default, but for experimentation we can turn it on. This is ! setting all bits to 1 (meaning NaN in floating point) !$ACC KERNELS PRESENT(SRC) ASYNC(SET_STREAM_EFF) SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1) = -1 !$ACC END KERNELS!! LOOP ENDIF CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, & & [SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/SIZEOF(DST(0))]) END SUBROUTINE ASSIGN_PTR_FLOAT SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) USE ISO_C_BINDING USE OPENACC, ONLY: ACC_ASYNC_SYNC IMPLICIT NONE INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:) REAL(KIND=C_DOUBLE), POINTER, INTENT(OUT) :: DST(:) LOGICAL, INTENT(IN), OPTIONAL :: SET_VALUE INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM LOGICAL :: SET_VALUE_EFF INTEGER(KIND=4) :: SET_STREAM_EFF INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES IF (START_IN_BYTES + LENGTH_IN_BYTES - 1 > SIZE(SRC, KIND=C_SIZE_T)) THEN CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment") ENDIF IF (PRESENT(SET_VALUE)) THEN SET_VALUE_EFF = SET_VALUE ELSE SET_VALUE_EFF = .FALSE. ENDIF IF (PRESENT(SET_STREAM)) THEN SET_STREAM_EFF = SET_STREAM ELSE SET_STREAM_EFF = ACC_ASYNC_SYNC ENDIF IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN ! This option is turned off by default, but for experimentation we can turn it on. This is ! setting all bits to 1 (meaning NaN in floating point) !$ACC KERNELS PRESENT(SRC) ASYNC(SET_STREAM_EFF) SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1) = -1 !$ACC END KERNELS!! LOOP ENDIF CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, & & [SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/SIZEOF(DST(0))]) END SUBROUTINE ASSIGN_PTR_DOUBLE END MODULE