loki.transformations.temporaries.stack_allocator
Classes
BaseStackTransformation | Base Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space.
FtrPtrStackTransformation | Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space.
DirectIdxStackTransformation | Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space.
- class FtrPtrStackTransformation(block_dim, horizontal, stack_name='STACK', local_int_var_name_pattern='JD_{name}', int_kind='JWIM', driver_horizontal=None, **kwargs)
Bases:
BaseStackTransformation
Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space.
Starting from:
SUBROUTINE driver (nlon, klev, nb, ydml_phy_mf)
  USE kernel_mod, ONLY: kernel
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER, INTENT(IN) :: nb
  INTEGER :: jstart
  INTEGER :: jend
  INTEGER :: b
  REAL(KIND=jprb), DIMENSION(nlon, klev) :: zzz

  jstart = 1
  jend = nlon
  DO b=1,nb
    CALL kernel(nlon, klev, jstart, jend, zzz)
  END DO
END SUBROUTINE driver

SUBROUTINE kernel (nlon, klev, jstart, jend, pzz)
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER, INTENT(IN) :: jstart
  INTEGER, INTENT(IN) :: jend
  REAL, INTENT(IN), DIMENSION(nlon, klev) :: pzz
  REAL, DIMENSION(nlon, klev) :: zzx
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), DIMENSION(nlon, klev) :: zzy
  LOGICAL, DIMENSION(nlon, klev) :: zzl
  INTEGER :: testint
  INTEGER :: jl, jlev

  zzl = .false.
  DO jl=1,nlon
    DO jlev=1,klev
      zzx(jl, jlev) = pzz(jl, jlev)
      zzy(jl, jlev) = pzz(jl, jlev)
    END DO
  END DO
END SUBROUTINE kernel
This transformation generates:
SUBROUTINE driver (nlon, klev, nb)
  USE kernel_mod, ONLY: kernel
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER(KIND=JWIM) :: nb
  INTEGER :: jstart
  INTEGER :: jend
  INTEGER(KIND=JWIM) :: b
  REAL(KIND=jprb), DIMENSION(nlon, klev) :: zzz
  INTEGER(KIND=JWIM) :: J_Z_STACK_SIZE
  REAL, ALLOCATABLE :: Z_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_Z_STACK_USED
  INTEGER(KIND=JWIM) :: J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), ALLOCATABLE :: Z_SELECTED_REAL_KIND_13_300_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_Z_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM) :: J_LL_STACK_SIZE
  LOGICAL, ALLOCATABLE :: LL_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_LL_STACK_USED

  J_Z_STACK_SIZE = klev*nlon
  ALLOCATE (Z_STACK(klev*nlon, nb))
  J_Z_STACK_USED = 1
  J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE = klev*nlon
  ALLOCATE (Z_SELECTED_REAL_KIND_13_300_STACK(klev*nlon, nb))
  J_Z_SELECTED_REAL_KIND_13_300_STACK_USED = 1
  J_LL_STACK_SIZE = klev*nlon
  ALLOCATE (LL_STACK(klev*nlon, nb))
  J_LL_STACK_USED = 1
  !$loki unstructured-data create( z_stack, z_selected_real_kind_13_300_stack, ll_stack )
  jstart = 1
  jend = nlon
  DO b=1,nb
    CALL kernel(nlon, klev, jstart, jend, zzz, J_Z_STACK_SIZE, Z_STACK(:, b), J_Z_STACK_USED, &
    & J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE, Z_SELECTED_REAL_KIND_13_300_STACK(:, b), &
    & J_Z_SELECTED_REAL_KIND_13_300_STACK_USED, J_LL_STACK_SIZE, LL_STACK(:, b), J_LL_STACK_USED)
  END DO
  !$loki end unstructured-data delete( z_stack, z_selected_real_kind_13_300_stack, ll_stack )
  DEALLOCATE (Z_STACK)
  DEALLOCATE (Z_SELECTED_REAL_KIND_13_300_STACK)
  DEALLOCATE (LL_STACK)
END SUBROUTINE driver

SUBROUTINE kernel (nlon, klev, jstart, jend, pzz, K_P_STACK_SIZE, P_STACK, JD_P_STACK_USED, &
  & K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE, P_SELECTED_REAL_KIND_13_300_STACK, &
  & JD_P_SELECTED_REAL_KIND_13_300_STACK_USED, &
  & K_LD_STACK_SIZE, LD_STACK, JD_LD_STACK_USED)
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER, INTENT(IN) :: jstart
  INTEGER, INTENT(IN) :: jend
  REAL, INTENT(IN), DIMENSION(nlon, klev) :: pzz
  REAL, POINTER, CONTIGUOUS, DIMENSION(:, :) :: zzx
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), POINTER, CONTIGUOUS, DIMENSION(:, :) :: zzy
  LOGICAL, POINTER, CONTIGUOUS, DIMENSION(:, :) :: zzl
  INTEGER :: testint
  INTEGER :: jl, jlev
  INTEGER(KIND=JWIM) :: JD_incr
  INTEGER(KIND=JWIM) :: JD_incr_SELECTED_REAL_KIND_13_300
  INTEGER(KIND=JWIM) :: JD_incr
  INTEGER(KIND=JWIM) :: J_P_STACK_USED
  INTEGER(KIND=JWIM) :: J_P_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM) :: J_LD_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_P_STACK_SIZE
  REAL, TARGET, CONTIGUOUS, INTENT(INOUT) :: P_STACK(K_P_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_P_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), TARGET, CONTIGUOUS, INTENT(INOUT) :: &
  & P_SELECTED_REAL_KIND_13_300_STACK(K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_P_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_LD_STACK_SIZE
  LOGICAL, TARGET, CONTIGUOUS, INTENT(INOUT) :: LD_STACK(K_LD_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_LD_STACK_USED

  J_P_STACK_USED = JD_P_STACK_USED
  J_P_SELECTED_REAL_KIND_13_300_STACK_USED = JD_P_SELECTED_REAL_KIND_13_300_STACK_USED
  J_LD_STACK_USED = JD_LD_STACK_USED
  !$loki device-present vars( p_stack, p_selected_real_kind_13_300_stack, ld_stack )
  JD_incr = J_P_STACK_USED
  zzx(1:nlon, 1:klev) => P_STACK(JD_incr:JD_incr + nlon*klev)
  J_P_STACK_USED = JD_incr + klev*nlon
  JD_incr_SELECTED_REAL_KIND_13_300 = J_P_SELECTED_REAL_KIND_13_300_STACK_USED
  zzy(1:nlon, 1:klev) => &
  & P_SELECTED_REAL_KIND_13_300_STACK(JD_incr_SELECTED_REAL_KIND_13_300: &
  & JD_incr_SELECTED_REAL_KIND_13_300 + nlon*klev)
  J_P_SELECTED_REAL_KIND_13_300_STACK_USED = JD_incr_SELECTED_REAL_KIND_13_300 + klev*nlon
  JD_incr = J_LD_STACK_USED
  zzl(1:nlon, 1:klev) => LD_STACK(JD_incr:JD_incr + nlon*klev)
  J_LD_STACK_USED = JD_incr + klev*nlon
  zzl = .false.
  DO jl=1,nlon
    DO jlev=1,klev
      zzx(jl, jlev) = pzz(jl, jlev)
      zzy(jl, jlev) = pzz(jl, jlev)
    END DO
  END DO
  !$loki end device-present
END SUBROUTINE kernel
- Parameters:
block_dim (Dimension) – Dimension object to define the blocking dimension.
horizontal (Dimension) – Dimension object to define the horizontal dimension.
stack_name (str, optional) – Name of the stack (default: ‘STACK’)
local_int_var_name_pattern (str, optional) – Local integer variable names pattern (default: ‘JD_{name}’)
int_kind (str, optional) – Integer kind (default: ‘JWIM’)
- adapt_temp_declarations(routine, temporary_arrays)
- apply_pool_allocator_to_temporaries(routine, item=None)
Apply raw stack allocator to local temporary arrays
This appends the relevant arguments to the routine’s dummy argument list and creates the assignments for the local copies of the stack counters. For all local arrays, a contiguous Fortran pointer is declared and the temporaries are mapped via pointer association to the pool-allocated memory region.
The cumulative size of all temporary arrays is determined and returned.
- Parameters:
routine (Subroutine) – Subroutine object to apply transformation to
- Returns:
stack_dict – dict with required stack size mapped to type and kind
- Return type:
dict
- class DirectIdxStackTransformation(block_dim, horizontal, stack_name='STACK', local_int_var_name_pattern='JD_{name}', int_kind='JWIM', driver_horizontal=None, **kwargs)
Bases:
BaseStackTransformation
Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space.
Starting from:
SUBROUTINE driver (nlon, klev, nb, ydml_phy_mf)
  USE kernel_mod, ONLY: kernel
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER, INTENT(IN) :: nb
  INTEGER :: jstart
  INTEGER :: jend
  INTEGER :: b
  REAL(KIND=jprb), DIMENSION(nlon, klev) :: zzz

  jstart = 1
  jend = nlon
  DO b=1,nb
    CALL kernel(nlon, klev, jstart, jend, zzz)
  END DO
END SUBROUTINE driver

SUBROUTINE kernel (nlon, klev, jstart, jend, pzz)
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER, INTENT(IN) :: jstart
  INTEGER, INTENT(IN) :: jend
  REAL, INTENT(IN), DIMENSION(nlon, klev) :: pzz
  REAL, DIMENSION(nlon, klev) :: zzx
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), DIMENSION(nlon, klev) :: zzy
  LOGICAL, DIMENSION(nlon, klev) :: zzl
  INTEGER :: testint
  INTEGER :: jl, jlev

  zzl = .false.
  DO jl=1,nlon
    DO jlev=1,klev
      zzx(jl, jlev) = pzz(jl, jlev)
      zzy(jl, jlev) = pzz(jl, jlev)
    END DO
  END DO
END SUBROUTINE kernel
This transformation generates:
SUBROUTINE driver (nlon, klev, nb)
  USE kernel_mod, ONLY: kernel
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER(KIND=JWIM) :: nb
  INTEGER :: jstart
  INTEGER :: jend
  INTEGER(KIND=JWIM) :: b
  REAL(KIND=jprb), DIMENSION(nlon, klev) :: zzz
  INTEGER(KIND=JWIM) :: J_Z_STACK_SIZE
  REAL, ALLOCATABLE :: Z_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_Z_STACK_USED
  INTEGER(KIND=JWIM) :: J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), ALLOCATABLE :: Z_SELECTED_REAL_KIND_13_300_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_Z_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM) :: J_LL_STACK_SIZE
  LOGICAL, ALLOCATABLE :: LL_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_LL_STACK_USED

  J_Z_STACK_SIZE = klev*nlon
  ALLOCATE (Z_STACK(klev*nlon, nb))
  J_Z_STACK_USED = 1
  J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE = klev*nlon
  ALLOCATE (Z_SELECTED_REAL_KIND_13_300_STACK(klev*nlon, nb))
  J_Z_SELECTED_REAL_KIND_13_300_STACK_USED = 1
  J_LL_STACK_SIZE = klev*nlon
  ALLOCATE (LL_STACK(klev*nlon, nb))
  J_LL_STACK_USED = 1
  !$loki unstructured-data create( z_stack, z_selected_real_kind_13_300_stack, ll_stack )
  jstart = 1
  jend = nlon
  DO b=1,nb
    CALL kernel(nlon, klev, jstart, jend, zzz, J_Z_STACK_SIZE, Z_STACK(:, b), J_Z_STACK_USED, &
    & J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE, Z_SELECTED_REAL_KIND_13_300_STACK(:, b), &
    & J_Z_SELECTED_REAL_KIND_13_300_STACK_USED, J_LL_STACK_SIZE, LL_STACK(:, b), J_LL_STACK_USED)
  END DO
  !$loki end unstructured-data delete( z_stack, z_selected_real_kind_13_300_stack, ll_stack )
  DEALLOCATE (Z_STACK)
  DEALLOCATE (Z_SELECTED_REAL_KIND_13_300_STACK)
  DEALLOCATE (LL_STACK)
END SUBROUTINE driver

SUBROUTINE kernel (nlon, klev, jstart, jend, pzz, K_P_STACK_SIZE, P_STACK, JD_P_STACK_USED, &
  & K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE, P_SELECTED_REAL_KIND_13_300_STACK, &
  & JD_P_SELECTED_REAL_KIND_13_300_STACK_USED, &
  & K_LD_STACK_SIZE, LD_STACK, JD_LD_STACK_USED)
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER, INTENT(IN) :: jstart
  INTEGER, INTENT(IN) :: jend
  REAL, INTENT(IN), DIMENSION(nlon, klev) :: pzz
  INTEGER :: testint
  INTEGER :: jl, jlev
  INTEGER(KIND=JWIM) :: JD_zzx
  INTEGER(KIND=JWIM) :: JD_zzy
  INTEGER(KIND=JWIM) :: JD_zzl
  INTEGER(KIND=JWIM) :: J_P_STACK_USED
  INTEGER(KIND=JWIM) :: J_P_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM) :: J_LD_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_P_STACK_SIZE
  REAL, TARGET, CONTIGUOUS, INTENT(INOUT) :: P_STACK(K_P_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_P_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), TARGET, CONTIGUOUS, INTENT(INOUT) :: &
  & P_SELECTED_REAL_KIND_13_300_STACK(K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_P_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_LD_STACK_SIZE
  LOGICAL, TARGET, CONTIGUOUS, INTENT(INOUT) :: LD_STACK(K_LD_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_LD_STACK_USED

  J_P_STACK_USED = JD_P_STACK_USED
  J_P_SELECTED_REAL_KIND_13_300_STACK_USED = JD_P_SELECTED_REAL_KIND_13_300_STACK_USED
  J_LD_STACK_USED = JD_LD_STACK_USED
  !$loki device-present vars( p_stack, p_selected_real_kind_13_300_stack, ld_stack )
  JD_zzx = J_P_STACK_USED
  J_P_STACK_USED = JD_zzx + klev*nlon
  JD_zzy = J_P_SELECTED_REAL_KIND_13_300_STACK_USED
  J_P_SELECTED_REAL_KIND_13_300_STACK_USED = JD_zzy + klev*nlon
  JD_zzl = J_LD_STACK_USED
  J_LD_STACK_USED = JD_zzl + klev*nlon
  LD_STACK(1:klev*nlon) = .false.
  DO jl=1,nlon
    DO jlev=1,klev
      P_STACK(JD_zzx + jl - nlon + jlev*nlon) = pzz(jl, jlev)
      P_SELECTED_REAL_KIND_13_300_STACK(JD_zzy + jl - nlon + jlev*nlon) = pzz(jl, jlev)
    END DO
  END DO
  !$loki end device-present
END SUBROUTINE kernel
- Parameters:
block_dim (Dimension) – Dimension object to define the blocking dimension.
horizontal (Dimension) – Dimension object to define the horizontal dimension.
stack_name (str, optional) – Name of the stack (default: ‘STACK’)
local_int_var_name_pattern (str, optional) – Local integer variable names pattern (default: ‘JD_{name}’)
int_kind (str, optional) – Integer kind (default: ‘JWIM’)
- apply_pool_allocator_to_temporaries(routine, item=None)
Apply raw stack allocator to local temporary arrays
This appends the relevant arguments to the routine’s dummy argument list and creates the assignments for the local copies of the stack counters. For all local arrays, an integer offset variable is created and accesses to the temporaries are replaced by direct indexing into the pool-allocated memory region.
The cumulative size of all temporary arrays is determined and returned.
- Parameters:
routine (Subroutine) – Subroutine object to apply transformation to
- Returns:
stack_dict – dict with required stack size mapped to type and kind
- Return type:
dict