loki.transformations.temporaries.stack_allocator

Classes

BaseStackTransformation(block_dim, horizontal)

Base Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space.

DirectIdxStackTransformation(block_dim, ...)

Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space by indexing directly into the stack arrays.

FtrPtrStackTransformation(block_dim, horizontal)

Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space via Fortran pointers.

class FtrPtrStackTransformation(block_dim, horizontal, stack_name='STACK', local_int_var_name_pattern='JD_{name}', int_kind='JWIM', driver_horizontal=None, **kwargs)

Bases: BaseStackTransformation

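Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space via Fortran pointers: each temporary array becomes a contiguous pointer that is associated with a section of the stack array of matching type and kind.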

Starting from:

SUBROUTINE driver (nlon, klev, nb)

  USE kernel_mod, ONLY: kernel

  IMPLICIT NONE

  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER, INTENT(IN) :: nb

  INTEGER :: jstart
  INTEGER :: jend

  INTEGER :: b

  REAL(KIND=jprb), DIMENSION(nlon, klev) :: zzz

  jstart = 1
  jend = nlon

  DO b=1,nb
    CALL kernel(nlon, klev, jstart, jend, zzz)
  END DO

END SUBROUTINE driver

SUBROUTINE kernel (nlon, klev, jstart, jend, pzz)

  IMPLICIT NONE

  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev

  INTEGER, INTENT(IN) :: jstart
  INTEGER, INTENT(IN) :: jend

  REAL, INTENT(IN), DIMENSION(nlon, klev) :: pzz

  REAL, DIMENSION(nlon, klev) :: zzx
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), DIMENSION(nlon, klev) :: zzy
  LOGICAL, DIMENSION(nlon, klev) :: zzl

  INTEGER :: testint
  INTEGER :: jl, jlev

  zzl = .false.
  DO jl=1,nlon
    DO jlev=1,klev
      zzx(jl, jlev) = pzz(jl, jlev)
      zzy(jl, jlev) = pzz(jl, jlev)
    END DO
  END DO

END SUBROUTINE kernel

This transformation generates:

SUBROUTINE driver (nlon, klev, nb)

  USE kernel_mod, ONLY: kernel

  IMPLICIT NONE

  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER(KIND=JWIM) :: nb

  INTEGER :: jstart
  INTEGER :: jend

  INTEGER(KIND=JWIM) :: b

  REAL(KIND=jprb), DIMENSION(nlon, klev) :: zzz
  INTEGER(KIND=JWIM) :: J_Z_STACK_SIZE
  REAL, ALLOCATABLE :: Z_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_Z_STACK_USED
  INTEGER(KIND=JWIM) :: J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), ALLOCATABLE :: Z_SELECTED_REAL_KIND_13_300_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_Z_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM) :: J_LL_STACK_SIZE
  LOGICAL, ALLOCATABLE :: LL_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_LL_STACK_USED
  J_Z_STACK_SIZE = klev*nlon
  ALLOCATE (Z_STACK(klev*nlon, nb))
  J_Z_STACK_USED = 1
  J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE = klev*nlon
  ALLOCATE (Z_SELECTED_REAL_KIND_13_300_STACK(klev*nlon, nb))
  J_Z_SELECTED_REAL_KIND_13_300_STACK_USED = 1
  J_LL_STACK_SIZE = klev*nlon
  ALLOCATE (LL_STACK(klev*nlon, nb))
  J_LL_STACK_USED = 1
!$loki unstructured-data create( z_stack, z_selected_real_kind_13_300_stack, ll_stack )

  jstart = 1
  jend = nlon

  DO b=1,nb
    CALL kernel(nlon, klev, jstart, jend, zzz, J_Z_STACK_SIZE, Z_STACK(:, b), J_Z_STACK_USED,  &
    & J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE, Z_SELECTED_REAL_KIND_13_300_STACK(:, b),  &
    & J_Z_SELECTED_REAL_KIND_13_300_STACK_USED, J_LL_STACK_SIZE, LL_STACK(:, b), J_LL_STACK_USED)
  END DO

!$loki end unstructured-data delete( z_stack, z_selected_real_kind_13_300_stack, ll_stack )
  DEALLOCATE (Z_STACK)
  DEALLOCATE (Z_SELECTED_REAL_KIND_13_300_STACK)
  DEALLOCATE (LL_STACK)
END SUBROUTINE driver

SUBROUTINE kernel (nlon, klev, jstart, jend, pzz, K_P_STACK_SIZE, P_STACK, JD_P_STACK_USED,  &
& K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE, P_SELECTED_REAL_KIND_13_300_STACK, &
& JD_P_SELECTED_REAL_KIND_13_300_STACK_USED,  &
& K_LD_STACK_SIZE, LD_STACK, JD_LD_STACK_USED)

  IMPLICIT NONE

  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev

  INTEGER, INTENT(IN) :: jstart
  INTEGER, INTENT(IN) :: jend

  REAL, INTENT(IN), DIMENSION(nlon, klev) :: pzz

  REAL, POINTER, CONTIGUOUS, DIMENSION(:, :) :: zzx
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), POINTER, CONTIGUOUS, DIMENSION(:, :) :: zzy
  LOGICAL, POINTER, CONTIGUOUS, DIMENSION(:, :) :: zzl

  INTEGER :: testint
  INTEGER :: jl, jlev
  INTEGER(KIND=JWIM) :: JD_incr
  INTEGER(KIND=JWIM) :: JD_incr_SELECTED_REAL_KIND_13_300
  INTEGER(KIND=JWIM) :: J_P_STACK_USED
  INTEGER(KIND=JWIM) :: J_P_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM) :: J_LD_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_P_STACK_SIZE
  REAL, TARGET, CONTIGUOUS, INTENT(INOUT) :: P_STACK(K_P_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_P_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), TARGET, CONTIGUOUS, INTENT(INOUT) ::  &
  & P_SELECTED_REAL_KIND_13_300_STACK(K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_P_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_LD_STACK_SIZE
  LOGICAL, TARGET, CONTIGUOUS, INTENT(INOUT) :: LD_STACK(K_LD_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_LD_STACK_USED
  J_P_STACK_USED = JD_P_STACK_USED
  J_P_SELECTED_REAL_KIND_13_300_STACK_USED = JD_P_SELECTED_REAL_KIND_13_300_STACK_USED
  J_LD_STACK_USED = JD_LD_STACK_USED
!$loki device-present vars( p_stack, p_selected_real_kind_13_300_stack, ld_stack )
  JD_incr = J_P_STACK_USED
  zzx(1:nlon, 1:klev) => P_STACK(JD_incr:JD_incr + nlon*klev)
  J_P_STACK_USED = JD_incr + klev*nlon
  JD_incr_SELECTED_REAL_KIND_13_300 = J_P_SELECTED_REAL_KIND_13_300_STACK_USED
  zzy(1:nlon, 1:klev) =>  &
  & P_SELECTED_REAL_KIND_13_300_STACK(JD_incr_SELECTED_REAL_KIND_13_300: &
      & JD_incr_SELECTED_REAL_KIND_13_300 + nlon*klev)
  J_P_SELECTED_REAL_KIND_13_300_STACK_USED = JD_incr_SELECTED_REAL_KIND_13_300 + klev*nlon
  JD_incr = J_LD_STACK_USED
  zzl(1:nlon, 1:klev) => LD_STACK(JD_incr:JD_incr + nlon*klev)
  J_LD_STACK_USED = JD_incr + klev*nlon

  zzl = .false.
  DO jl=1,nlon
    DO jlev=1,klev
      zzx(jl, jlev) = pzz(jl, jlev)
      zzy(jl, jlev) = pzz(jl, jlev)
    END DO
  END DO

!$loki end device-present
END SUBROUTINE kernel
Parameters:
  • block_dim (Dimension) – Dimension object to define the blocking dimension.

  • horizontal (Dimension) – Dimension object to define the horizontal dimension.

  • stack_name (str, optional) – Name of the stack (default: ‘STACK’)

  • local_int_var_name_pattern (str, optional) – Local integer variable names pattern (default: ‘JD_{name}’)

  • int_kind (str, optional) – Integer kind (default: ‘JWIM’)

adapt_temp_declarations(routine, temporary_arrays)
apply_pool_allocator_to_temporaries(routine, item=None)

Apply raw stack allocator to local temporary arrays

This appends the relevant arguments to the routine’s dummy argument list and creates the assignments for the local copies of the stack-usage counters. For all local arrays, a Fortran pointer is declared and the temporaries are mapped via pointer association to sections of the stack arrays of matching type and kind.

The cumulative size of all temporary arrays is determined and returned.

Parameters:

routine (Subroutine) – Subroutine object to apply transformation to

Returns:

stack_dict – dict with required stack size mapped to type and kind

Return type:

dict

class DirectIdxStackTransformation(block_dim, horizontal, stack_name='STACK', local_int_var_name_pattern='JD_{name}', int_kind='JWIM', driver_horizontal=None, **kwargs)

Bases: BaseStackTransformation

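Transformation to inject a stack that allocates large scratch spaces per block and per datatype on the driver and maps temporary arrays in kernels to this scratch space by direct indexing: each temporary array is replaced by an integer offset into the stack array of matching type and kind, and every access to the temporary is rewritten as an index into that stack array.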

Starting from:

SUBROUTINE driver (nlon, klev, nb)

  USE kernel_mod, ONLY: kernel

  IMPLICIT NONE

  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER, INTENT(IN) :: nb

  INTEGER :: jstart
  INTEGER :: jend

  INTEGER :: b

  REAL(KIND=jprb), DIMENSION(nlon, klev) :: zzz

  jstart = 1
  jend = nlon

  DO b=1,nb
    CALL kernel(nlon, klev, jstart, jend, zzz)
  END DO

END SUBROUTINE driver

SUBROUTINE kernel (nlon, klev, jstart, jend, pzz)

  IMPLICIT NONE

  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev

  INTEGER, INTENT(IN) :: jstart
  INTEGER, INTENT(IN) :: jend

  REAL, INTENT(IN), DIMENSION(nlon, klev) :: pzz

  REAL, DIMENSION(nlon, klev) :: zzx
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), DIMENSION(nlon, klev) :: zzy
  LOGICAL, DIMENSION(nlon, klev) :: zzl

  INTEGER :: testint
  INTEGER :: jl, jlev

  zzl = .false.
  DO jl=1,nlon
    DO jlev=1,klev
      zzx(jl, jlev) = pzz(jl, jlev)
      zzy(jl, jlev) = pzz(jl, jlev)
    END DO
  END DO

END SUBROUTINE kernel

This transformation generates:

SUBROUTINE driver (nlon, klev, nb)

  USE kernel_mod, ONLY: kernel

  IMPLICIT NONE

  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev
  INTEGER(KIND=JWIM) :: nb

  INTEGER :: jstart
  INTEGER :: jend

  INTEGER(KIND=JWIM) :: b

  REAL(KIND=jprb), DIMENSION(nlon, klev) :: zzz
  INTEGER(KIND=JWIM) :: J_Z_STACK_SIZE
  REAL, ALLOCATABLE :: Z_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_Z_STACK_USED
  INTEGER(KIND=JWIM) :: J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), ALLOCATABLE :: Z_SELECTED_REAL_KIND_13_300_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_Z_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM) :: J_LL_STACK_SIZE
  LOGICAL, ALLOCATABLE :: LL_STACK(:, :)
  INTEGER(KIND=JWIM) :: J_LL_STACK_USED
  J_Z_STACK_SIZE = klev*nlon
  ALLOCATE (Z_STACK(klev*nlon, nb))
  J_Z_STACK_USED = 1
  J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE = klev*nlon
  ALLOCATE (Z_SELECTED_REAL_KIND_13_300_STACK(klev*nlon, nb))
  J_Z_SELECTED_REAL_KIND_13_300_STACK_USED = 1
  J_LL_STACK_SIZE = klev*nlon
  ALLOCATE (LL_STACK(klev*nlon, nb))
  J_LL_STACK_USED = 1
!$loki unstructured-data create( z_stack, z_selected_real_kind_13_300_stack, ll_stack )

  jstart = 1
  jend = nlon

  DO b=1,nb
    CALL kernel(nlon, klev, jstart, jend, zzz, J_Z_STACK_SIZE, Z_STACK(:, b), J_Z_STACK_USED,  &
    & J_Z_SELECTED_REAL_KIND_13_300_STACK_SIZE, Z_SELECTED_REAL_KIND_13_300_STACK(:, b),  &
    & J_Z_SELECTED_REAL_KIND_13_300_STACK_USED, J_LL_STACK_SIZE, LL_STACK(:, b), J_LL_STACK_USED)
  END DO

!$loki end unstructured-data delete( z_stack, z_selected_real_kind_13_300_stack, ll_stack )
  DEALLOCATE (Z_STACK)
  DEALLOCATE (Z_SELECTED_REAL_KIND_13_300_STACK)
  DEALLOCATE (LL_STACK)
END SUBROUTINE driver

SUBROUTINE kernel (nlon, klev, jstart, jend, pzz, K_P_STACK_SIZE, P_STACK, JD_P_STACK_USED,  &
& K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE, P_SELECTED_REAL_KIND_13_300_STACK, &
& JD_P_SELECTED_REAL_KIND_13_300_STACK_USED,  &
& K_LD_STACK_SIZE, LD_STACK, JD_LD_STACK_USED)

  IMPLICIT NONE

  INTEGER, INTENT(IN) :: nlon
  INTEGER, INTENT(IN) :: klev

  INTEGER, INTENT(IN) :: jstart
  INTEGER, INTENT(IN) :: jend

  REAL, INTENT(IN), DIMENSION(nlon, klev) :: pzz


  INTEGER :: testint
  INTEGER :: jl, jlev
  INTEGER(KIND=JWIM) :: JD_zzx
  INTEGER(KIND=JWIM) :: JD_zzy
  INTEGER(KIND=JWIM) :: JD_zzl
  INTEGER(KIND=JWIM) :: J_P_STACK_USED
  INTEGER(KIND=JWIM) :: J_P_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM) :: J_LD_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_P_STACK_SIZE
  REAL, TARGET, CONTIGUOUS, INTENT(INOUT) :: P_STACK(K_P_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_P_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE
  REAL(KIND=SELECTED_REAL_KIND(13, 300)), TARGET, CONTIGUOUS, INTENT(INOUT) ::  &
  & P_SELECTED_REAL_KIND_13_300_STACK(K_P_SELECTED_REAL_KIND_13_300_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_P_SELECTED_REAL_KIND_13_300_STACK_USED
  INTEGER(KIND=JWIM), INTENT(IN) :: K_LD_STACK_SIZE
  LOGICAL, TARGET, CONTIGUOUS, INTENT(INOUT) :: LD_STACK(K_LD_STACK_SIZE)
  INTEGER(KIND=JWIM), INTENT(INOUT) :: JD_LD_STACK_USED
  J_P_STACK_USED = JD_P_STACK_USED
  J_P_SELECTED_REAL_KIND_13_300_STACK_USED = JD_P_SELECTED_REAL_KIND_13_300_STACK_USED
  J_LD_STACK_USED = JD_LD_STACK_USED
!$loki device-present vars( p_stack, p_selected_real_kind_13_300_stack, ld_stack )
  JD_zzx = J_P_STACK_USED
  J_P_STACK_USED = JD_zzx + klev*nlon
  JD_zzy = J_P_SELECTED_REAL_KIND_13_300_STACK_USED
  J_P_SELECTED_REAL_KIND_13_300_STACK_USED = JD_zzy + klev*nlon
  JD_zzl = J_LD_STACK_USED
  J_LD_STACK_USED = JD_zzl + klev*nlon

  LD_STACK(1:klev*nlon) = .false.
  DO jl=1,nlon
    DO jlev=1,klev
      P_STACK(JD_zzx + jl - nlon + jlev*nlon) = pzz(jl, jlev)
      P_SELECTED_REAL_KIND_13_300_STACK(JD_zzy + jl - nlon + jlev*nlon) = pzz(jl, jlev)
    END DO
  END DO

!$loki end device-present
END SUBROUTINE kernel
Parameters:
  • block_dim (Dimension) – Dimension object to define the blocking dimension.

  • horizontal (Dimension) – Dimension object to define the horizontal dimension.

  • stack_name (str, optional) – Name of the stack (default: ‘STACK’)

  • local_int_var_name_pattern (str, optional) – Local integer variable names pattern (default: ‘JD_{name}’)

  • int_kind (str, optional) – Integer kind (default: ‘JWIM’)

apply_pool_allocator_to_temporaries(routine, item=None)

Apply raw stack allocator to local temporary arrays

This appends the relevant arguments to the routine’s dummy argument list and creates the assignments for the local copies of the stack-usage counters. For all local arrays, an integer offset variable is created, the declaration of the temporary is removed, and every access to the temporary is replaced by a direct index into the stack array of matching type and kind.

The cumulative size of all temporary arrays is determined and returned.

Parameters:

routine (Subroutine) – Subroutine object to apply transformation to

Returns:

stack_dict – dict with required stack size mapped to type and kind

Return type:

dict