//------------------------------------------------------------------------------
// GB_ek_slice_kernels.h: slice the entries and vectors of a matrix
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2024, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

#ifndef GB_EK_SLICE_KERNELS_H
#define GB_EK_SLICE_KERNELS_H

static inline void GB_e_slice
(
    int64_t *Slice,         // array of size ntasks+1
    int64_t e,              // number items to partition amongst the tasks
    const int ntasks        // # of tasks
)
{
    Slice [0] = 0 ;
    for (int tid = 1 ; tid < ntasks ; tid++)
    { 
        Slice [tid] = (int64_t) GB_PART (tid, e, ntasks) ;
    }
    Slice [ntasks] = e ;
}

//------------------------------------------------------------------------------
// GB_ek_slice_ntasks: determine # of threads and tasks to use for GB_ek_slice
//------------------------------------------------------------------------------

static inline void GB_ek_slice_ntasks
(
    // output
    int *nthreads,              // # of threads to use for GB_ek_slice
    int *ntasks,                // # of tasks to create for GB_ek_slice
    // input
    int64_t anz_held,           // GB_nnz_held(A) of the matrix to slice
    int ntasks_per_thread,      // # of tasks per thread
    double work,                // total work to do
    double chunk,               // give each thread at least this much work
    int nthreads_max            // max # of threads to use
)
{
    if (anz_held == 0)
    {
        (*nthreads) = 1 ;
        (*ntasks) = 1 ;
    }
    else
    {
        (*nthreads) = GB_nthreads (work, chunk, nthreads_max) ;
        (*ntasks) = (*nthreads == 1) ? 1 : ((ntasks_per_thread) * (*nthreads)) ;
        (*ntasks) = (int) GB_IMIN (*ntasks, anz_held) ;
        (*ntasks) = (int) GB_IMAX (*ntasks, 1) ;
    }
}

//------------------------------------------------------------------------------
// GB_SLICE_MATRIX: slice a single matrix using GB_ek_slice
//------------------------------------------------------------------------------

// chunk and nthreads_max must already be defined.

#define GB_SLICE_MATRIX_WORK(X,ntasks_per_thread,work,xnz_held)               \
    GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), xnz_held,         \
        ntasks_per_thread, work, chunk, nthreads_max) ;                       \
    GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ;            \
    if (X ## _ek_slicing == NULL)                                             \
    {                                                                         \
        /* out of memory */                                                   \
        GB_FREE_ALL ;                                                         \
        return (GrB_OUT_OF_MEMORY) ;                                          \
    }                                                                         \
    GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ;                         \
    const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ;                 \
    const int64_t *klast_  ## X ## slice = X ## _ek_slicing + X ## _ntasks ;  \
    const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;

#define GB_SLICE_MATRIX(X,ntasks_per_thread)                                  \
    const int64_t X ## _held = GB_nnz_held (X) ;                              \
    const double  X ## _wrk = X ## _held + X->nvec ;                          \
    GB_SLICE_MATRIX_WORK (X, ntasks_per_thread, X ## _wrk, X ## _held)

//------------------------------------------------------------------------------
// GB_GET_PA_AND_PC: find the part of A(:,k) and C(:,k) for this task
//------------------------------------------------------------------------------

// The tasks were generated by GB_ek_slice.

// as a macro, where p0, p1, and p2 are first obtained as above:
//  p0 = GBP (Ap, k, avlen) ;
//  p1 = GBP (Ap, k+1, avlen) ;
//  p2 = GBP (Cp, k, cvlen) ;
#define GB_GET_PA_AND_PC(pA_start,pA_end,pC,tid,k,kfirst,klast,pstart_slice,Cp_kfirst,p0,p1,p2)    \
    int64_t pA_start, pA_end, pC ;                                          \
    if (k == kfirst)                                                        \
    {                                                                       \
        /* First vector for task tid; may only be partially owned. */       \
        pA_start = pstart_slice [tid] ;                                     \
        pA_end   = GB_IMIN (p1, pstart_slice [tid+1]) ;                     \
        pC = Cp_kfirst [tid] ;                                              \
    }                                                                       \
    else if (k == klast)                                                    \
    {                                                                       \
        /* Last vector for task tid; may only be partially owned. */        \
        pA_start = p0 ;                                                     \
        pA_end   = pstart_slice [tid+1] ;                                   \
        pC = p2 ;                                                           \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* task tid entirely owns this vector A(:,k). */                    \
        pA_start = p0 ;                                                     \
        pA_end   = p1 ;                                                     \
        pC = p2 ;                                                           \
    }

//------------------------------------------------------------------------------
// GB_GET_PA: find the part of A(:,k) to be operated on by this task
//------------------------------------------------------------------------------

// The tasks were generated by GB_ek_slice.

// as a macro, where p0 and p1 are first obtained as above:
//  p0 = GBP (Ap, k, avlen) ;
//  p1 = GBP (Ap, k+1, avlen) ;
#define GB_GET_PA(pA_start,pA_end,tid,k,kfirst,klast,pstart_slice,p0,p1)    \
    int64_t pA_start, pA_end ;                                              \
    if (k == kfirst)                                                        \
    {                                                                       \
        /* First vector for task tid; may only be partially owned. */       \
        pA_start = pstart_slice [tid] ;                                     \
        pA_end   = GB_IMIN (p1, pstart_slice [tid+1]) ;                     \
    }                                                                       \
    else if (k == klast)                                                    \
    {                                                                       \
        /* Last vector for task tid; may only be partially owned. */        \
        pA_start = p0 ;                                                     \
        pA_end   = pstart_slice [tid+1] ;                                   \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* task tid entirely owns this vector A(:,k). */                    \
        pA_start = p0 ;                                                     \
        pA_end   = p1 ;                                                     \
    }


#endif

