ParallelFor Class Reference

#include <parallelfor.h>

Detailed Description

This class contains several methods that implement a for loop which distributes the workload across multiple threads. You can choose between static or dynamic distribution and optionally specify objects to initialize and finalize per-worker-thread data.

Classes

class  BaseContext
 
struct  BreakCondition
 
class  BreakContext
 
class  Dummy
 
struct  DynamicContext
 
class  DynamicJob
 
struct  ForAlignedContext
 
struct  ForAlignedContext< FORCONTEXT, INDEXTYPE, Dummy, Dummy >
 
struct  ForState
 
class  InvokeSelector
 
class  NoBreakContext
 
class  NoContext
 
class  NoContextSelector
 
class  ParallelForJob
 
struct  StaticContext
 
class  StaticJob
 

Static Public Member Functions

template<typename FROMTYPE , typename INDEXTYPE , typename LOOP >
static auto Dynamic (FROMTYPE from, INDEXTYPE to, const LOOP &obj, Int threadCnt=PARALLELFOR_USEMAXIMUMTHREADS, const Int granularity=PARALLELFOR_DEFAULTGRANULARITY, JobQueueInterface *queue=JOBQUEUE_CURRENT) -> decltype(obj(to))
 
template<typename CONTEXT , typename FROMTYPE , typename INDEXTYPE , typename LOOP >
static MAXON_ATTRIBUTE_NO_INLINE CONTEXT::ResultValueType Dynamic (FROMTYPE from, INDEXTYPE to, const LOOP &obj, Int threadCnt=PARALLELFOR_USEMAXIMUMTHREADS, const Int granularity=PARALLELFOR_DEFAULTGRANULARITY, JobQueueInterface *queue=JOBQUEUE_CURRENT)
 
template<typename CONTEXT , typename FROMTYPE , typename INDEXTYPE , typename LOOP , typename INIT , typename FINALIZE >
static std::enable_if<!std::is_convertible< typename std::remove_reference< LOOP >::type, maxon::Int >::value, typename CONTEXT::ResultValueType >::type Dynamic (FROMTYPE from, INDEXTYPE to, const INIT &init, const LOOP &obj, const FINALIZE &finalize, Int threadCnt=PARALLELFOR_USEMAXIMUMTHREADS, const Int granularity=PARALLELFOR_DEFAULTGRANULARITY, JobQueueInterface *queue=JOBQUEUE_CURRENT)
 
template<typename CONTEXT , PARALLELFORFLAGS FLAGS, typename FROMTYPE , typename INDEXTYPE , typename LOOP , typename INIT , typename FINALIZE >
static MAXON_ATTRIBUTE_NO_INLINE CONTEXT::ResultValueType Dynamic (FROMTYPE from, INDEXTYPE to, const INIT &init, const LOOP &obj, const FINALIZE &finalize, Int threadCnt=PARALLELFOR_USEMAXIMUMTHREADS, const Int granularity=PARALLELFOR_DEFAULTGRANULARITY, JobQueueInterface *queue=JOBQUEUE_CURRENT)
 
template<typename FROMTYPE , typename INDEXTYPE , typename LOOP >
static auto Static (FROMTYPE from, INDEXTYPE to, const LOOP &obj, Int threadCnt=PARALLELFOR_USEMAXIMUMTHREADS, const Int minChunkSize=PARALLELFOR_DEFAULTGRANULARITY, JobQueueInterface *queue=JOBQUEUE_CURRENT) -> decltype(obj(to))
 
template<typename CONTEXT , typename FROMTYPE , typename INDEXTYPE , typename LOOP >
static MAXON_ATTRIBUTE_NO_INLINE CONTEXT::ResultValueType Static (FROMTYPE from, INDEXTYPE to, const LOOP &obj, Int threadCnt=PARALLELFOR_USEMAXIMUMTHREADS, const Int minChunkSize=PARALLELFOR_DEFAULTGRANULARITY, JobQueueInterface *queue=JOBQUEUE_CURRENT)
 
template<typename CONTEXT , typename FROMTYPE , typename INDEXTYPE , typename LOOP , typename INIT , typename FINALIZE >
static std::enable_if<!std::is_convertible< typename std::remove_reference< LOOP >::type, maxon::Int >::value, typename CONTEXT::ResultValueType >::type Static (FROMTYPE from, INDEXTYPE to, const INIT &init, const LOOP &obj, const FINALIZE &finalize, Int threadCnt=PARALLELFOR_USEMAXIMUMTHREADS, const Int minChunkSize=PARALLELFOR_DEFAULTGRANULARITY, JobQueueInterface *queue=JOBQUEUE_CURRENT)
 
template<typename CONTEXT , PARALLELFORFLAGS FLAGS, typename FROMTYPE , typename INDEXTYPE , typename LOOP , typename INIT , typename FINALIZE >
static MAXON_ATTRIBUTE_NO_INLINE CONTEXT::ResultValueType Static (FROMTYPE from, INDEXTYPE to, const INIT &init, const LOOP &obj, const FINALIZE &finalize, Int threadCnt=PARALLELFOR_USEMAXIMUMTHREADS, const Int minChunkSize=PARALLELFOR_DEFAULTGRANULARITY, JobQueueInterface *queue=JOBQUEUE_CURRENT)
 

Static Private Member Functions

template<typename INDEXTYPE >
static INDEXTYPE CalculateStaticChunkSize (UInt cnt, Int &threadCnt, Int minChunkSize)
 

Member Function Documentation

◆ Dynamic() [1/4]

static auto Dynamic ( FROMTYPE  from,
INDEXTYPE  to,
const LOOP &  obj,
Int  threadCnt = PARALLELFOR_USEMAXIMUMTHREADS,
const Int  granularity = PARALLELFOR_DEFAULTGRANULARITY,
JobQueueInterface *  queue = JOBQUEUE_CURRENT 
) -> decltype(obj(to))
static

Runs a parallelized for (i = from; i < to; i++) loop using dynamic distribution.

ParallelFor::Dynamic(from, to,
  [](Int i)
  {
    // ... do something ...
  });
Parameters
[in] from: Start index.
[in] to: End index (excluded)
[in] obj: Lambda or object with operator (), object will be referenced.
[in] threadCnt: PARALLELFOR_USEMAXIMUMTHREADS for default handling (uses the maximum number of threads available), otherwise maximum number of threads to be used. A value of 1 will disable parallelization.
[in] granularity: The number of iterations after which a thread shares work with idle threads (1 means best distribution at the cost of higher synchronization, PARALLELFOR_DEFAULTGRANULARITY is the default).
[in] queue: Optional queue that the parallel for is executed within.
Template Parameters
FROMTYPE: An integral type used for the from variable, might be different than INDEXTYPE but is not allowed to have a bigger range.
INDEXTYPE: An integral type used for the index of the loop.
LOOP: A class containing an operator ()(INDEXTYPE) used for the loop.
Returns
Depending on the type of #LOOP either void or Result<void> (OK on success. Will only return an error if your loop object returns one). Any errors will be returned as an AggregatedError which you can iterate to check for individual errors.

◆ Dynamic() [2/4]

static MAXON_ATTRIBUTE_NO_INLINE CONTEXT::ResultValueType Dynamic ( FROMTYPE  from,
INDEXTYPE  to,
const LOOP &  obj,
Int  threadCnt = PARALLELFOR_USEMAXIMUMTHREADS,
const Int  granularity = PARALLELFOR_DEFAULTGRANULARITY,
JobQueueInterface *  queue = JOBQUEUE_CURRENT 
)
static

Runs a parallelized for (i = from; i < to; i++) loop using dynamic distribution.

ParallelFor::Dynamic<ParallelFor::BreakContext>(from, to,
[&shouldWeBreak](Int i, ParallelFor::BreakContext& context)
{
// ... example for a break condition ...
if (shouldWeBreak)
{
// tell ParallelFor that it should cancel the loop and return from the closure
context.Break();
return;
}
// ... do something ...
});
Parameters
[in] from: Start index.
[in] to: End index (excluded)
[in] obj: Lambda or object with operator (), object will be referenced.
[in] threadCnt: PARALLELFOR_USEMAXIMUMTHREADS for default handling (uses the maximum number of threads available), otherwise maximum number of threads to be used. A value of 1 will disable parallelization.
[in] granularity: The number of iterations after which a thread shares work with idle threads (1 means best distribution at the cost of higher synchronization, PARALLELFOR_DEFAULTGRANULARITY is the default).
[in] queue: Optional queue that the parallel for is executed within.
Template Parameters
CONTEXT: A class derived from ParallelFor::BaseContext.
FROMTYPE: An integral type used for the from variable, might be different than INDEXTYPE but is not allowed to have a bigger range.
INDEXTYPE: An integral type used for the index of the loop.
LOOP: A class containing an operator ()(INDEXTYPE, CONTEXT&) used for the loop.
Returns
Depending on the type of #LOOP and #CONTEXT either void or Result<void> (OK on success. Will only return an error if your loop object returns one). Any errors will be returned as an AggregatedError which you can iterate to check for individual errors.

◆ Dynamic() [3/4]

static std::enable_if<!std::is_convertible<typename std::remove_reference< LOOP >::type, maxon::Int>::value, typename CONTEXT::ResultValueType >::type Dynamic ( FROMTYPE  from,
INDEXTYPE  to,
const INIT &  init,
const LOOP &  obj,
const FINALIZE &  finalize,
Int  threadCnt = PARALLELFOR_USEMAXIMUMTHREADS,
const Int  granularity = PARALLELFOR_DEFAULTGRANULARITY,
JobQueueInterface *  queue = JOBQUEUE_CURRENT 
)
static

Runs a parallelized for (i = from; i < to; i++) loop using dynamic distribution. The init method is called concurrently per worker thread whereas the finalize method is called synchronously (no concurrency) after the loop has finished.

struct MyContext : public ParallelFor::BaseContext
{
// your thread local data ...
};
ParallelFor::Dynamic<MyContext>(from, to,
[](MyContext& context)
{
// ... init thread local data ...
},
[](Int i, MyContext& context)
{
// ... computation in the loop body ...
},
[](MyContext& context)
{
// ... finalize thread local data ...
});
Parameters
[in] from: Start index.
[in] to: End index (excluded)
[in] init: Lambda being called before the loop starts (threaded)
[in] obj: Lambda or object with operator (), object will be referenced.
[in] finalize: Lambda being called after the loop has finished (synchronous)
[in] threadCnt: PARALLELFOR_USEMAXIMUMTHREADS for default handling (uses the maximum number of threads available), otherwise maximum number of threads to be used. A value of 1 will disable parallelization.
[in] granularity: The number of iterations after which a thread shares work with idle threads (1 means best distribution at the cost of higher synchronization, PARALLELFOR_DEFAULTGRANULARITY is the default).
[in] queue: Optional queue that the parallel for is executed within.
Template Parameters
CONTEXT: A class derived from ParallelFor::BaseContext for thread local data storage.
FROMTYPE: An integral type used for the from variable, might be different than INDEXTYPE but is not allowed to have a bigger range.
INDEXTYPE: An integral type used for the index of the loop.
LOOP: A class containing an operator ()(INDEXTYPE, CONTEXT&) used for the loop.
INIT: A class containing an operator ()(CONTEXT&) for per-thread initialization before the loop.
FINALIZE: A class containing an operator ()(CONTEXT&) for per-thread cleanup after the loop.
Returns
Depending on the type of #LOOP and #CONTEXT either void or Result<void> (OK on success. Will only return an error if your loop object returns one). Any errors will be returned as an AggregatedError which you can iterate to check for individual errors.

◆ Dynamic() [4/4]

static MAXON_ATTRIBUTE_NO_INLINE CONTEXT::ResultValueType Dynamic ( FROMTYPE  from,
INDEXTYPE  to,
const INIT &  init,
const LOOP &  obj,
const FINALIZE &  finalize,
Int  threadCnt = PARALLELFOR_USEMAXIMUMTHREADS,
const Int  granularity = PARALLELFOR_DEFAULTGRANULARITY,
JobQueueInterface *  queue = JOBQUEUE_CURRENT 
)
static

Runs a parallelized for (i = from; i < to; i++) loop using dynamic distribution. The FLAGS template parameter can be used to specify whether the init and finalize closures are called serialized or threaded.

struct MyContext : public ParallelFor::BaseContext
{
// your thread local data ...
};
ParallelFor::Dynamic<MyContext, PARALLELFORFLAGS::INITTHREADED_FINALIZESYNC>(from, to,
[](MyContext& context)
{
// ... init thread local data ...
},
[](Int i, MyContext& context)
{
// ... computation in the loop body ...
},
[](MyContext& context)
{
// ... finalize thread local data ...
});
Parameters
[in] from: Start index.
[in] to: End index (excluded)
[in] init: Lambda being called before the loop starts (threaded by default)
[in] obj: Lambda or object with operator (), object will be referenced.
[in] finalize: Lambda being called after the loop has finished (synchronous by default)
[in] threadCnt: PARALLELFOR_USEMAXIMUMTHREADS for default handling (uses the maximum number of threads available), otherwise maximum number of threads to be used. A value of 1 will disable parallelization.
[in] granularity: The number of iterations after which a thread shares work with idle threads (1 means best distribution at the cost of higher synchronization, PARALLELFOR_DEFAULTGRANULARITY is the default).
[in] queue: Optional queue that the parallel for is executed within.
Template Parameters
CONTEXT: A class derived from ParallelFor::BaseContext for thread local data storage.
FLAGS: Flags that specify whether init or finalize will be called threaded or synchronously.
FROMTYPE: An integral type used for the from variable, might be different than INDEXTYPE but is not allowed to have a bigger range.
INDEXTYPE: An integral type used for the index of the loop.
LOOP: A class containing an operator ()(INDEXTYPE, CONTEXT&) used for the loop.
INIT: A class containing an operator ()(CONTEXT&) for per-thread initialization before the loop.
FINALIZE: A class containing an operator ()(CONTEXT&) for per-thread cleanup after the loop.
Returns
Depending on the type of #LOOP and #CONTEXT either void or Result<void> (OK on success. Will only return an error if your loop object returns one). Any errors will be returned as an AggregatedError which you can iterate to check for individual errors.

◆ Static() [1/4]

static auto Static ( FROMTYPE  from,
INDEXTYPE  to,
const LOOP &  obj,
Int  threadCnt = PARALLELFOR_USEMAXIMUMTHREADS,
const Int  minChunkSize = PARALLELFOR_DEFAULTGRANULARITY,
JobQueueInterface *  queue = JOBQUEUE_CURRENT 
) -> decltype(obj(to))
static

Runs a parallelized for (i = from; i < to; i++) loop using static distribution.

ParallelFor::Static(from, to,
  [](Int i)
  {
    // ... do something ...
  });
Parameters
[in] from: Start index.
[in] to: End index (excluded)
[in] obj: Lambda or object with operator (), object will be referenced.
[in] threadCnt: PARALLELFOR_USEMAXIMUMTHREADS for default handling (uses the maximum number of threads available), otherwise maximum number of threads to be used. A value of 1 will disable parallelization.
[in] minChunkSize: Minimum number of loops that one thread should handle (default is PARALLELFOR_DEFAULTGRANULARITY)
[in] queue: Optional queue that the parallel for is executed within.
Template Parameters
FROMTYPE: An integral type used for the from variable, might be different than INDEXTYPE but is not allowed to have a bigger range.
INDEXTYPE: An integral type used for the index of the loop.
LOOP: A class containing an operator ()(INDEXTYPE) used for the loop.
Returns
Depending on the type of #LOOP either void or Result<void> (OK on success. Will only return an error if your loop object returns one). Any errors will be returned as an AggregatedError which you can iterate to check for individual errors.

◆ Static() [2/4]

static MAXON_ATTRIBUTE_NO_INLINE CONTEXT::ResultValueType Static ( FROMTYPE  from,
INDEXTYPE  to,
const LOOP &  obj,
Int  threadCnt = PARALLELFOR_USEMAXIMUMTHREADS,
const Int  minChunkSize = PARALLELFOR_DEFAULTGRANULARITY,
JobQueueInterface *  queue = JOBQUEUE_CURRENT 
)
static

Runs a parallelized for (i = from; i < to; i++) loop using static distribution.

ParallelFor::Static<ParallelFor::BreakContext>(from, to,
[&shouldWeBreak](Int i, ParallelFor::BreakContext& context)
{
// ... example for a break condition ...
if (shouldWeBreak)
{
// tell ParallelFor that it should cancel the loop and return from the closure
context.Break();
return;
}
// ... do something ...
});
Parameters
[in] from: Start index.
[in] to: End index (excluded)
[in] obj: Lambda or object with operator (), object will be referenced.
[in] threadCnt: PARALLELFOR_USEMAXIMUMTHREADS for default handling (uses the maximum number of threads available), otherwise maximum number of threads to be used. A value of 1 will disable parallelization.
[in] minChunkSize: Minimum number of loops that one thread should handle (default is PARALLELFOR_DEFAULTGRANULARITY)
[in] queue: Optional queue that the parallel for is executed within.
Template Parameters
CONTEXT: A class derived from ParallelFor::BaseContext.
FROMTYPE: An integral type used for the from variable, might be different than INDEXTYPE but is not allowed to have a bigger range.
INDEXTYPE: An integral type used for the index of the loop.
LOOP: A class containing an operator ()(INDEXTYPE, CONTEXT&) used for the loop.
Returns
Depending on the type of #LOOP and #CONTEXT either void or Result<void> (OK on success. Will only return an error if your loop object returns one). Any errors will be returned as an AggregatedError which you can iterate to check for individual errors.

◆ Static() [3/4]

static std::enable_if<!std::is_convertible<typename std::remove_reference< LOOP >::type, maxon::Int>::value, typename CONTEXT::ResultValueType >::type Static ( FROMTYPE  from,
INDEXTYPE  to,
const INIT &  init,
const LOOP &  obj,
const FINALIZE &  finalize,
Int  threadCnt = PARALLELFOR_USEMAXIMUMTHREADS,
const Int  minChunkSize = PARALLELFOR_DEFAULTGRANULARITY,
JobQueueInterface *  queue = JOBQUEUE_CURRENT 
)
static

Runs a parallelized for (i = from; i < to; i++) loop using static distribution. The init method is called concurrently per worker thread whereas the finalize method is called synchronously (no concurrency) after the loop has finished.

struct MyContext : public ParallelFor::BaseContext
{
// your thread local data ...
};
ParallelFor::Static<MyContext>(from, to,
[](MyContext& context)
{
// ... init thread local data ...
},
[](Int i, MyContext& context)
{
// ... computation in the loop body ...
},
[](MyContext& context)
{
// ... finalize thread local data ...
});
Parameters
[in] from: Start index.
[in] to: End index (excluded)
[in] init: Lambda being called before the loop starts (threaded)
[in] obj: Lambda or object with operator (), object will be referenced.
[in] finalize: Lambda being called after the loop has finished (synchronously)
[in] threadCnt: PARALLELFOR_USEMAXIMUMTHREADS for default handling (uses the maximum number of threads available), otherwise maximum number of threads to be used. A value of 1 will disable parallelization.
[in] minChunkSize: Minimum number of loops that one thread should handle (default is PARALLELFOR_DEFAULTGRANULARITY)
[in] queue: Optional queue that the parallel for is executed within.
Template Parameters
CONTEXT: A class derived from ParallelFor::BaseContext for thread local data storage.
FROMTYPE: An integral type used for the from variable, might be different than INDEXTYPE but is not allowed to have a bigger range.
INDEXTYPE: An integral type used for the index of the loop.
LOOP: A class containing an operator ()(INDEXTYPE, CONTEXT&) used for the loop.
INIT: A class containing an operator ()(CONTEXT&) for per-thread initialization before the loop.
FINALIZE: A class containing an operator ()(CONTEXT&) for per-thread cleanup after the loop.
Returns
Depending on the type of #LOOP and #CONTEXT either void or Result<void> (OK on success. Will only return an error if your loop object returns one). Any errors will be returned as an AggregatedError which you can iterate to check for individual errors.

◆ Static() [4/4]

static MAXON_ATTRIBUTE_NO_INLINE CONTEXT::ResultValueType Static ( FROMTYPE  from,
INDEXTYPE  to,
const INIT &  init,
const LOOP &  obj,
const FINALIZE &  finalize,
Int  threadCnt = PARALLELFOR_USEMAXIMUMTHREADS,
const Int  minChunkSize = PARALLELFOR_DEFAULTGRANULARITY,
JobQueueInterface *  queue = JOBQUEUE_CURRENT 
)
static

Runs a parallelized for (i = from; i < to; i++) loop using static distribution. The FLAGS template parameter can be used to specify whether the init and finalize closures are called serialized or threaded.

struct MyContext : public ParallelFor::BaseContext
{
// your thread local data ...
};
ParallelFor::Static<MyContext, PARALLELFORFLAGS::INITTHREADED_FINALIZESYNC>(from, to,
[](MyContext& context)
{
// ... init thread local data ...
},
[](Int i, MyContext& context)
{
// ... computation in the loop body ...
},
[](MyContext& context)
{
// ... finalize thread local data ...
});
Parameters
[in] from: Start index.
[in] to: End index (excluded)
[in] init: Lambda being called before the loop starts (threaded by default)
[in] obj: Lambda or object with operator (), object will be referenced.
[in] finalize: Lambda being called after the loop has finished (synchronously by default)
[in] threadCnt: PARALLELFOR_USEMAXIMUMTHREADS for default handling (uses the maximum number of threads available), otherwise maximum number of threads to be used. A value of 1 will disable parallelization.
[in] minChunkSize: Minimum number of loops that one thread should handle (default is PARALLELFOR_DEFAULTGRANULARITY)
[in] queue: Optional queue that the parallel for is executed within.
Template Parameters
CONTEXT: A class derived from ParallelFor::BaseContext for thread local data storage.
FLAGS: Flags that specify whether init or finalize will be called threaded or synchronously.
FROMTYPE: An integral type used for the from variable, might be different than INDEXTYPE but is not allowed to have a bigger range.
INDEXTYPE: An integral type used for the index of the loop.
LOOP: A class containing an operator ()(INDEXTYPE, CONTEXT&) used for the loop.
INIT: A class containing an operator ()(CONTEXT&) for per-thread initialization before the loop.
FINALIZE: A class containing an operator ()(CONTEXT&) for per-thread cleanup after the loop.
Returns
Depending on the type of #LOOP and #CONTEXT either void or Result<void> (OK on success. Will only return an error if your loop object returns one). Any errors will be returned as an AggregatedError which you can iterate to check for individual errors.

◆ CalculateStaticChunkSize()

static INDEXTYPE CalculateStaticChunkSize ( UInt  cnt,
Int &  threadCnt,
Int  minChunkSize 
)
static private

Computes chunk size for static distribution. If only one thread is used, the index is signed, and the range of the loop requires an unsigned counter, then the return value will overflow; the loops that assign the chunks to the jobs take care of this.

Parameters
[in] cnt: Number of iterations.
[in,out] threadCnt: Number of threads to use.
[in] minChunkSize: Minimum chunk size.
Template Parameters
INDEXTYPE: An integral type used for the index of the loop.
Returns
Size of a chunk.