Hi, I used parallel_for to parallelize the same task over the elements of a vector, but somehow it did not result in any speedup, and I wonder why. Is it because of the number of threads or the grain size?
I have tested the code with different numbers of threads; there seems to be no obvious difference in running time when using anywhere from 1 to 8 threads.
Any advice? Thanks. One of the parallel_for functors is defined as follows:
/// One candidate swap move: a pair of nodes lying on two different routes,
/// together with the objective value obtained when evaluating their swap.
struct OneMove
{
    short TheNode;  ///< node the move starts from
    short aNode;    ///< partner node taken from TheNode's near-neighbor list
    short route1;   ///< route number recorded for TheNode
    short route2;   ///< route number recorded for aNode
    float tempObj;  ///< objective value returned for swapping the two nodes
};
template
class PSwap
{
public:
myType* sol;
concurrent_vector* pMoveList;
void operator() (const blocked_range& r) const
{
short TheNode,routeNum1,routeNum2,aNode;
float tempSolObj;
for(short i=r.begin();i!=r.end();++i)
{
// find the routeNum of the candidate route
TheNode=i;
routeNum1=sol->RouteNumList[TheNode];
vector nearNeighborList=sol->pinst->get_PNL(TheNode,sol->listSize);
// find a node to move theNode in a candidate route
for(unsigned int j=1;j {
aNode=nearNeighborList;
if(aNode==0)
continue;
routeNum2=sol->RouteNumList[aNode];
//abolish those in the same route
if(routeNum2==routeNum1)
continue;
//evaluate the obj of swap the two nodes
//tempObj=obj+alpha*VioC+belta*VioD
tempSolObj=sol->evaluateOneSwapeMove(TheNode,aNode,routeNum1,routeNum2);
OneMove move;
move.TheNode =TheNode;
move.aNode =aNode;
move.route1 =routeNum1;
move.route2 =routeNum2;
move.tempObj =tempSolObj;
(*pMoveList).push_back(move);
}
}
}
PSwap(myType* s,concurrent_vector* pl):
sol(s),pMoveList(pl) {}
};
template
static void DoPSwap(myType* s, concurrent_vector* pl, int size)
{
PSwap pswap(s,pl);
parallel_for(blocked_range(1,size),pswap,auto_partitioner());
}