[C++][OpenMP] A shared memory quick sort openmp tasks (example, source code)

After a first shoot of quick sort on shared memory (to be able to create an mpi version, but that is not inplace)
I created a real shared memory version.
Be aware that these versions need the OpenMP task!
Also we use a depth line 106, you may parametrize this number based on the number of threads and the size of the array.

PS : I developed several quick sort (available on this blog), a sequential version, an openmp tasks version, a openmp not inplace version, an mpi version and a Qt concurent version.

A sequential version is also available here.

The Code

#include <cstdio>
#include <omp.h>
#include <cmath>
#include <ctime>
#include <cstdlib>
#include <cstring>

/* Result
g++ --version
g++ (GCC) 4.6.0
  ====== Quick Sort =====
Sorting 600000000 elements
1 threads...
Elapsed time 83.777031 s
Is sorted
2 threads...
Elapsed time 43.194530 s
Is sorted
4 threads...
Elapsed time 23.494222 s
Is sorted
8 threads...
Elapsed time 18.924786 s
Is sorted
*/
////////////////////////////////////////////////////////////
// Miscialenous functions
////////////////////////////////////////////////////////////

/** Swap to value */
template <class NumType>
inline void Swap(NumType& value, NumType& other){
    NumType temp = value;
    value = other;
    other = temp;
}


////////////////////////////////////////////////////////////
// Quick sort
////////////////////////////////////////////////////////////

/* use in the sequential qs */
template <class SortType>
long QsPartition(SortType outputArray[], long left, long right){
    const long part = right;
    Swap(outputArray[part],outputArray[left + (right - left ) / 2]);
    const SortType partValue = outputArray[part];
    --right;

    while(true){
        while(outputArray[left] < partValue){
            ++left;
        }
        while(right >= left && partValue <= outputArray[right]){
            --right;
        }
        if(right < left) break;

        Swap(outputArray[left],outputArray[right]);
        ++left;
        --right;
    }

    Swap(outputArray[part],outputArray[left]);

    return left;
}

/* a sequential qs */
template <class SortType>
void QsSequential(SortType array[], const long left, const long right){
    if(left < right){
        const long part = QsPartition(array, left, right);
        QsSequential(array,part + 1,right);
        QsSequential(array,left,part - 1);
    }
}

/** A task dispatcher */
template <class SortType>
void QuickSortOmpTask(SortType array[], const long left, const long right, const int deep){
    if(left < right){
        const long part = QsPartition(array, left, right);
        if( deep ){
            #pragma omp task
            QuickSortOmpTask(array,part + 1,right, deep - 1);
            #pragma omp task
            QuickSortOmpTask(array,left,part - 1, deep - 1);
        }
        else {
            QsSequential(array,part + 1,right);
            QsSequential(array,left,part - 1);
        }
    }
}

/** The openmp quick sort */
template <class SortType>
void QuickSortOmp(SortType array[], const long size){
    const int nbTasksRequiere = (omp_get_max_threads() * 5);
    int deep = 0;
    while( (1 << deep) < nbTasksRequiere ) deep += 1;

    #pragma omp parallel
    {
        #pragma omp single nowait
        {
            QuickSortOmpTask(array, 0, size - 1 , deep);
        }
    }
}

////////////////////////////////////////////////////////////
// Main
////////////////////////////////////////////////////////////

bool isSorted(long long array[], const long size){
    for(int idx = 1; idx < size ; ++idx){
        if(array[idx-1] > array[idx]){
            return false;
        }
    }
    return true;
}

void print(long long array[], const int size){
    for(int idx = 0 ;idx < size; ++idx){
        printf("%lldt",array[idx]);
    }
    printf("n");
}

int main(int, char**){
    const long Size = 600000000;
    long long* const array = new long long[Size];

    printf("Sorting %ld elementsn", Size);

    for(int idxThread = 1 ; idxThread <= 8 ; idxThread *= 2){
        printf("%d threads...n", idxThread);

        omp_set_num_threads(idxThread);

        srand(0);
        for(long idx = 0 ; idx < Size ; ++idx){
            array[idx] = int(Size*(float(rand())/RAND_MAX));
        }

        const double startTime = omp_get_wtime();
        QuickSortOmp(array, Size);
        printf("Elapsed time %lf sn", omp_get_wtime() - startTime);

        if(isSorted(array,Size)){
            printf("Is sortedn");
        }
        else{
            printf("Error array is not sorted!n");
            if( Size <= 20) print(array,Size);
            return -1;
        }

    }

    delete [] array;

    return 0;
}