After a first shoot of quick sort on shared memory (to be able to create an mpi version, but that is not inplace)
I created a real shared memory version.
Be aware that these versions need the OpenMP task!
PS : I developed several quick sort (available on this blog), a sequential version, an openmp tasks version, a openmp not inplace version, an mpi version and a Qt concurent version.
A sequential version is also available here.
The Code
#include <cstdio>
#include <omp.h>
#include <cmath>
#include <ctime>
#include <cstdlib>
#include <cstring>
/* Result
g++ --version
g++ (GCC) 4.6.0
====== Quick Sort =====
Sorting 600000000 elements
1 threads...
Elapsed time 83.777031 s
Is sorted
2 threads...
Elapsed time 43.194530 s
Is sorted
4 threads...
Elapsed time 23.494222 s
Is sorted
8 threads...
Elapsed time 18.924786 s
Is sorted
*/
////////////////////////////////////////////////////////////
// Miscialenous functions
////////////////////////////////////////////////////////////
/** Swap to value */
template <class NumType>
inline void Swap(NumType& value, NumType& other){
NumType temp = value;
value = other;
other = temp;
}
////////////////////////////////////////////////////////////
// Quick sort
////////////////////////////////////////////////////////////
/* use in the sequential qs */
template <class SortType>
long QsPartition(SortType outputArray[], long left, long right){
const long part = right;
Swap(outputArray[part],outputArray[left + (right - left ) / 2]);
const SortType partValue = outputArray[part];
--right;
while(true){
while(outputArray[left] < partValue){
++left;
}
while(right >= left && partValue <= outputArray[right]){
--right;
}
if(right < left) break;
Swap(outputArray[left],outputArray[right]);
++left;
--right;
}
Swap(outputArray[part],outputArray[left]);
return left;
}
/* a sequential qs */
template <class SortType>
void QsSequential(SortType array[], const long left, const long right){
if(left < right){
const long part = QsPartition(array, left, right);
QsSequential(array,part + 1,right);
QsSequential(array,left,part - 1);
}
}
/** A task dispatcher */
template <class SortType>
void QuickSortOmpTask(SortType array[], const long left, const long right, const int deep){
if(left < right){
if( deep ){
const long part = QsPartition(array, left, right);
#pragma omp task
QuickSortOmpTask(array,part + 1,right, deep - 1);
#pragma omp task
QuickSortOmpTask(array,left,part - 1, deep - 1);
}
else {
const long part = QsPartition(array, left, right);
QsSequential(array,part + 1,right);
QsSequential(array,left,part - 1);
}
}
}
/** The openmp quick sort */
template <class SortType>
void QuickSortOmp(SortType array[], const long size){
#pragma omp parallel
{
#pragma omp single nowait
{
QuickSortOmpTask(array, 0, size - 1 , 15);
}
}
}
////////////////////////////////////////////////////////////
// Main
////////////////////////////////////////////////////////////
bool isSorted(long long array[], const long size){
for(int idx = 1; idx < size ; ++idx){
if(array[idx-1] > array[idx]){
return false;
}
}
return true;
}
void print(long long array[], const int size){
for(int idx = 0 ;idx < size; ++idx){
printf("%lldt",array[idx]);
}
printf("n");
}
int main(int, char**){
const long Size = 600000000;
long long* const array = new long long[Size];
printf("Sorting %ld elementsn", Size);
for(int idxThread = 1 ; idxThread <= 8 ; idxThread *= 2){
printf("%d threads...n", idxThread);
omp_set_num_threads(idxThread);
srand(0);
for(long idx = 0 ; idx < Size ; ++idx){
array[idx] = int(Size*(float(rand())/RAND_MAX));
}
const double startTime = omp_get_wtime();
QuickSortOmp(array, Size);
printf("Elapsed time %lf sn", omp_get_wtime() - startTime);
if(isSorted(array,Size)){
printf("Is sortedn");
}
else{
printf("Error array is not sorted!n");
if( Size <= 20) print(array,Size);
return -1;
}
}
delete [] array;
return 0;
}