<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>HPC, AI, C++, Silverlight ... Among Others</title>
	<atom:link href="http://berenger.eu/blog/feed/" rel="self" type="application/rss+xml" />
	<link>http://berenger.eu/blog</link>
	<description>This website is my personal blog and my technical sharing center.</description>
	<lastBuildDate>Wed, 18 Apr 2012 16:23:47 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.3.1</generator>
		<item>
		<title>Know more about Flops</title>
		<link>http://berenger.eu/blog/2012/04/18/know-more-about-flops/</link>
		<comments>http://berenger.eu/blog/2012/04/18/know-more-about-flops/#comments</comments>
		<pubDate>Wed, 18 Apr 2012 16:23:47 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[Programming]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1309</guid>
		<description><![CDATA[If you want to know more about flops (on CPU or on GPU), a good first step is to use this link: http://folding.stanford.edu/English/FAQ-flops They give lots of details and are very clear; in brief, a good reference. &#160;]]></description>
			<content:encoded><![CDATA[<p>If you want to know more about flops (on CPU or on GPU), a good first step is to use this link:</p>
<p><a href="http://folding.stanford.edu/English/FAQ-flops" target="_blank">http://folding.stanford.edu/English/FAQ-flops</a></p>
<p>They give lots of details and are very clear; in brief, a good reference.</p>
<p>&nbsp;</p>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2012/04/18/know-more-about-flops/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>[CUDA] Compiling with cmake</title>
		<link>http://berenger.eu/blog/2012/04/10/cuda-compiling-with-cmake/</link>
		<comments>http://berenger.eu/blog/2012/04/10/cuda-compiling-with-cmake/#comments</comments>
		<pubDate>Tue, 10 Apr 2012 16:06:55 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[Programming]]></category>
		<category><![CDATA[CMAKE]]></category>
		<category><![CDATA[CUDA]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1303</guid>
		<description><![CDATA[Here is a simple cmake file to compile a cuda project. I use Gcc4.4 because my cuda library does not work with Gcc4.6; in the future, of course, this may no longer be needed.]]></description>
			<content:encoded><![CDATA[<p>Here is a simple cmake file to compile a cuda project.<br />
<span id="more-1303"></span></p>
<pre class="brush: bash; title: ; notranslate">
set(CMAKE_CXX_COMPILER g++-4.4)
set(CMAKE_CC_COMPILER gcc-4.4)

#The name of the project
PROJECT(DIRECT_CUDA)

#The FindCUDA script is distributed since version 2.8
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)

FIND_PACKAGE(CUDA)

INCLUDE(FindCUDA)

CUDA_ADD_EXECUTABLE(direct.exe direct.cu main.cpp)
</pre>
<p>I use Gcc4.4 because my cuda library does not work with Gcc4.6; in the future, of course, this may no longer be needed.</p>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2012/04/10/cuda-compiling-with-cmake/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>[CUDA] Constant memory (__constant__)</title>
		<link>http://berenger.eu/blog/2012/04/10/cuda-constant-memory-__constant__/</link>
		<comments>http://berenger.eu/blog/2012/04/10/cuda-constant-memory-__constant__/#comments</comments>
		<pubDate>Tue, 10 Apr 2012 15:18:02 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[Others]]></category>
		<category><![CDATA[Programming]]></category>
		<category><![CDATA[CUDA]]></category>
		<category><![CDATA[GPU]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1295</guid>
		<description><![CDATA[Quick summary of the CUDA __constant__ type. First declare the block of data in your .cu file: Then in your .c/.cpp file, you do not need to allocate using cudaMalloc since the block has already been declared; you just need to copy from host to device using cudaMemcpyToSymbol.]]></description>
			<content:encoded><![CDATA[<p>Quick summary of the CUDA __constant__ type</p>
<p><span id="more-1295"></span><br />
First declare  the block of data in your .cu file:</p>
<pre class="brush: cpp; title: ; notranslate">
// in the .cu file

__constant__ int ConstArray[512];

// use it as any normal array in your cuda functions
</pre>
<p>Then in your .c/.cpp file, you do not need to allocate using cudaMalloc since the block has already been declared; you just need to copy from host to device using cudaMemcpyToSymbol.</p>
<pre class="brush: cpp; title: ; notranslate">
int main(){
	int array[512];

	// work with array

	cudaMemcpyToSymbol(&quot;ConstArray&quot;, array, sizeof(int) * 512 );

	// call your gpu func

	return 0;
}
</pre>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2012/04/10/cuda-constant-memory-__constant__/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Creates facial transformations with octobooth</title>
		<link>http://berenger.eu/blog/2012/01/22/creates-facial-transformations-with-octobooth/</link>
		<comments>http://berenger.eu/blog/2012/01/22/creates-facial-transformations-with-octobooth/#comments</comments>
		<pubDate>Sun, 22 Jan 2012 21:22:57 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1290</guid>
		<description><![CDATA[A friend has created a website called http://www.octobooth.com/  OctoBooth is a website that creates facial transformations Change race, age or sex, and share photos with your friends OctoBooth is free, and no registration is required Enjoy.]]></description>
			<content:encoded><![CDATA[<p>A friend has created a website called <a href="http://www.octobooth.com/" target="_blank">http://www.octobooth.com/</a></p>
<blockquote><p> OctoBooth is a website that creates facial transformations</p>
<p>Change race, age or sex, and share photos with your friends</p>
<p>OctoBooth is free, and no registration is required</p></blockquote>
<p>Enjoy.</p>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2012/01/22/creates-facial-transformations-with-octobooth/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>[C++][Qt] Parallel Quick Sort with QtConcurrent (Shared memory generic quick sort)</title>
		<link>http://berenger.eu/blog/2011/12/02/cqt-parallel-quick-sort-with-qtconcurrent-shared-memory-generic-quick-sort/</link>
		<comments>http://berenger.eu/blog/2011/12/02/cqt-parallel-quick-sort-with-qtconcurrent-shared-memory-generic-quick-sort/#comments</comments>
		<pubDate>Fri, 02 Dec 2011 11:32:21 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[C++]]></category>
		<category><![CDATA[Programming]]></category>
		<category><![CDATA[Code]]></category>
		<category><![CDATA[Resource]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1262</guid>
		<description><![CDATA[PS : I developed several quick sorts (available on this blog), a sequential version, an openmp tasks version, an openmp not-in-place version, an mpi version and a Qt concurrent version. Here I developed a quick sort based on the great qt feature QtConcurrent. It is mostly similar to the openmp tasks versions. Advice Change [...]]]></description>
			<content:encoded><![CDATA[<p>PS : I developed several quick sorts (available on this blog), <a href="http://berenger.eu/blog/?p=1052">a sequential version</a>, an <a href="http://berenger.eu/blog/?p=1201">openmp tasks version</a>, <a href="http://berenger.eu/blog/?p=1117">an openmp not-in-place version</a>, <a href="http://berenger.eu/blog/?p=1128">an mpi version</a> and <a href="http://berenger.eu/blog/?p=1262">a Qt concurrent version</a>.</p>
<p>Here I developed a quick sort based on the great qt feature QtConcurrent.<br />
It is mostly similar to the <a href="http://berenger.eu/blog/?p=1201">openmp tasks versions</a>.<br />
<span id="more-1262"></span></p>
<h2> Advice </h2>
<p>Change the following lines in the code to perform different tests</p>
<pre class="brush: cpp; title: ; notranslate">
QThreadPool::globalInstance()-&gt;setMaxThreadCount(1);
const long Size = 10000000;
</pre>
<h2> The code </h2>
<pre class="brush: cpp; title: ; notranslate">
#include &lt;QtCore/QCoreApplication&gt;
#include &lt;QTime&gt;
#include &lt;QtConcurrentRun&gt;

#include &lt;iostream&gt;

////////////////////////////////////////////////////////////
// Miscellaneous functions
////////////////////////////////////////////////////////////

/** Swap two values */
template &lt;class NumType&gt;
inline void Swap(NumType&amp; value, NumType&amp; other){
    NumType temp = value;
    value = other;
    other = temp;
}

////////////////////////////////////////////////////////////
// Quick sort
////////////////////////////////////////////////////////////

/* use in the sequential qs */
template &lt;class SortType&gt;
long QsPartition(SortType outputArray[], long left, long right){
    const long part = right;
    Swap(outputArray[part],outputArray[left + (right - left ) / 2]);
    const SortType partValue = outputArray[part];
    --right;

    while(true){
        while(outputArray[left] &lt; partValue){
            ++left;
        }
        while(right &gt;= left &amp;&amp; partValue &lt;= outputArray[right]){
            --right;
        }
        if(right &lt; left) break;

        Swap(outputArray[left],outputArray[right]);
        ++left;
        --right;
    }

    Swap(outputArray[part],outputArray[left]);

    return left;
}

/* a sequential qs */
template &lt;class SortType&gt;
void QsSequential(SortType array[], const long left, const long right){
    if(left &lt; right){
        const long part = QsPartition(array, left, right);
        QsSequential(array,part + 1,right);
        QsSequential(array,left,part - 1);
    }
}

/** A task dispatcher */
template &lt;class SortType&gt;
void QuickSortTask(SortType array[], const long left, const long right, const int deep){
    if(left &lt; right){
        if( deep ){
            const long part = QsPartition(array, left, right);

            QtConcurrent::run(QuickSortTask&lt;SortType&gt;, array, part + 1, right, deep - 1);
            QtConcurrent::run(QuickSortTask&lt;SortType&gt;, array, left, part - 1, deep - 1);
        }
        else {
            const long part = QsPartition(array, left, right);
            QsSequential(array,part + 1,right);
            QsSequential(array,left,part - 1);
        }
    }
}

////////////////////////////////////////////////////////////
// Main
////////////////////////////////////////////////////////////

template &lt;class SortedType&gt;
bool isSorted(SortedType array[], const long size){
    for(int idx = 1; idx &lt; size ; ++idx){
        if(array[idx-1] &gt; array[idx]){
            return false;
        }
    }
    return true;
}

template &lt;class SortedType&gt;
void print(SortedType array[], const int size){
    for(int idx = 0 ;idx &lt; size; ++idx){
        std::cout &lt;&lt; array[idx] &lt;&lt; &quot;\t&quot;;
    }
    std::cout &lt;&lt; &quot;\n&quot;;
}

int main(int argc, char** argv){
    QCoreApplication app(argc, argv);
    // Change to test efficiency
    // QThreadPool::globalInstance()-&gt;setMaxThreadCount(1);

    const long Size = 10000000;//600000000;
    long* const array = new long[Size];

    // Create array
    srand(0);
    for(long idx = 0 ; idx &lt; Size ; ++idx){
        array[idx] = int(Size*(float(rand())/RAND_MAX));
    }

    printf(&quot;Sorting %ld elements\n&quot;, Size);
    // Start sorting
    QTime timer;
    timer.start();
    QtConcurrent::run(QuickSortTask&lt;long&gt;, array, 0, Size - 1, 6);
    QThreadPool::globalInstance()-&gt;waitForDone();
    printf(&quot;Elapsed time %f s\n&quot;, timer.elapsed()/1000.0);

    // Test result
    if(isSorted(array,Size)){
        printf(&quot;Is sorted\n&quot;);
    }
    else{
        printf(&quot;Error array is not sorted!\n&quot;);
        if( Size &lt;= 20) print(array,Size);
        return -1;
    }

    // remove array and quit
    delete [] array;
    return 0;
}
</pre>
<p>Licence : lgpl.</p>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2011/12/02/cqt-parallel-quick-sort-with-qtconcurrent-shared-memory-generic-quick-sort/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>[C++][HPC] C++ tips for High Performance Programming</title>
		<link>http://berenger.eu/blog/2011/11/25/chpc-c-tips-for-high-performance-programming/</link>
		<comments>http://berenger.eu/blog/2011/11/25/chpc-c-tips-for-high-performance-programming/#comments</comments>
		<pubDate>Fri, 25 Nov 2011 15:56:02 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1255</guid>
		<description><![CDATA[A week ago I had to give some tips about C++ in HPC, here is an extract of the document. It has been done quickly, it is an unpretentious document. Of course an entire book about efficient C++ is better. Get the doc here (pdf)]]></description>
			<content:encoded><![CDATA[<p>A week ago I had to give some tips about C++ in HPC,<br />
here is an extract of the document.<br />
<span id="more-1255"></span></p>
<p>It has been done quickly, it is an unpretentious document.<br />
Of course an entire book about efficient C++ is better.</p>
<p><a href="http://berenger.eu/blog/wp-content/uploads/2011/11/CHPP.pdf">Get the doc here (pdf)</a></p>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2011/11/25/chpc-c-tips-for-high-performance-programming/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>[Qt][GPU] C++ Application for dynamic transition images (QtConcurrent, QtOpenCL)</title>
		<link>http://berenger.eu/blog/2011/11/25/qtgpu-c-application-for-dynamic-transition-images-qtconcurrent-qtopencl/</link>
		<comments>http://berenger.eu/blog/2011/11/25/qtgpu-c-application-for-dynamic-transition-images-qtconcurrent-qtopencl/#comments</comments>
		<pubDate>Fri, 25 Nov 2011 15:47:56 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[C++]]></category>
		<category><![CDATA[Programming]]></category>
		<category><![CDATA[Code]]></category>
		<category><![CDATA[GPU]]></category>
		<category><![CDATA[Resource]]></category>
		<category><![CDATA[Tutorial]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1241</guid>
		<description><![CDATA[In this post, I will briefly introduce my first test of the qtopencl tool. What I developed a basic application that goes from one image to another in a given time step (4s but it is customizable). Lets say we have 3 pixels, source (from source image) dest (from destination image) and frame (from the [...]]]></description>
			<content:encoded><![CDATA[<p>In this post, I will briefly introduce my first test of the qtopencl tool.</p>
<p><span id="more-1241"></span></p>
<h2>What</h2>
<p>I developed a basic application that goes from one image to another in a given time step (4s but it is customizable).<br />
Let's say we have 3 pixels, source (from source image), dest (from destination image) and frame (from the computed image).</p>
<ul>
<li>At time 0: frame = source</li>
<li>At time STEP: frame = dest</li>
<li>At time STEP/2: frame is between source and dest.</li>
</ul>
<p>More generally,<br />
frame(r,g,b) = (dest(r,g,b) &#8211; source(r,g,b)) * percent + source(r,g,b)</p>
<p>Examples:</p>
<p><a href="http://berenger.eu/blog/wp-content/uploads/2011/11/transfer.jpg"><img src="http://berenger.eu/blog/wp-content/uploads/2011/11/transfer.jpg" alt="Transfer image example" /></a></p>
<h2>The application</h2>
<p><a href="http://berenger.eu/blog/wp-content/uploads/2011/11/transfersoft.jpg"><img src="http://berenger.eu/blog/wp-content/uploads/2011/11/transfersoft.jpg" alt="Transfer Image Interface" /></a></p>
<p>So the application lets you choose the images, choose the type of computation (thread, sequential, gpu), and start or stop it.</p>
<h2>The efficiency</h2>
<p>Images size : 1024 x 768<br />
Processors : i7 (4 x CPU) 2.8 GHz<br />
Ubuntu : 11.04<br />
Transfer Time : 4s<br />
Sequential FPS : 6.3 f/s<br />
Thread FPS : 25.s f/s<br />
GPU FPS : 145.2 f/s</p>
<h2>Processing Code</h2>
<h3>Sequential</h3>
<pre class="brush: cpp; title: ; notranslate">
void ImageWorker::runSequential(){
    stopFlag = false;

    int totalTime        = 0;
    int nbFrameProcessed = 0;

    // emit source
    emit result(sourceImage, 0);

    QTime timer;
    timer.start();
    // compute until TimeToDest
    while(!stopFlag &amp;&amp; timer.elapsed() &lt; TimeToDest){
        totalTime        += sequentialStep( (timer.elapsed()/float(TimeToDest)) );
        nbFrameProcessed += 1;
        const float fps   = nbFrameProcessed / (totalTime/1000.0);

        emit result(frame, fps);
    }

    // emit dest
    const float fps   = nbFrameProcessed / (totalTime/1000.0);
    emit result(destImage, fps);
}

int ImageWorker::sequentialStep(const float percent){
    QTime timer;
    timer.start();

    for( int idxX = 0; idxX &lt; sourceImage.width() ; ++idxX){
        for( int idxY = 0; idxY &lt; sourceImage.height() ; ++idxY){
            // ARGB quadruplet on the format #AARRGGBB convert into signed
            int sourceValue = static_cast&lt;int&gt;(sourceImage.pixel(idxX, idxY));
            int destValue = static_cast&lt;int&gt;(destImage.pixel(idxX, idxY));

            const int red   = ((((destValue &gt;&gt; 16) &amp; 0xFF) - ((sourceValue &gt;&gt; 16) &amp; 0xFF)) * percent) + ((sourceValue &gt;&gt; 16) &amp; 0xFF);
            const int green = ((((destValue &gt;&gt;  8) &amp; 0xFF) - ((sourceValue &gt;&gt;  8) &amp; 0xFF)) * percent) + ((sourceValue &gt;&gt;  8) &amp; 0xFF);
            const int blue  = ((((destValue &gt;&gt;  0) &amp; 0xFF) - ((sourceValue &gt;&gt;  0) &amp; 0xFF)) * percent) + ((sourceValue &gt;&gt;  0) &amp; 0xFF);

            // set value into intermediate image
            frame.setPixel(idxX, idxY, qRgb(red, green, blue));
        }
    }

    return timer.elapsed();
}
</pre>
<h3>QtConcurrent</h3>
<pre class="brush: cpp; title: ; notranslate">
void ImageWorker::runConcurrent(){
    stopFlag = false;

    QVector&lt; QPair&lt;int, int&gt; &gt; chunkSizes(QThread::idealThreadCount());
    const double aChunk = ceil(double(sourceImage.height())/QThread::idealThreadCount());
    chunkSizes[0] = QPair&lt;int, int&gt;(0, aChunk);
    for( int idxThread = 1 ; idxThread &lt; QThread::idealThreadCount() ; ++idxThread){
        chunkSizes[idxThread].first  = chunkSizes[idxThread-1].second;
        chunkSizes[idxThread].second = (idxThread + 1) * aChunk;
    }
    chunkSizes[QThread::idealThreadCount() - 1].second = sourceImage.height();

    int totalTime        = 0;
    int nbFrameProcessed = 0;

    emit result(sourceImage, 0);

    QTime timer;
    timer.start();
    while(!stopFlag &amp;&amp; timer.elapsed() &lt; TimeToDest){
        totalTime        += concurrentStep( (timer.elapsed()/float(TimeToDest)), chunkSizes );
        nbFrameProcessed += 1;
        const float fps   = nbFrameProcessed / (totalTime/1000.0);

        emit result(frame, fps);
    }

    const float fps   = nbFrameProcessed / (totalTime/1000.0);
    emit result(destImage, fps);
}

void convertImages(const QImage&amp; sourceImage, const QImage&amp; destImage, QImage* const frame,
                   const float percent, const QPair&lt;int, int&gt;&amp; realHeight){
    const int width = sourceImage.width();
    QRgb * const threadBuffer = new QRgb[width];

    static QMutex locker;

    for( int idxY = realHeight.first; idxY &lt; realHeight.second ; ++idxY){
        for( int idxX = 0; idxX &lt; width ; ++idxX){
            // ARGB quadruplet on the format #AARRGGBB
            int sourceValue = static_cast&lt;int&gt;(sourceImage.pixel(idxX, idxY));
            int destValue = static_cast&lt;int&gt;(destImage.pixel(idxX, idxY));

            const int red   = ((((destValue &gt;&gt; 16) &amp; 0xFF) - ((sourceValue &gt;&gt; 16) &amp; 0xFF)) * percent) + ((sourceValue &gt;&gt; 16) &amp; 0xFF);
            const int green = ((((destValue &gt;&gt;  8) &amp; 0xFF) - ((sourceValue &gt;&gt;  8) &amp; 0xFF)) * percent) + ((sourceValue &gt;&gt;  8) &amp; 0xFF);
            const int blue  = ((((destValue &gt;&gt;  0) &amp; 0xFF) - ((sourceValue &gt;&gt;  0) &amp; 0xFF)) * percent) + ((sourceValue &gt;&gt;  0) &amp; 0xFF);

            threadBuffer[idxX] = qRgb(red, green, blue);
        }
        locker.lock();
        for( int idxX = 0; idxX &lt; width ; ++idxX){
            frame-&gt;setPixel(idxX, idxY, threadBuffer[idxX]);
        }
        locker.unlock();
    }

    delete[] threadBuffer;
}

int ImageWorker::concurrentStep(const float percent, const QVector&lt; QPair&lt;int, int&gt; &gt;&amp; chunkSizes){
    QTime timer;
    timer.start();

    for( int idxThread = 0 ; idxThread &lt; QThread::idealThreadCount() ; ++idxThread){
        QtConcurrent::run(convertImages, sourceImage, destImage, &amp;frame, percent, chunkSizes[idxThread]);
    }

    // QThreadPool::globalInstance()-&gt;activeThreadCount()
    QThreadPool::globalInstance()-&gt;waitForDone();

    return timer.elapsed();
}
</pre>
<h3>QtOpencl</h3>
<pre class="brush: cpp; title: ; notranslate">
#include &lt;qclcontext.h&gt;
#include &lt;qclprogram.h&gt;
#include &lt;qclkernel.h&gt;
#include &lt;qclimage.h&gt;

void ImageWorker::runOpencl(){
    stopFlag = false;

    QCLContext context;

    if (!context.create())
        qFatal(&quot;Could not create OpenCL context&quot;);

    if (!context.create(QCLDevice::GPU))
        qFatal(&quot;Could not create OpenCL context&quot;);

    QCLProgram program = context.buildProgramFromSourceFile(QLatin1String(&quot;:/transferimage.cl&quot;));

    QCLImage2D sourceImageBuffer = context.createImage2DCopy(sourceImage, QCLMemoryObject::ReadOnly);
    QCLImage2D destImageBuffer = context.createImage2DCopy(destImage, QCLMemoryObject::ReadOnly);
    QCLImage2D frameBuffer = context.createImage2DDevice(frame.format(), frame.size(), QCLMemoryObject::WriteOnly);

    QCLKernel compute = program.createKernel(&quot;transfer&quot;);
    compute.setGlobalWorkSize(sourceImage.size());
    compute.setLocalWorkSize(8, 8);

    int totalTime        = 0;
    int nbFrameProcessed = 0;

    emit result(sourceImage, 0);

    QTime timerFrame;
    QTime timer;
    timer.start();
    while(!stopFlag &amp;&amp; timer.elapsed() &lt; TimeToDest){
        timerFrame.start();
        compute(sourceImageBuffer, destImageBuffer, frameBuffer, (timer.elapsed()/float(TimeToDest)) );
        frameBuffer.read(&amp;frame);

        totalTime        += timerFrame.elapsed();
        nbFrameProcessed += 1;
        const float fps   = nbFrameProcessed / (totalTime/1000.0);

        emit result(frame, fps);
    }

    const float fps   = nbFrameProcessed / (totalTime/1000.0);
    emit result(destImage, fps);
}

const sampler_t samp = CLK_ADDRESS_CLAMP_TO_EDGE |
                       CLK_FILTER_LINEAR;
__kernel void transfer(__read_only image2d_t sourceImage,
                       __read_only image2d_t destImage,
                       __write_only image2d_t frameImage,
                       float percent)
{
    int2 pos = (int2)(get_global_id(0), get_global_id(1));
    float4 sourceColor = read_imagef(sourceImage, samp, pos);
    float4 destColor = read_imagef(destImage, samp, pos);

    float4 frameColor;
    frameColor.x = ((destColor.x - sourceColor.x) * percent) + sourceColor.x;
    frameColor.y = ((destColor.y - sourceColor.y) * percent) + sourceColor.y;
    frameColor.z = ((destColor.z - sourceColor.z) * percent) + sourceColor.z;
    frameColor.w = sourceColor.w;

    write_imagef(frameImage, pos, clamp(frameColor, 0.0f, 1.0f));
}
</pre>
<h2>Download the code</h2>
<p><a href="http://berenger.eu/blog/wp-content/uploads/2011/11/TransferImageBerenger.zip">The code is here.</a></p>
<h2>References</h2>
<p>http://labs.qt.nokia.com/2010/04/07/using-opencl-with-qt/</p>
<p>http://doc.qt.nokia.com/opencl-snapshot/concurrent.html</p>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2011/11/25/qtgpu-c-application-for-dynamic-transition-images-qtconcurrent-qtopencl/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>[GPU] Install Cuda Opencl on Ubuntu</title>
		<link>http://berenger.eu/blog/2011/11/25/gpu-install-cuda-opencl-on-ubuntu/</link>
		<comments>http://berenger.eu/blog/2011/11/25/gpu-install-cuda-opencl-on-ubuntu/#comments</comments>
		<pubDate>Fri, 25 Nov 2011 10:47:11 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[Others]]></category>
		<category><![CDATA[Programming]]></category>
		<category><![CDATA[GPU]]></category>
		<category><![CDATA[Resource]]></category>
		<category><![CDATA[Tutorial]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1236</guid>
		<description><![CDATA[The steps to install cuda, opencl for nvidia gpu on ubuntu. Keep default directories. Download what you need from NVidia website: http://developer.nvidia.com/cuda-downloads Take: &#8220;CUDA Toolkit for Ubuntu Linux 10.10&#8221; (cudatoolkit_4.0.17_linux_32_ubuntu10.10.run) &#8220;GPU Computing SDK &#8211; complete package including all code samples&#8221; (gpucomputingsdk_4.0.17_linux) And if you want (not recommended): &#8220;Developer Drivers for Linux&#8221; (devdriver_4.0_linux_32_270.41.19.run) Install NVidia Driver [...]]]></description>
			<content:encoded><![CDATA[<p>The steps to install cuda, opencl for nvidia gpu on ubuntu.<br />
<span id="more-1236"></span><br />
Keep default directories.</p>
<p>Download what you need from NVidia website:<br />
<a title="http://developer.nvidia.com/cuda-downloads" href="http://developer.nvidia.com/cuda-downloads" target="_blank">http://developer.nvidia.com/cuda-downloads</a></p>
<p>Take:<br />
&#8220;CUDA Toolkit for Ubuntu Linux 10.10&#8221; (cudatoolkit_4.0.17_linux_32_ubuntu10.10.run)<br />
&#8220;GPU Computing SDK &#8211; complete package including all code samples&#8221; (gpucomputingsdk_4.0.17_linux)<br />
And if you want (not recommended):<br />
&#8220;Developer Drivers for Linux&#8221; (devdriver_4.0_linux_32_270.41.19.run)</p>
<h2>Install NVidia Driver</h2>
<p><span style="color: #ff0000;">I recommend not to install Official NVidia Driver!!</span><br />
(I did the first time and had a lot of problems!)<br />
Use the safest install:</p>
<pre class="brush: bash; title: ; notranslate">
sudo apt-get install linux-headers-generic
sudo apt-get install nvidia-current
sudo nvidia-xconfig
</pre>
<p>If you need to remove bad drivers:</p>
<pre class="brush: bash; title: ; notranslate">
sudo apt-get remove --purge nvidia-current
</pre>
<p>Anyway, if you prefer you can install official nvidia package:</p>
<pre class="brush: bash; title: ; notranslate">
sudo /etc/init.d/gdm stop
sudo sh devdriver_4.0_linux_32_270.41.19.run
reboot
</pre>
<h2>Install SDK</h2>
<p><a href="http://developer.nvidia.com/cuda-downloads" target="_blank">SDK has to be downloaded from the NVidia official website.</a></p>
<p>Run the downloaded package:</p>
<pre class="brush: bash; title: ; notranslate">
sudo sh cudatoolkit_4.0.17_linux_32_ubuntu10.10.run
</pre>
<p>Then change your path and log dirs:</p>
<pre class="brush: bash; title: ; notranslate">
gedit ~/.bashrc
</pre>
<p>And paste:</p>
<pre class="brush: bash; title: ; notranslate">
# opencl
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib
export PATH=$PATH:/usr/local/cuda/lib:/usr/local/cuda/bin
</pre>
<p>(note, in case of 32bits you do not need /usr/local/cuda/lib64)</p>
<p>Restart your terminal or type:</p>
<pre class="brush: bash; title: ; notranslate">
source ~/.bashrc
</pre>
<h2>Install Examples</h2>
<p><a href="http://developer.nvidia.com/cuda-downloads" target="_blank">Examples have to be downloaded from the NVidia official website.</a></p>
<pre class="brush: bash; title: ; notranslate">
sudo sh gpucomputingsdk_4.0.17_linux
</pre>
<p>I need to install some dependencies:</p>
<pre class="brush: bash; title: ; notranslate">
sudo apt-get install libxmu-dev libxmu6
sudo apt-get install freeglut3-dev
</pre>
<p>Then compile OpenCl examples (replace user name with yours):</p>
<pre class="brush: bash; title: ; notranslate">
cd ~
sudo chown -R “user name” NVIDIA_GPU_Computing_SDK
sudo chmod -R 777 NVIDIA_GPU_Computing_SDK
cd NVIDIA_GPU_Computing_SDK
cd OpenCL
make
</pre>
<p>Run the examples (for example, run oclNbody):</p>
<pre class="brush: bash; title: ; notranslate">
cd ~/NVIDIA_GPU_Computing_SDK/OpenCL/bin/linux/release
./oclNbody
</pre>
<h2>References</h2>
<p>http://vgerscorner.wordpress.com/2010/10/24/opencl-ubuntu-install-guide/</p>
<p>http://forums.nvidia.com/index.php?showtopic=87692h2</p>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2011/11/25/gpu-install-cuda-opencl-on-ubuntu/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>[C++][Omp] OpenMP version _OPENMP directive</title>
		<link>http://berenger.eu/blog/2011/10/17/comp-openmp-version-_openmp-directive/</link>
		<comments>http://berenger.eu/blog/2011/10/17/comp-openmp-version-_openmp-directive/#comments</comments>
		<pubDate>Mon, 17 Oct 2011 08:16:40 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[C++]]></category>
		<category><![CDATA[Programming]]></category>
		<category><![CDATA[openmp]]></category>
		<category><![CDATA[Resource]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1230</guid>
		<description><![CDATA[Sometimes you may want to know what version of openmp you are using at compile time. This is possible using the _OPENMP directive. Based on the specification: http://openmp.org/wp/openmp-specifications/ You can notice that the openmp vers. 3.0 has been released in 2008.05 From this information you can use task or not depending on the version of [...]]]></description>
			<content:encoded><![CDATA[<p>Sometimes you may want to know what version of openmp you are using at compile time.<br />
This is possible using the _OPENMP directive.</p>
<p><span id="more-1230"></span></p>
<p>Based on the specification:<br />
<a href="http://openmp.org/wp/openmp-specifications/" target="_blank">http://openmp.org/wp/openmp-specifications/</a><br />
You can notice that the openmp vers. 3.0 has been released in 2008.05<br />
From this information you can use task or not depending on the version of your openmp.</p>
<pre class="brush: cpp; title: ; notranslate">
#include &lt;cstdio&gt;
#include &lt;omp.h&gt;

int main(){
#if _OPENMP &gt;= 200805
    // I use tasks
    printf(&quot;_OPENMP &gt;= 200805 (vers = %d)\n&quot;,_OPENMP);
#else
    // Tasks do not exist...
    printf(&quot;_OPENMP &lt; 200805 (vers = %d)\n&quot;,_OPENMP);
#endif

    return 0;
}
</pre>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2011/10/17/comp-openmp-version-_openmp-directive/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>[C++][Mpi] Bitonic parallel Sort (Bitonic Sorting network in parallel)</title>
		<link>http://berenger.eu/blog/2011/10/14/cmpi-bitonic-parallel-sort-bitonic-sorting-network-in-parallel/</link>
		<comments>http://berenger.eu/blog/2011/10/14/cmpi-bitonic-parallel-sort-bitonic-sorting-network-in-parallel/#comments</comments>
		<pubDate>Fri, 14 Oct 2011 13:23:47 +0000</pubDate>
		<dc:creator>Berenger</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://berenger.eu/blog/?p=1223</guid>
		<description><![CDATA[In this post I put the code of a parallel bitonic sort. The methods are templatized so you can use them as you like. Be aware that this version needs the number of processes to be a power of 2. Reference Library Support for Parallel Sorting in Scientific Computations http://web.mst.edu/~ercal/387/P3/pr-proj-3.pdf http://en.wikipedia.org/wiki/Bitonic_sort http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm http://en.wikipedia.org/wiki/Bisection_method The Code]]></description>
			<content:encoded><![CDATA[<p>In this post I put the code of a parallel bitonic sort.<br />
The methods are templatized so you can use them as you like.</p>
<p>Be aware that this version needs the number of processes to be a power of 2.</p>
<p><span id="more-1223"></span></p>
<h2> Reference </h2>
<p><a href="http://www.google.com/url?sa=t&#038;source=web&#038;cd=1&#038;ved=0CBoQFjAA&#038;url=http%3A%2F%2Fwww.tu-chemnitz.de%2Finformatik%2FPI%2Fforschung%2Fpub%2Fdownload%2FDHR_europar07.ps&#038;ei=bzeYTrn2OYjLsgbJ8ayABA&#038;usg=AFQjCNHIGl-01REpZ6PRDlsJsIPcESE-JQ&#038;sig2=o4MrhhQ8a3lwRSa5RQ4njQ" target="_blank">Library Support for Parallel Sorting in Scientific Computations</a><br />
<a href="http://web.mst.edu/~ercal/387/P3/pr-proj-3.pdf" title="A good document" target="_blank">http://web.mst.edu/~ercal/387/P3/pr-proj-3.pdf</a><br />
<a href="http://en.wikipedia.org/wiki/Bitonic_sort" target="_blank">http://en.wikipedia.org/wiki/Bitonic_sort</a><br />
<a href="http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm" target="_blank">http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm</a><br />
<a href="http://en.wikipedia.org/wiki/Bisection_method" target="_blank">http://en.wikipedia.org/wiki/Bisection_method</a></p>
<h2> The Code </h2>
<pre class="brush: cpp; title: ; notranslate">
#include &lt;cstdio&gt;
#include &lt;cstdlib&gt;
#include &lt;cmath&gt;

#include &lt;mpi.h&gt;

////////////////////////////////////////////////////////////////
// Sequential sort first!
////////////////////////////////////////////////////////////////

// Exchanges the values of v1 and v2 by going through a temporary copy.
template &lt;class SortType&gt;
inline void swap(SortType&amp; v1, SortType&amp; v2){
    const SortType saved = v2;
    v2 = v1;
    v1 = saved;
}

template &lt;class SortType, class CompareType, class FSize&gt;
int partition(SortType* const array, FSize left, FSize right){
    SortType part = right;
    swap(array[part],array[(right+left) / 2]);
    --right;

    while(true){
        while( CompareType(array[left]) &lt; CompareType(array[part])){
            ++left;
        }
        while(right &gt;= left &amp;&amp; CompareType(array[part]) &lt;= CompareType(array[right])){
            --right;
        }
        if(right &lt; left) break;

        swap(array[left],array[right]);
        ++left;
        --right;
    }

    swap(array[part],array[left]);

    return left;
}

template &lt;class SortType, class CompareType, class FSize&gt;
void qs(SortType* const array, const FSize left, const FSize right){
    if(left &lt; right){
        const FSize part = partition&lt;SortType,CompareType&gt;(array, left, right);
        qs&lt;SortType,CompareType&gt;(array,part + 1,right);
        qs&lt;SortType,CompareType&gt;(array,left,part - 1);
    }
}

template &lt;class SortType, class CompareType, class FSize&gt;
void quick(SortType* const array, const FSize size){
    qs&lt;SortType,CompareType&gt;(array,0,size-1);
}

////////////////////////////////////////////////////////////////
// Bitonic parallel sort !
////////////////////////////////////////////////////////////////

// MPI message tags.
// FlagMin and FlagMax tag the pivot values exchanged with MPI_Sendrecv:
// sendMaxAndGetMin sends with FlagMin and receives with FlagMax,
// sendMinAndGetMax sends with FlagMax and receives with FlagMin.
static const int FlagMin = 5;
static const int FlagMax = 6;
// FlagMinMess and FlagMaxMess tag the array parts exchanged with
// MPI_Sendrecv_replace: sendMaxAndGetMin sends with FlagMinMess and receives
// with FlagMaxMess, sendMinAndGetMax does the opposite.
static const int FlagMinMess = 4;
static const int FlagMaxMess = 3;

// This function exchanges data with the other rank:
// it sends the upper part of its array (its largest values when the array is
// sorted in ascending order) and receives the other rank's data in the same place.
// The split position is searched by bisection: at each step the value at the
// current pivot is exchanged with the other rank and the search interval is halved.
// NOTE(review): presumably the other rank runs sendMinAndGetMax at the same
// time, with the same number of steps and matching message sizes - confirm.
//   array     : local data, modified in place
//   size      : number of elements in array
//   otherRank : rank of the partner process in MPI_COMM_WORLD
template &lt;class SortType, class CompareType, class FSize&gt;
void sendMaxAndGetMin(SortType array[], const FSize size, const int otherRank){
    // Search interval: left starts one position before the first element
    FSize left  = -1;
    FSize right = size - 1;
    FSize pivot = left + (right - left + 1)/2;
    CompareType otherValue = -1;
    CompareType tempCompareValue = CompareType(array[pivot]);
    // Send our pivot value and get the one of the other rank (raw bytes)
    MPI_Sendrecv(&amp;tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,&amp;otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE);

    // Stop when the pivot reaches a bound of the interval or when both values are equal
    // NOTE(review): array[pivot] is compared with otherValue directly, without
    // the CompareType conversion used for the value that is sent
    while( pivot != left &amp;&amp; pivot != right  &amp;&amp; array[pivot] != otherValue) {

        if( array[pivot] &lt; otherValue ){
            left = pivot;
        }
        else {
            right = pivot;
        }
        pivot = left + (right - left + 1)/2;
        tempCompareValue = CompareType(array[pivot]);

        MPI_Sendrecv(&amp;tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,&amp;otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
    }

    if( otherValue &lt;= array[pivot] ){
        // Exchange everything from the pivot (included) to the end of the array
        MPI_Sendrecv_replace(&amp;array[pivot], (size - pivot) * sizeof(SortType) , MPI_BYTE,
                               otherRank, FlagMinMess, otherRank, FlagMaxMess,
                               MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    }
    else if( array[pivot] &lt; otherValue){
        // Exchange everything after the pivot, nothing if the pivot is the last element
        if(pivot != size - 1){
            MPI_Sendrecv_replace(&amp;array[pivot + 1], (size - pivot - 1) * sizeof(SortType) , MPI_BYTE,
                                   otherRank, FlagMinMess, otherRank, FlagMaxMess,
                                   MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
    }

}

// This function exchanges data with the other rank:
// it sends the lower part of its array (its smallest values when the array is
// sorted in ascending order) and receives the other rank's data in the same place.
// The split position is searched by bisection: at each step the value at the
// current pivot is exchanged with the other rank and the search interval is halved.
// NOTE(review): presumably the other rank runs sendMaxAndGetMin at the same
// time, with the same number of steps and matching message sizes - confirm.
//   array     : local data, modified in place
//   size      : number of elements in array
//   otherRank : rank of the partner process in MPI_COMM_WORLD
template &lt;class SortType, class CompareType, class FSize&gt;
void sendMinAndGetMax(SortType array[], const FSize size, const int otherRank){
    // Search interval: right starts one position after the last element
    FSize left  = 0;
    FSize right = size ;
    FSize pivot = left + (right - left)/2;
    CompareType otherValue = -1;
    CompareType tempCompareValue = CompareType(array[pivot]);
    // Send our pivot value and get the one of the other rank (raw bytes)
    MPI_Sendrecv(&amp;tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,&amp;otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE);

    // Stop when the pivot reaches the left bound or when both values are equal
    // NOTE(review): array[pivot] is compared with otherValue directly, without
    // the CompareType conversion used for the value that is sent
    while(  pivot != left  &amp;&amp; array[pivot] != otherValue) {

        if( array[pivot] &lt; otherValue ){
            left = pivot;
        }
        else {
            right = pivot;
        }
        pivot = left + (right - left)/2;
        tempCompareValue = CompareType(array[pivot]);
        MPI_Sendrecv(&amp;tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,&amp;otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
    }

    if( array[pivot] &lt;= otherValue ){
        // Exchange everything from the beginning of the array to the pivot (included)
        MPI_Sendrecv_replace(&amp;array[0], (pivot + 1) * sizeof(SortType) , MPI_BYTE,
                               otherRank, FlagMaxMess, otherRank, FlagMinMess,
                               MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    else if( otherValue &lt; array[pivot]){
        // Exchange everything before the pivot, nothing if the pivot is the first element
        if(pivot != 0){
            MPI_Sendrecv_replace(&amp;array[0], (pivot) * sizeof(SortType) , MPI_BYTE,
                                   otherRank, FlagMaxMess, otherRank, FlagMinMess,
                                   MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
    }
}

/*
From :

http://web.mst.edu/~ercal/387/P3/pr-proj-3.pdf

Parallel Bitonic Sort Algorithm for processor Pk (for k := 0 ... P-1)
d := log2(P) // cube dimension
sort(local-data_k) // sequential sort
// Bitonic Sort follows
for i:=1 to d do
    window-id = Most Significant (d-i) bits of Pk
    for j:=(i-1) down to 0 do
        if((window-id is even AND j th bit of Pk = 0)
        OR (window-id is odd AND j th bit of Pk = 1))
            then call CompareLow(j)
        else
            call CompareHigh(j)
        endif
    endfor
endfor
  */
template &lt;class SortType, class CompareType, class FSize&gt;
void bitonic(SortType array[], const FSize size, const int np, const int rank){
    quick&lt;SortType,CompareType&gt;(array, size);

    const int logNp = log2(np);
    for(int bitIdx = 1 ; bitIdx &lt;= logNp ; ++bitIdx){
        // window-id = Most Significant (d-i) bits of Pk
        const int diBit =  (rank &gt;&gt; bitIdx) &amp; 0x1;

        for(int otherBit = bitIdx - 1 ; otherBit &gt;= 0 ; --otherBit){
            // if((window-id is even AND j th bit of Pk = 0)
            // OR (window-id is odd AND j th bit of Pk = 1))

            const int myOtherBit = (rank &gt;&gt; otherBit) &amp; 0x1;
            const int otherRank = rank ^ (1 &lt;&lt; otherBit);

            if( diBit != myOtherBit ){
                sendMinAndGetMax&lt;SortType,CompareType&gt;(array, size, otherRank);
            }
            else{
                sendMaxAndGetMin&lt;SortType,CompareType&gt;(array, size, otherRank);
            }
            // A merge sort is possible since the array is composed
            // by two part already sorted, but we want to do this in space
            quick&lt;SortType,CompareType&gt;(array, size);
        }
    }
}

////////////////////////////////////////////////////////////////
// Utils
////////////////////////////////////////////////////////////////

template &lt;class SortType, class FSize&gt;
bool isSorted(const SortType array[], const FSize size){
    for(int idx = 1 ; idx &lt; size ; ++idx){
        if( array[idx-1] &gt; array[idx]){
            return false;
        }
    }
    return true;
}

void print(const int array[], const int size, const int rank){
    for(int idx = 0 ; idx &lt; size ; ++idx){
        printf(&quot;array[%d][%d] = %d\n&quot;, rank, idx, array[idx]);
    }
}

// Test program: every process fills a local array with pseudo random values,
// runs the parallel bitonic sort, then the processes report if their local
// array is sorted and rank 0 reports if the arrays are ordered between ranks.
int main(int argc, char ** argv){
    MPI_Init(&amp;argc, &amp;argv);

    int rank = 0;
    int nprocs = 0;

    MPI_Comm_size(MPI_COMM_WORLD,&amp;nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD,&amp;rank);

    const int Size = 500;
    long long array[Size];
    // NOTE(review): the seed is the same on every rank, so all the processes
    // generate the same values - confirm this is intended
    srand(Size);

    for(int idx = 0 ; idx &lt; Size ; ++idx){
        //array[idx] = nprocs - rank;
        //array[idx] = rank;
        array[idx] = Size * (rand()/float(RAND_MAX));
    }

    bitonic&lt;long long,int&gt;(array, Size, nprocs, rank);
    //print(array, Size, rank);

    // Each process checks its own array, rank 0 gets the logical AND of all the results
    int sorted = isSorted(array,Size);
    // This has to be an int: MPI_Reduce writes one MPI_INT in the receive
    // buffer and a bool is not guaranteed to have the size of an int
    int localySorted = 0;
    MPI_Reduce( &amp;sorted, &amp;localySorted, 1, MPI_INT, MPI_LAND , 0, MPI_COMM_WORLD );

    // Rank 0 gathers the first and the last value of every process
    int*const allExtrem = new int[nprocs * 2];
    int extrem[2];
    extrem[0] = array[0];
    extrem[1] = array[Size-1];
    MPI_Gather(extrem, 2, MPI_INT, allExtrem, 2, MPI_INT, 0, MPI_COMM_WORLD);

    printf(sorted?&quot;Is sorted\n&quot;:&quot;NO is not sorted\n&quot;);

    if( rank == 0){
        printf(localySorted?&quot;All sorted\n&quot;:&quot;NO all not sorted\n&quot;);

        // The last value of a process must not be greater than
        // the first value of the next process
        int extremOk = true;
        for(int idxProc = 1 ; idxProc &lt; nprocs &amp;&amp; extremOk; ++idxProc){
            if( allExtrem[2 * (idxProc - 1) + 1] &gt; allExtrem[2 * idxProc]){
                extremOk = false;
            }
        }

        printf(extremOk?&quot;Extrem ok\n&quot;:&quot;NO extrem error\n&quot;);
    }

    delete[] allExtrem;

    MPI_Finalize();

    return 0;
}
</pre>
<div style='clear:both'></div>]]></content:encoded>
			<wfw:commentRss>http://berenger.eu/blog/2011/10/14/cmpi-bitonic-parallel-sort-bitonic-sorting-network-in-parallel/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>

