[Qt][GPU] C++ Application for dynamic transition images (QtConcurrent, QtOpenCL)
In this post, I will briefly introduce my first test of the qtopencl tool.
What
I developed a basic application that goes from one image to another in a given time step (4s but it is customizable).
Let's say we have three pixels at the same position: source (from the source image), dest (from the destination image) and frame (from the computed image).
- At time 0: frame = source
- At time STEP: frame = dest
- At time STEP/2: frame is between source and dest.
More generally,
frame(r,g,b) = (dest(r,g,b) - source(r,g,b)) * percent + source(r,g,b), where percent = elapsed time / STEP (from 0 to 1)
Examples:
The application
So the application lets you choose the images, choose the type of computation (thread,sequential,gpu) and start stop.
The efficiency
Images size : 1024 x 768
Processors : i7 (4 x CPU) 2.8 GHz
Ubuntu : 11.04
Transfer Time : 4s
Sequential FPS : 6.3 f/s
Thread FPS : 25.s f/s
GPU FPS : 145.2 f/s
Processing Code
Sequential
// Runs the source-to-destination transition on the calling thread.
//
// Emits result(sourceImage, 0) first, then keeps recomputing `frame` with
// sequentialStep() and emitting it together with the running frame rate
// until TimeToDest milliseconds have elapsed or stopFlag becomes true.
// Finishes by emitting destImage with the last computed frame rate.
void ImageWorker::runSequential(){
    stopFlag = false;

    int totalTime = 0;          // accumulated sequentialStep() time, in ms
    int nbFrameProcessed = 0;
    // Stays at 0 until some processing time has been measured, so that the
    // signal never carries NaN (0/0) or inf (n/0).
    float fps = 0.0f;

    // emit source
    emit result(sourceImage, 0);

    QTime timer;
    timer.start();
    // compute until TimeToDest
    while(!stopFlag && timer.elapsed() < TimeToDest){
        totalTime += sequentialStep( (timer.elapsed()/float(TimeToDest)) );
        nbFrameProcessed += 1;
        if(totalTime > 0){
            fps = nbFrameProcessed / (totalTime/1000.0);
        }
        emit result(frame, fps);
    }

    // emit dest
    emit result(destImage, fps);
}
// Computes one intermediate frame on the calling thread and stores it in
// `frame`.
//
// Each colour channel is interpolated as
//     (dest - source) * percent + source
// and the result is truncated to an integer before being written.
//
// percent: progress of the transition; 0 reproduces the source pixel and
//          1 the destination pixel.
// Returns the time spent in this call, in milliseconds (QTime::elapsed()).
//
// NOTE(review): destImage and frame are indexed with sourceImage's
// dimensions — presumably all three images share the same size; confirm.
int ImageWorker::sequentialStep(const float percent){
    QTime timer;
    timer.start();

    const int width = sourceImage.width();
    const int height = sourceImage.height();

    // Rows in the outer loop: every pixel is independent, so the visiting
    // order does not change the result.
    for( int idxY = 0; idxY < height ; ++idxY){
        for( int idxX = 0; idxX < width ; ++idxX){
            // ARGB quadruplet on the format #AARRGGBB convert into signed
            const int sourceValue = static_cast<int>(sourceImage.pixel(idxX, idxY));
            const int destValue = static_cast<int>(destImage.pixel(idxX, idxY));

            const int sourceRed   = (sourceValue >> 16) & 0xFF;
            const int sourceGreen = (sourceValue >> 8) & 0xFF;
            const int sourceBlue  = (sourceValue >> 0) & 0xFF;

            const int destRed   = (destValue >> 16) & 0xFF;
            const int destGreen = (destValue >> 8) & 0xFF;
            const int destBlue  = (destValue >> 0) & 0xFF;

            const int red   = static_cast<int>(((destRed - sourceRed) * percent) + sourceRed);
            const int green = static_cast<int>(((destGreen - sourceGreen) * percent) + sourceGreen);
            const int blue  = static_cast<int>(((destBlue - sourceBlue) * percent) + sourceBlue);

            // set value into intermediate image
            frame.setPixel(idxX, idxY, qRgb(red, green, blue));
        }
    }
    return timer.elapsed();
}
QtConcurrent
void ImageWorker::runConcurrent(){
stopFlag = false;
QVector< QPair<int, int> > chunkSizes(QThread::idealThreadCount());
const double aChunk = ceil(double(sourceImage.height())/QThread::idealThreadCount());
chunkSizes[0] = QPair<int, int>(0, aChunk);
for( int idxThread = 1 ; idxThread < QThread::idealThreadCount() ; ++idxThread){
chunkSizes[idxThread].first = chunkSizes[idxThread-1].second;
chunkSizes[idxThread].second = (idxThread + 1) * aChunk;
}
chunkSizes[QThread::idealThreadCount() - 1].second = sourceImage.height();
int totalTime = 0;
int nbFrameProcessed = 0;
emit result(sourceImage, 0);
QTime timer;
timer.start();
while(!stopFlag && timer.elapsed() < TimeToDest){
totalTime += concurrentStep( (timer.elapsed()/float(TimeToDest)), chunkSizes );
nbFrameProcessed += 1;
const float fps = nbFrameProcessed / (totalTime/1000.0);
emit result(frame, fps);
}
const float fps = nbFrameProcessed / (totalTime/1000.0);
emit result(destImage, fps);
}
void convertImages(const QImage& sourceImage, const QImage& destImage, QImage* const frame,
const float percent, const QPair<int, int>& realHeight){
const int width = sourceImage.width();
QRgb * const threadBuffer = new QRgb[width];
static QMutex locker;
for( int idxY = realHeight.first; idxY < realHeight.second ; ++idxY){
for( int idxX = 0; idxX < width ; ++idxX){
// ARGB quadruplet on the format #AARRGGBB
int sourceValue = static_cast<int>(sourceImage.pixel(idxX, idxY));
int destValue = static_cast<int>(destImage.pixel(idxX, idxY));
const int red = ((((destValue >> 16) & 0xFF) - ((sourceValue >> 16) & 0xFF)) * percent) + ((sourceValue >> 16) & 0xFF);
const int green = ((((destValue >>
& 0xFF) - ((sourceValue >>
& 0xFF)) * percent) + ((sourceValue >>
& 0xFF);
const int blue = ((((destValue >> 0) & 0xFF) - ((sourceValue >> 0) & 0xFF)) * percent) + ((sourceValue >> 0) & 0xFF);
threadBuffer[idxX] = qRgb(red, green, blue);
}
locker.lock();
for( int idxX = 0; idxX < width ; ++idxX){
frame->setPixel(idxX, idxY, threadBuffer[idxX]);
}
locker.unlock();
}
delete[] threadBuffer;
}
int ImageWorker::concurrentStep(const float percent, const QVector< QPair<int, int> >& chunkSizes){
QTime timer;
timer.start();
for( int idxThread = 0 ; idxThread < QThread::idealThreadCount() ; ++idxThread){
QtConcurrent::run(convertImages, sourceImage, destImage, &frame, percent, chunkSizes[idxThread]);
}
// QThreadPool::globalInstance()->activeThreadCount()
QThreadPool::globalInstance()->waitForDone();
return timer.elapsed();
}
QtOpenCL
#include <qclcontext.h>
#include <qclprogram.h>
#include <qclkernel.h>
#include <qclimage.h>
// Runs the source-to-destination transition with the "transfer" OpenCL
// kernel built from :/transferimage.cl.
//
// Both input images are copied into OpenCL images once; every iteration
// runs the kernel with the current progress ratio, reads the result back
// into `frame` and emits it with the running frame rate. Emits
// result(sourceImage, 0) first and result(destImage, fps) last. Aborts via
// qFatal() when no OpenCL context can be created.
void ImageWorker::runOpencl(){
    stopFlag = false;

    QCLContext context;
    // Ask for a GPU first and fall back to the default device.
    // NOTE(review): QCLContext::create() is documented as doing nothing once
    // a context exists, so requesting the default device before the GPU
    // would make the GPU request a no-op — confirm against the QtOpenCL
    // version in use.
    if (!context.create(QCLDevice::GPU) && !context.create())
        qFatal("Could not create OpenCL context");

    QCLProgram program = context.buildProgramFromSourceFile(QLatin1String(":/transferimage.cl"));

    QCLImage2D sourceImageBuffer = context.createImage2DCopy(sourceImage, QCLMemoryObject::ReadOnly);
    QCLImage2D destImageBuffer = context.createImage2DCopy(destImage, QCLMemoryObject::ReadOnly);
    QCLImage2D frameBuffer = context.createImage2DDevice(frame.format(), frame.size(), QCLMemoryObject::WriteOnly);

    QCLKernel compute = program.createKernel("transfer");
    // One work-item per pixel.
    // NOTE(review): with an 8x8 local size the image dimensions presumably
    // have to be multiples of 8 — confirm for arbitrary input images.
    compute.setGlobalWorkSize(sourceImage.size());
    compute.setLocalWorkSize(8, 8);

    int totalTime = 0;          // accumulated kernel + read-back time, in ms
    int nbFrameProcessed = 0;
    // Stays at 0 until some processing time has been measured, so that the
    // signal never carries NaN (0/0) or inf (n/0).
    float fps = 0.0f;

    emit result(sourceImage, 0);

    QTime timerFrame;
    QTime timer;
    timer.start();
    while(!stopFlag && timer.elapsed() < TimeToDest){
        timerFrame.start();
        compute(sourceImageBuffer, destImageBuffer, frameBuffer, (timer.elapsed()/float(TimeToDest)) );
        frameBuffer.read(&frame);
        totalTime += timerFrame.elapsed();
        nbFrameProcessed += 1;
        if(totalTime > 0){
            fps = nbFrameProcessed / (totalTime/1000.0);
        }
        emit result(frame, fps);
    }

    emit result(destImage, fps);
}
// Pixels are addressed with integer coordinates below. For that form of
// read_imagef the sampler has to use unnormalized coordinates and nearest
// filtering; linear filtering with integer coordinates gives undefined
// values.
const sampler_t samp = CLK_NORMALIZED_COORDS_FALSE |
                       CLK_ADDRESS_CLAMP_TO_EDGE |
                       CLK_FILTER_NEAREST;

// Blends one pixel of sourceImage and destImage into frameImage.
//
// Colour channels: (dest - source) * percent + source, clamped to [0, 1].
// Alpha: copied from the source pixel.
// percent: progress of the transition; 0 gives the source, 1 the dest.
__kernel void transfer(__read_only image2d_t sourceImage,
                       __read_only image2d_t destImage,
                       __write_only image2d_t frameImage,
                       float percent)
{
    const int2 pos = (int2)(get_global_id(0), get_global_id(1));

    // Work-items outside the output image (possible when the global size
    // is larger than the image) must not write.
    if (pos.x >= get_image_width(frameImage) || pos.y >= get_image_height(frameImage))
        return;

    const float4 sourceColor = read_imagef(sourceImage, samp, pos);
    const float4 destColor = read_imagef(destImage, samp, pos);

    float4 frameColor = ((destColor - sourceColor) * percent) + sourceColor;
    frameColor.w = sourceColor.w;

    write_imagef(frameImage, pos, clamp(frameColor, 0.0f, 1.0f));
}
Download the code
References
http://labs.qt.nokia.com/2010/04/07/using-opencl-with-qt/
http://doc.qt.nokia.com/opencl-snapshot/concurrent.html
Subscribe to the RSS feed and have all new posts delivered straight to you.


