I’m going to post something about my trip to Italy in a bit, after I get my pics uploaded properly to Flickr. But I wanted to put this up so I don’t forget about it later.

A conversation at work got me interested enough in GPGPU calculations to start doing a bit of research into the topic. Seems that at a base level both the ATI and NVidia APIs behave similarly, and that OpenCL is a nice, understandable alternative if you don’t want to be locked in to a single vendor. (Not that I use ATI cards.) I tried getting CUDA working on my Mac, but between weirdness caused by the drivers and the fact that it doesn’t support 64-bit on OSX, I’ve decided to go with OpenCL instead for my tests. Functionally everything will be the same, and after a nice tutorial on ATI’s site detailing some introductory points I have code that compiles and does what I want it to do, which is more than I can say for CUDA.

Anyway, here’s my first test. I wanted something simple for getting device information off my laptop.

#include <iostream>
using namespace std;

#include <stdio.h>
#include <stdlib.h>

#include <OpenGL/gl.h>
#include <OpenGL/CGLDevice.h>
#include <OpenCL/cl.h>
#include <OpenCL/cl_gl.h>
#include <OpenCL/cl_gl_ext.h>
#include <OpenCL/cl_ext.h>
 
/* Sizes the scratch buffers printDeviceInfo uses to receive query results. */
#define MSG_SIZE 4096
/* Capacity (num_entries) that main passes to clGetDeviceIDs. */
#define MAX_DEVICE_IDS 16

/*
 * Tags describing the C type a device-info query returns; printDeviceInfo
 * switches on these to choose the receiving variable and how to print it.
 * Note that the value 4 is not assigned to any tag.
 */
#define RT_STRING       0   /* NUL-terminated char string */
#define RT_UINT         1   /* cl_uint                    */
#define RT_BOOL         2   /* cl_bool                    */
#define RT_PLATFORMID   3   /* cl_platform_id             */
#define RT_SIZET        5   /* size_t                     */
#define RT_SIZET_ARR    6   /* array of size_t            */

void printDeviceInfo(cl_device_id deviceID) {
   
    cl_device_info param_list[] = {
        CL_DEVICE_NAME, CL_DEVICE_VENDOR, CL_DRIVER_VERSION, CL_DEVICE_VERSION,
        CL_DEVICE_PROFILE,
        CL_DEVICE_PLATFORM, CL_DEVICE_VENDOR_ID,
        CL_DEVICE_MAX_COMPUTE_UNITS, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
        CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MAX_WORK_GROUP_SIZE,
       
        NULL
    };
   
    int param_return_types[] = {
        RT_STRING, RT_STRING, RT_STRING, RT_STRING,
        RT_STRING, RT_PLATFORMID, RT_UINT,
        RT_UINT, RT_UINT, RT_SIZET_ARR, RT_SIZET,
       
        NULL
    };
   
    char *param_strings[] = {
        "Device Name", "Vendor", "Driver Version", "Device Version",
        "Device Profile", "Device Platform", "Device Vendor ID",
        "Max Compute Units", "Max Work Item Dimensions",
        "Max Work Item Sizes", "Max Work Group Size",
   
        NULL
    };
   
    int i = 0;
    int maxWorkItemDimensions = 3;
   
    while(param_strings[i] != NULL) {
        char    msg[MSG_SIZE];
        size_t  param_sizeT;
        size_t  param_sizeT_array[MSG_SIZE];
        cl_uint param_uint;
        cl_bool param_bool;
        size_t  param_value_ret;
        cl_platform_id param_platformID;
       
        cl_int error;
       
        switch (param_return_types[i]) {
            case RT_STRING:
                error = clGetDeviceInfo(deviceID,
                                        param_list[i],
                                        MSG_SIZE,
                                        msg,
                                        &param_value_ret
                                        );

                cout << param_strings[i] << ": " << msg << "\n";
                break;

            case RT_UINT:
                error = clGetDeviceInfo(deviceID,
                                        param_list[i],
                                        sizeof(cl_uint),
                                        &param_uint,
                                        NULL
                                        );

                cout << param_strings[i] << ": " << param_uint << "\n";
                break;
               
               
            case RT_BOOL:
                error = clGetDeviceInfo(deviceID,
                                        param_list[i],
                                        sizeof(cl_bool),
                                        &param_bool,
                                        NULL
                                        );

                cout << param_strings[i] << ": " << (param_bool ? "True" : "False") << "\n";
                break;
               
            case RT_PLATFORMID:
                error = clGetDeviceInfo(deviceID,
                                        param_list[i],
                                        sizeof(cl_platform_id),
                                        param_platformID,
                                        NULL
                                        );

                cout << param_strings[i] << ": " << param_platformID << "\n";
                break;
           
            case RT_SIZET:
                error = clGetDeviceInfo(deviceID,
                                        param_list[i],
                                        sizeof(size_t),
                                        &param_sizeT,
                                        NULL
                                        );

                cout << param_strings[i] << ": " << param_sizeT << "\n";
                break;
               
            case RT_SIZET_ARR:
                error = clGetDeviceInfo(deviceID,
                                        param_list[i],
                                        sizeof(size_t) * maxWorkItemDimensions,
                                        param_sizeT_array,
                                        &param_value_ret
                                        );

                cout << param_strings[i] << ": " << param_sizeT_array[0] << ", "
                    << param_sizeT_array[1] << ", "
                    << param_sizeT_array[2] << "\n";
                break;
               
               
               
            default:
                break;
        }
       
        i++;
    }
}

int main(int argc, char **argv) {
    cl_device_id deviceID;
    cl_uint numDevices;

    cl_uint err = clGetDeviceIDs(
                NULL,                  
                CL_DEVICE_TYPE_CPU,    
                MAX_DEVICE_IDS,        
                &deviceID,             
                &numDevices
                );
   
    cout << "Number of CPU devices: " << numDevices << "\n";
   
    for (int i = 0; i < numDevices; i++) {
        printDeviceInfo(deviceID);
    }

    cout << "\n\n";

   
   

    err = clGetDeviceIDs(
                NULL,
                CL_DEVICE_TYPE_GPU,
                MAX_DEVICE_IDS,
                &deviceID,
                &numDevices
                );
   
    cout << "Number of GPU devices: " << numDevices << "\n";
   
    for (int i = 0; i < numDevices; i++) {
        printDeviceInfo(deviceID);
    }
   
    cout << "\n\n";
   
}
 

(Hope that copy-and-pasted correctly.) To compile it on your Mac:

g++ test2.cpp -o test2 -framework OpenCL

Here are the results on my laptop, a 2010 Core i7 MacBook Pro 15″:

Number of CPU devices: 1
Device Name: Intel(R) Core(TM) i7 CPU       M 620  @ 2.67GHz
Vendor: Intel
Driver Version: 1.0
Device Version: OpenCL 1.0
Device Profile: FULL_PROFILE
Device Platform: 0
Device Vendor ID: 16909312
Max Compute Units: 4
Max Work Item Dimensions: 3
Max Work Item Sizes: 1, 1, 1
Max Work Group Size: 1

Number of GPU devices: 1
Device Name: GeForce GT 330M
Vendor: NVIDIA
Driver Version: CLH 1.0
Device Version: OpenCL 1.0
Device Profile: FULL_PROFILE
Device Platform: 0
Device Vendor ID: 16918016
Max Compute Units: 6
Max Work Item Dimensions: 3
Max Work Item Sizes: 512, 512, 64
Max Work Group Size: 512

Next step: getting the GPU to do some calculations.