|
A kernel launch is a run-time operation, so an invalid launch configuration is not caught by the compiler; it only fails when the program runs.
Consider the following CUDA program with an error launching the kernel function:
#include <stdio.h>
#include <unistd.h>
// Kernel: every thread prints its own x-index within its block.
// Device-side printf is used here purely for demonstration output.
__global__ void hello( )
{
    const int tid = threadIdx.x;
    printf("threadIdx.x=%d\n", tid);
}
// Host entry point: launches hello() with a deliberately invalid
// configuration and does NOT check for the resulting launch error.
int main()
{
    const int numBlocks       = 1;
    const int threadsPerBlock = 1025;   // Error: #threads > 1024 (1024 is the per-block maximum) !!!

    hello<<< numBlocks, threadsPerBlock >>>( );

    printf("I am the CPU: Hello World ! \n");

    // Block the CPU until all previously issued GPU work has finished.
    cudaDeviceSynchronize();
    return 0;
}
|
It compiles, but you do not see the outputs from the CUDA threads !!! The kernel launch fails at run time, and nothing in the program reports the failure.
DEMO:
/home/cs355001/demo/CUDA/1-intro/hello-error.cu
Background information:
|
In the next slide, I will show an example of how to use the cudaGetLastError() function.
Consider the CUDA program with an error launching the kernel function:
#include <stdio.h>
#include <unistd.h>
// Kernel: every thread prints its own x-index within its block.
// Device-side printf is used here purely for demonstration output.
__global__ void hello( )
{
    const int tid = threadIdx.x;
    printf("threadIdx.x=%d\n", tid);
}
// Host entry point: launches hello() with a deliberately invalid
// configuration and does NOT check for the resulting launch error.
int main()
{
    const int numBlocks       = 1;
    const int threadsPerBlock = 1025;   // Error: #threads > 1024 (1024 is the per-block maximum) !!!

    hello<<< numBlocks, threadsPerBlock >>>( );

    printf("I am the CPU: Hello World ! \n");

    // Block the CPU until all previously issued GPU work has finished.
    cudaDeviceSynchronize();
    return 0;
}
|
We will add a call to cudaGetLastError() right after the kernel launch to check the error status.
You should (must?) always check for errors after you launch a grid:
// Kernel: every thread prints its own x-index within its block.
// Device-side printf is used here purely for demonstration output.
__global__ void hello( )
{
    const int tid = threadIdx.x;
    printf("threadIdx.x=%d\n", tid);
}
// Host entry point: launches hello() with a deliberately invalid
// configuration and reports the resulting error instead of failing silently.
// Exits with status -1 if the launch or the later synchronization fails.
int main()
{
    // 1 block of 1025 threads: a block may hold at most 1024 threads,
    // so this launch is rejected at run time.
    hello<<< 1, 1025 >>>( );              // Error: #threads > 1024 !!!

    // The launch statement itself yields no status; fetch (and clear)
    // the error code it recorded.
    cudaError_t err = cudaGetLastError(); // Get last error code
    if ( err != cudaSuccess )
    {
        printf("CUDA Error: %s\n", cudaGetErrorString(err));
        exit(-1);
    }

    printf("I am the CPU: Hello World ! \n");

    // Wait for the kernel to finish. Errors raised while the kernel runs
    // are reported by the synchronizing call, so check its result as well.
    err = cudaDeviceSynchronize();
    if ( err != cudaSuccess )
    {
        printf("CUDA Error: %s\n", cudaGetErrorString(err));
        exit(-1);
    }

    return 0;
}
|
DEMO: /home/cs355001/demo/CUDA/1-intro/hello-error2.cu