|
|
|
|
|
|
__managed__ global-C-variable-definition
|
Example:
__managed__ int x; __managed__ float A[100]; |
Note:
|
// File-scope variable declared __managed__: both the host code in main() and
// the kernel below refer to it by the same name.
__managed__ int x; // Defines shared variable !!!
// Kernel: prints the value of x as seen by the GPU thread, then overwrites x.
// The print happens BEFORE the store, so the printed value is whatever x held
// when the kernel ran, not 4444.
__global__ void GPU_func( )
{
printf("GPU sees x = %d\n", x);
x = 4444; // GPU stores a new value into the managed variable
}
// Host entry point of the "shared-global-fail" demo: it reads x right after
// launching the kernel, BEFORE synchronizing with the GPU.
int main()
{
x = 1234; // Host writes the managed variable directly
GPU_func<<< 1, 1 >>>( ); // Run 1 thread (1 block of 1 thread)
// No synchronization separates the launch above from the read below, so this
// printf is not ordered after the kernel's store to x; the sample output for
// this demo shows the host still printing 1234.
// NOTE(review): on devices without concurrent managed access, touching x on
// the host while the kernel may still be running is not permitted - confirm
// against the target GPU.
printf("CPU sees x = %d\n", x);
cudaDeviceSynchronize( ); // Wait for GPU function to print
return 0;
}
|
Purpose of the program:
|
/home/cs355001/demo/CUDA/2-unified-mem/shared-global-fail
Output:
CPU sees x = 1234
GPU sees x = 1234 // What's wrong ???
|
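Explanation of the observed phenomenon:
A kernel launch returns to the CPU immediately, so the CPU's printf can run before the GPU has executed x = 4444; the CPU must call cudaDeviceSynchronize() before it reads x.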
|
|
Example:
// File-scope variable declared __managed__: both the host code in main() and
// the kernel below refer to it by the same name.
__managed__ int x; // Defines shared variable !!!
// Kernel: prints the value of x as seen by the GPU thread, then overwrites x.
// The print happens BEFORE the store, so the printed value is whatever x held
// when the kernel ran, not 4444.
__global__ void GPU_func( )
{
printf("GPU sees x = %d\n", x);
x = 4444; // GPU stores a new value into the managed variable
}
// Host entry point of the working "shared-global" demo: it waits for the
// kernel to finish before reading x back on the host.
// Note: the results of the kernel launch and of cudaDeviceSynchronize() are
// not checked here, so a failed launch would go unreported.
int main()
{
x = 1234; // Host writes the managed variable directly
GPU_func<<< 1, 1 >>>( ); // Start GPU function (1 block of 1 thread)
cudaDeviceSynchronize(); // Wait until GPU kernel function finishes !!
printf("CPU sees x = %d\n", x); // Now obtain the result !! (read happens after the sync)
return 0;
}
|
/home/cs355001/demo/CUDA/2-unified-mem/shared-global
Output:
GPU sees x = 1234 // GPU prints FIRST !!!
CPU sees x = 4444 // CPU waits until GPU is done !!!
|