#include <stdio.h>
#include <iostream>
using namespace std;
/*
 * Kernel: every thread prints one greeting line that identifies itself.
 *
 * Only blockIdx.x, threadIdx.x and threadIdx.y are reported; any other
 * grid or block dimension is ignored by this kernel.
 *
 * Device code uses printf for output; host stream objects such as
 * std::cout cannot be used inside a kernel.
 */
__global__ void hello_from_gpu()
{
    // Built-in indices are unsigned; convert so they match the %d specifiers.
    const int block_id = static_cast<int>(blockIdx.x);
    const int thread_x = static_cast<int>(threadIdx.x);
    const int thread_y = static_cast<int>(threadIdx.y);

    printf("Hello World from block-%d and thread-(%d, %d)!\n",
           block_id, thread_x, thread_y);
}
/*
 * Host entry point: launches hello_from_gpu on 2 blocks of 2 x 4 threads
 * and waits for the device to finish.
 *
 * Returns 0 on success, 1 if the launch or the kernel execution reported
 * a CUDA error (the error string is written to stderr).
 */
int main(void)
{
    // 2 threads along x, 4 along y: 8 threads per block.
    const dim3 block_size(2, 4);
    hello_from_gpu<<<2, block_size>>>();

    // A kernel launch returns no status itself; configuration errors must be
    // fetched explicitly, otherwise they pass unnoticed.
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    // Block until the kernel has finished. Errors raised during execution
    // surface here, and the device-side printf output is flushed to the host.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "Kernel execution failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}
nvcc hello5.cu -o hello5
./hello5
Hello World from block-1 and thread-(0, 0)!
Hello World from block-1 and thread-(1, 0)!
Hello World from block-1 and thread-(0, 1)!
Hello World from block-1 and thread-(1, 1)!
Hello World from block-1 and thread-(0, 2)!
Hello World from block-1 and thread-(1, 2)!
Hello World from block-1 and thread-(0, 3)!
Hello World from block-1 and thread-(1, 3)!
Hello World from block-0 and thread-(0, 0)!
Hello World from block-0 and thread-(1, 0)!
Hello World from block-0 and thread-(0, 1)!
Hello World from block-0 and thread-(1, 1)!
Hello World from block-0 and thread-(0, 2)!
Hello World from block-0 and thread-(1, 2)!
Hello World from block-0 and thread-(0, 3)!
Hello World from block-0 and thread-(1, 3)!