C/C++教程

cuda编程 pycuda

本文主要是介绍cuda编程 pycuda,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!

CUDA】grid、block、thread的关系及thread索引的计算
https://hujingshuang.blog.csdn.net/article/details/53097222

目录
  • 例子1
  • 例子2

例子1

import torch
print(torch.version.cuda) # 11.0
print(torch.__version__) # 1.7.0

import numpy
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void matrix_mul(float *dest, float *a, float *b, int width)
{
    int i = threadIdx.x + blockDim.x * blockIdx.x;
    int j = threadIdx.y + blockDim.y * blockIdx.y;

    float sum = 0;
    for(int k=0;k<width;k++)
    {
        float a_k = a[j*width+k];
        float b_k = b[k*width+i];
        sum += a_k*b_k;
    }
    dest[j*width+i] = sum;
}
""")

matrix_mul = mod.get_function("matrix_mul")

a = numpy.random.randn(400, 400).astype(numpy.float32)
b = numpy.random.randn(400, 400).astype(numpy.float32)
dest = numpy.zeros_like(a)
width = numpy.int32(400)

matrix_mul(cuda.Out(dest), cuda.In(a), cuda.In(b), width, block=(16, 16, 1), grid=(25, 25))

print(dest)

print("="*10)
print(numpy.dot(a,b))

例子2

from __future__ import print_function, division
import os
from PIL import Image
import torch
import torch.utils.data
import torchvision
from skimage import io
from torch.utils.data import Dataset
import random
import numpy as np
import pickle
import lmdb
import sys
import cv2, numpy as np
import sys



import torch
print(torch.version.cuda) # 11.0
print(torch.__version__) # 1.7.0

import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void test(float *a, float *dest)
{
 float x = threadIdx.y; //[0,3)
    float y = blockIdx.x; //[0,2)
    
    
    int width = 4;
    int height = 3;
    int dest_width = 3;
    int dest_height = 2;
    int n=0;
    int srcIdxOffl = width * (height * (2 * n + 0) + (int) (y + (float) 0.0)) + (int) (x + (float) 0.0);
    float label = (a[srcIdxOffl]);
    dest[dest_width * (dest_height * (2 * n + 0) + (int) y) + (int) x] = label;

}
""")



a = np.arange(24).reshape(2, 3, 4).astype(np.float32)
a[0][0][0] = -1
b = np.zeros([2,2,3]).astype(np.float32)

test = mod.get_function("test")

test(cuda.In(a),cuda.Out(b), block=(1, 3, 1), grid=(2, 1, 1))

ccc = 0
这篇关于cuda编程 pycuda的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!