date: 2022-08-23 11:10:23

tags: cuda cpp

make_unique_with_cuda

#include "stdio.h"
#include <memory>
namespace cuda
{
    template <typename T>
    [[nodiscard]] static auto malloc(std::size_t const size)
    {
        //nodiscard implies must use it return value, or will encounter an error
        static T *d{nullptr};
        cudaMalloc(&d, sizeof(T) * size);
        return d;
    }
    template <typename T>
    static void free(T *ptr)
    {
        if (ptr)
        {
            cudaFree(ptr);
            ptr=nullptr;
        }
    }
    template <typename T>
    [[nodiscard]] static auto makeUnique(std::size_t size)
    {
        return std::unique_ptr<T[], decltype(&free<T>)> { malloc<T>(size), free<T> };
    }
} // namespace name
__global__ void kernel(float3 *d)
{
    int id = threadIdx.x;
    // d[id].x=d[id].y=d[id].z=id*1.1;
    printf("%g\t%g\t%g\n", d[id].x, d[id].y, d[id].z);
}
void showh(float3 *d)
{
    for (int id = 0; id < 5; id++)
    {
        printf("%g\t%g\t%g\n", d[id].x, d[id].y, d[id].z);
    }
}
using namespace cuda;
int main(void)
{
    auto const count = 5;
    auto hp_points{std::make_unique<float3[]>(count)};
    for (int i = 0; i < count; i++)
    {
        hp_points[i].x=i*1.1;
        hp_points[i].y=i*1.1;
        hp_points[i].z=i*1.1;
    }
    // showh(hp_points.get());
    auto dp_points{cuda::makeUnique<float3>(count)};
    cudaMemcpy(dp_points.get(),hp_points.get(), //get() will return unique ptr's address
    sizeof(float3)*count,cudaMemcpyHostToDevice);
    kernel<<<1,5>>>(dp_points.get());
    cudaDeviceSynchronize();
    dp_points.~unique_ptr();
    return 0;
}