#include <iostream>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cuda_fp16.h>
|
template<typename T > |
__global__ void | matrixAddKernel (T *a, T *b, T *result, size_t rows, size_t cols) |
| Matrix addition kernel.
|
|
template<typename T > |
__global__ void | matrixSubtractKernel (T *a, T *b, T *result, size_t rows, size_t cols) |
| Matrix subtraction kernel.
|
|
template<typename T > |
__global__ void | matrixMulKernel (T *a, T *b, T *result, size_t aRows, size_t aCols, size_t bCols) |
| Matrix multiplication kernel. TODO: make it faster; see https://github.com/njuhope/cuda_sgemm/blob/master/gemm.cu for reference.
|
|
template<typename T > |
__global__ void | LUDecomposition (T *A, int n) |
| LU decomposition kernel. TODO: make it faster; see https://github.com/njuhope/cuda_sgemm/blob/master/gemm.cu for reference.
|
|
◆ BIG_LIMIT
◆ CUDA_CHECK
Value:do { \
} \
} while(0)
__global__ void matrixMulKernel(T *a, T *b, T *result, size_t aRows, size_t aCols, size_t bCols)
Matrix multiplication kernel. TODO: make it faster; see https://github.com/njuhope/cuda_sgemm/blob/master/gemm.cu for reference.
Definition a1.cpp:244
Checks a CUDA function call for errors.
- Parameters
-
◆ CUFFT_CHECK
Value:
Checks a cuFFT function call for errors.
- Parameters
-
◆ MAT_16
◆ MAT_16I
◆ MAT_32
◆ MAT_64
◆ MAT_8
◆ MAT_8U
◆ uchar
◆ LUDecomposition()
◆ matrixAddKernel()
Matrix addition kernel.
- Template Parameters
-
- Parameters
-
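array1 | first input matrix (named `a` in the signature) |
array2 | second input matrix (named `b` in the signature) |
result_array | output matrix (named `result` in the signature) |
rows | number of matrix rows |
cols | number of matrix columns |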
- Returns
- global
◆ matrixMulKernel()
template<typename T >
__global__ void matrixMulKernel |
( |
T * | a, |
|
|
T * | b, |
|
|
T * | result, |
|
|
size_t | aRows, |
|
|
size_t | aCols, |
|
|
size_t | bCols ) |
◆ matrixSubtractKernel()
Matrix subtraction kernel.
- Template Parameters
-
- Parameters
-
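array1 | first input matrix (named `a` in the signature) |
array2 | second input matrix (named `b` in the signature) |
result_array | output matrix (named `result` in the signature) |
rows | number of matrix rows |
cols | number of matrix columns |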
- Returns
- global