GpuMat 1.0.1
A GpuMat for CS205 C/C++ Project4
Loading...
Searching...
No Matches
Classes | Macros | Functions
a1.cpp File Reference
#include <iostream>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cuda_fp16.h>

Classes

class  Matrix< T >
 Base MAT class for GPU memory with reference counting. More...
 

Macros

#define uchar   unsigned char
 
#define MAT_8   1
 
#define MAT_8U   3
 
#define MAT_16   2
 
#define MAT_16I   5
 
#define MAT_32   4
 
#define MAT_64   8
 
#define BIG_LIMIT   1000000
 
#define CUDA_CHECK(call)
 Checks the result of a CUDA function call for errors.
 
#define CUFFT_CHECK(call)
 Checks the result of a cuFFT function call for errors.
 

Functions

template<typename T >
__global__ void matrixAddKernel (T *a, T *b, T *result, size_t rows, size_t cols)
 Add Kernel.
 
template<typename T >
__global__ void matrixSubtractKernel (T *a, T *b, T *result, size_t rows, size_t cols)
 Sub Kernel.
 
template<typename T >
__global__ void matrixMulKernel (T *a, T *b, T *result, size_t aRows, size_t aCols, size_t bCols)
 Matrix multiplication kernel. It could be made faster; see https://github.com/njuhope/cuda_sgemm/blob/master/gemm.cu
 
template<typename T >
__global__ void LUDecomposition (T *A, int n)
 LU decomposition kernel. It could be made faster; see the GEMM reference at https://github.com/njuhope/cuda_sgemm/blob/master/gemm.cu.
 

Macro Definition Documentation

◆ BIG_LIMIT

#define BIG_LIMIT   1000000

◆ CUDA_CHECK

#define CUDA_CHECK ( call)
Value:
do { \
fprintf(stderr, "CUDA error in file '%s' in line %i: %s\n", \
} \
} while(0)
__global__ void matrixMulKernel(T *a, T *b, T *result, size_t aRows, size_t aCols, size_t bCols)
mul Kernel be faster! https://github.com/njuhope/cuda_sgemm/blob/master/gemm.cu
Definition a1.cpp:244

Checks the result of a CUDA function call for errors.

Parameters
call

◆ CUFFT_CHECK

#define CUFFT_CHECK ( call)
Value:
do { \
fprintf(stderr, "cuFFT error in file '%s' in line %i\n", \
} \
} while(0)

Checks the result of a cuFFT function call for errors.

Parameters
call

◆ MAT_16

#define MAT_16   2

◆ MAT_16I

#define MAT_16I   5

◆ MAT_32

#define MAT_32   4

◆ MAT_64

#define MAT_64   8

◆ MAT_8

#define MAT_8   1

◆ MAT_8U

#define MAT_8U   3

◆ uchar

#define uchar   unsigned char

Function Documentation

◆ LUDecomposition()

template<typename T >
__global__ void LUDecomposition ( T * A,
int n )

LU decomposition kernel. It could be made faster; see the GEMM reference at https://github.com/njuhope/cuda_sgemm/blob/master/gemm.cu.

Template Parameters
T
Parameters
A
n
Returns
Nothing (declared as a __global__ void kernel).

◆ matrixAddKernel()

template<typename T >
__global__ void matrixAddKernel ( T * a,
T * b,
T * result,
size_t rows,
size_t cols )

Add Kernel.

Template Parameters
T
Parameters
a
b
result
rows
cols
Returns
Nothing (declared as a __global__ void kernel).

◆ matrixMulKernel()

template<typename T >
__global__ void matrixMulKernel ( T * a,
T * b,
T * result,
size_t aRows,
size_t aCols,
size_t bCols )

Matrix multiplication kernel. It could be made faster; see https://github.com/njuhope/cuda_sgemm/blob/master/gemm.cu

Template Parameters
T
Parameters
a
b
result
aRows
aCols
bCols
Returns
Nothing (declared as a __global__ void kernel).

◆ matrixSubtractKernel()

template<typename T >
__global__ void matrixSubtractKernel ( T * a,
T * b,
T * result,
size_t rows,
size_t cols )

Sub Kernel.

Template Parameters
T
Parameters
a
b
result
rows
cols
Returns
Nothing (declared as a __global__ void kernel).