← 返回首页
Add support to cuDNN Dependency module to load verion 8 when available by 9prady9 · Pull Request #2963 · arrayfire/arrayfire · GitHub
Skip to content

Navigation Menu

Toggle navigation
Sign in
Appearance settings
Search or jump to...

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Include my email address so I can be contacted

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Resetting focus
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension .cmake  (1) .cpp  (3) .hpp  (2) All 3 file types selected Viewed files
Conversations
Failed to load comments. Retry
Loading
Jump to
Jump to file
Failed to load files. Retry
Loading
Diff view
Unified
Split
Hide whitespace
Apply and reload
Show whitespace
Diff view
Unified
Split
Hide whitespace
Apply and reload
16 changes: 11 additions & 5 deletions CMakeModules/FindcuDNN.cmake
Show comments View file Edit file Delete file Open in desktop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ find_package(CUDA QUIET)
find_path(cuDNN_INCLUDE_DIRS
NAMES cudnn.h
HINTS
${PC_CUDNN_INCLUDE_DIRS}
${cuDNN_ROOT_DIR}
${PC_CUDNN_INCLUDE_DIRS}
${CUDA_TOOLKIT_INCLUDE}
PATH_SUFFIXES include
DOC "cuDNN include directory path." )
Expand All @@ -64,6 +64,12 @@ if(cuDNN_INCLUDE_DIRS)
file(READ ${cuDNN_INCLUDE_DIRS}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
list(LENGTH CUDNN_MAJOR_VERSION cudnn_ver_matches)
if(${cudnn_ver_matches} EQUAL 0)
file(READ ${cuDNN_INCLUDE_DIRS}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
endif()
string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
Expand Down Expand Up @@ -94,10 +100,10 @@ if(cuDNN_INCLUDE_DIRS)
libcudnn.${cudnn_ver_suffix}.dylib
cudnn
PATHS
$ENV{LD_LIBRARY_PATH}
${libpath_cudart}
${cuDNN_ROOT_DIR}
${PC_CUDNN_LIBRARY_DIRS}
$ENV{LD_LIBRARY_PATH}
${libpath_cudart}
${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES lib lib64 bin lib/x64 bin/x64
DOC "cuDNN link library." )
Expand All @@ -106,10 +112,10 @@ if(cuDNN_INCLUDE_DIRS)
find_file(cuDNN_DLL_LIBRARY
NAMES cudnn64_${cudnn_ver_suffix}${CMAKE_SHARED_LIBRARY_SUFFIX}
PATHS
$ENV{PATH}
${libpath_cudart}
${cuDNN_ROOT_DIR}
${PC_CUDNN_LIBRARY_DIRS}
$ENV{PATH}
${libpath_cudart}
${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES lib lib64 bin lib/x64 bin/x64
DOC "cuDNN Windows DLL." )
Expand Down
125 changes: 103 additions & 22 deletions src/backend/cuda/convolveNN.cpp
Show comments View file Edit file Delete file Open in desktop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,25 @@
#include <af/dim4.hpp>

#include <type_traits>
#include <utility>
#include <vector>

using af::dim4;
using common::flip;
using common::half;
using common::make_handle;
using std::conditional;
using std::is_same;
using std::pair;
using std::tie;
using std::vector;

namespace cuda {

#ifdef WITH_CUDNN

auto getLogger() { return getCudnnPlugin().getLogger(); }

template<typename Desc, typename T>
auto toCudnn(Array<T> arr) {
auto descriptor = make_handle<Desc>();
Expand All @@ -51,6 +58,49 @@ template<typename T>
using scale_type =
typename conditional<is_same<T, double>::value, double, float>::type;

pair<cudnnConvolutionFwdAlgo_t, size_t> getForwardAlgorithm(
cudnnHandle_t cudnn, cudnnTensorDescriptor_t input_descriptor,
cudnnFilterDescriptor_t filter_descriptor,
cudnnConvolutionDescriptor_t convolution_descriptor,
cudnnTensorDescriptor_t output_descriptor) {
cudnnConvolutionFwdAlgo_t convolution_algorithm;
size_t workspace_bytes = 0;

auto version = getCudnnPlugin().getVersion();
if (std::get<0>(version) >= 8) {
int maxAlgoCount = 0;
CUDNN_CHECK(cuda::cudnnGetConvolutionForwardAlgorithmMaxCount(
cudnn, &maxAlgoCount));

vector<cudnnConvolutionFwdAlgoPerf_t> perfResults(maxAlgoCount);
int returnAlgoCount = 0;
CUDNN_CHECK(cuda::cudnnFindConvolutionForwardAlgorithm(
cudnn, input_descriptor, filter_descriptor, convolution_descriptor,
output_descriptor, maxAlgoCount, &returnAlgoCount,
perfResults.data()));

for (int i = 0; i < returnAlgoCount; ++i) {
if (perfResults[i].status == CUDNN_STATUS_SUCCESS) {
convolution_algorithm = perfResults[i].algo;
workspace_bytes = perfResults[i].memory;
break;
}
}
} else {
const int memory_limit =
0; // TODO: set to remaining space in memory manager?
CUDNN_CHECK(cuda::cudnnGetConvolutionForwardAlgorithm(
cudnn, input_descriptor, filter_descriptor, convolution_descriptor,
output_descriptor, CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
memory_limit, &convolution_algorithm));
CUDNN_CHECK(cuda::cudnnGetConvolutionForwardWorkspaceSize(
cudnn, input_descriptor, filter_descriptor, convolution_descriptor,
output_descriptor, convolution_algorithm, &workspace_bytes));
}

return {convolution_algorithm, workspace_bytes};
}

template<typename T>
Array<T> convolve2_cudnn(const Array<T> &signal, const Array<T> &filter,
const dim4 &stride, const dim4 &padding,
Expand Down Expand Up @@ -88,19 +138,12 @@ Array<T> convolve2_cudnn(const Array<T> &signal, const Array<T> &filter,
auto output_descriptor = toCudnn<cudnnTensorDescriptor_t>(out);

// get convolution algorithm
const int memory_limit =
0; // TODO: set to remaining space in memory manager?
cudnnConvolutionFwdAlgo_t convolution_algorithm;
CUDNN_CHECK(cuda::cudnnGetConvolutionForwardAlgorithm(
cudnn, input_descriptor, filter_descriptor, convolution_descriptor,
output_descriptor, CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, memory_limit,
&convolution_algorithm));
size_t workspace_bytes = 0;

// figure out scratch space memory requirements
size_t workspace_bytes;
CUDNN_CHECK(cuda::cudnnGetConvolutionForwardWorkspaceSize(
cudnn, input_descriptor, filter_descriptor, convolution_descriptor,
output_descriptor, convolution_algorithm, &workspace_bytes));
tie(convolution_algorithm, workspace_bytes) =
getForwardAlgorithm(cudnn, input_descriptor, filter_descriptor,
convolution_descriptor, output_descriptor);

auto workspace_buffer = memAlloc<char>(workspace_bytes);

Expand Down Expand Up @@ -355,6 +398,48 @@ Array<T> filter_gradient_base(const Array<T> &incoming_gradient,
}

#ifdef WITH_CUDNN

pair<cudnnConvolutionBwdFilterAlgo_t, size_t> getBackwardFilterAlgorithm(
cudnnHandle_t cudnn, cudnnTensorDescriptor_t x_descriptor,
cudnnTensorDescriptor_t dy_descriptor,
cudnnConvolutionDescriptor_t convolution_descriptor,
cudnnFilterDescriptor_t dw_descriptor) {
// determine algorithm to use
cudnnConvolutionBwdFilterAlgo_t bwd_filt_convolution_algorithm;
// figure out scratch space memory requirements
size_t workspace_bytes = 0;

auto version = getCudnnPlugin().getVersion();
if (std::get<0>(version) >= 8) {
int maxAlgoCount = 0;
CUDNN_CHECK(cuda::cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
cudnn, &maxAlgoCount));

vector<cudnnConvolutionBwdFilterAlgoPerf_t> perfResults(maxAlgoCount);
int returnAlgoCount = 0;
CUDNN_CHECK(cuda::cudnnFindConvolutionBackwardFilterAlgorithm(
cudnn, x_descriptor, dy_descriptor, convolution_descriptor,
dw_descriptor, maxAlgoCount, &returnAlgoCount, perfResults.data()));

for (int i = 0; i < returnAlgoCount; ++i) {
if (perfResults[i].status == CUDNN_STATUS_SUCCESS) {
bwd_filt_convolution_algorithm = perfResults[i].algo;
workspace_bytes = perfResults[i].memory;
break;
}
}
} else {
CUDNN_CHECK(cuda::cudnnGetConvolutionBackwardFilterAlgorithm(
cudnn, x_descriptor, dy_descriptor, convolution_descriptor,
dw_descriptor, CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, 0,
&bwd_filt_convolution_algorithm));
CUDNN_CHECK(cuda::cudnnGetConvolutionBackwardFilterWorkspaceSize(
cudnn, x_descriptor, dy_descriptor, convolution_descriptor,
dw_descriptor, bwd_filt_convolution_algorithm, &workspace_bytes));
}
return {bwd_filt_convolution_algorithm, workspace_bytes};
}

template<typename T>
Array<T> filter_gradient_cudnn(const Array<T> &incoming_gradient,
const Array<T> &original_signal,
Expand Down Expand Up @@ -384,19 +469,15 @@ Array<T> filter_gradient_cudnn(const Array<T> &incoming_gradient,

// determine algorithm to use
cudnnConvolutionBwdFilterAlgo_t bwd_filt_convolution_algorithm;
CUDNN_CHECK(cuda::cudnnGetConvolutionBackwardFilterAlgorithm(
cudnn, x_descriptor, dy_descriptor, convolution_descriptor,
dw_descriptor, CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, 0,
&bwd_filt_convolution_algorithm));

// figure out scratch space memory requirements
size_t workspace_bytes;
CUDNN_CHECK(cuda::cudnnGetConvolutionBackwardFilterWorkspaceSize(
cudnn, x_descriptor, dy_descriptor, convolution_descriptor,
dw_descriptor, bwd_filt_convolution_algorithm, &workspace_bytes));
// prepare output array and scratch space
Array<T> out = createEmptyArray<T>(fDims);
size_t workspace_bytes = 0;

tie(bwd_filt_convolution_algorithm, workspace_bytes) =
getBackwardFilterAlgorithm(cudnn, x_descriptor, dy_descriptor,
convolution_descriptor, dw_descriptor);

// prepare output array and scratch space
Array<T> out = createEmptyArray<T>(fDims);
auto workspace_buffer = memAlloc<char>(workspace_bytes);

// perform convolution
Expand Down
118 changes: 87 additions & 31 deletions src/backend/cuda/cudnn.cpp
Show comments View file Edit file Delete file Open in desktop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ const char *errorString(cudnnStatus_t err) {
return "CUDNN_STATUS_RUNTIME_IN_PROGRESS";
case CUDNN_STATUS_RUNTIME_FP_OVERFLOW:
return "CUDNN_STATUS_RUNTIME_FP_OVERFLOW";
#if CUDNN_VERSION >= 8000
case CUDNN_STATUS_VERSION_MISMATCH:
return "CUDNN_STATUS_VERSION_MISMATCH";
#endif
#endif
#endif
default: return "UNKNOWN";
Expand Down Expand Up @@ -171,16 +175,16 @@ cudnnStatus_t cudnnGetConvolutionNdForwardOutputDim(
convDesc, inputTensorDesc, filterDesc, nbDims, tensorOuputDimA);
}

cudnnStatus_t cudnnGetConvolutionForwardAlgorithm(
cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
const cudnnFilterDescriptor_t wDesc,
const cudnnConvolutionDescriptor_t convDesc,
const cudnnTensorDescriptor_t yDesc,
cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes,
cudnnConvolutionFwdAlgo_t *algo) {
return getCudnnPlugin().cudnnGetConvolutionForwardAlgorithm(
handle, xDesc, wDesc, convDesc, yDesc, preference, memoryLimitInBytes,
algo);
cudnnStatus_t cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle,
int *count) {
return getCudnnPlugin().cudnnGetConvolutionForwardAlgorithmMaxCount(handle,
count);
}

cudnnStatus_t cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
cudnnHandle_t handle, int *count) {
return getCudnnPlugin().cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
handle, count);
}

cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize(
Expand All @@ -193,16 +197,57 @@ cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize(
handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes);
}

cudnnStatus_t cudnnConvolutionForward(
cudnnHandle_t handle, const void *alpha,
const cudnnTensorDescriptor_t xDesc, const void *x,
const cudnnFilterDescriptor_t wDesc, const void *w,
const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
const cudnnTensorDescriptor_t yDesc, void *y) {
return getCudnnPlugin().cudnnConvolutionForward(
handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace,
workSpaceSizeInBytes, beta, yDesc, y);
cudnnStatus_t cudnnGetConvolutionBackwardFilterWorkspaceSize(
cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
const cudnnTensorDescriptor_t dyDesc,
const cudnnConvolutionDescriptor_t convDesc,
const cudnnFilterDescriptor_t gradDesc,
cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) {
return getCudnnPlugin().cudnnGetConvolutionBackwardFilterWorkspaceSize(
handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes);
}

cudnnStatus_t cudnnFindConvolutionForwardAlgorithm(
cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
const cudnnFilterDescriptor_t wDesc,
const cudnnConvolutionDescriptor_t convDesc,
const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount,
int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
return getCudnnPlugin().cudnnFindConvolutionForwardAlgorithm(
handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount,
returnedAlgoCount, perfResults);
}

cudnnStatus_t cudnnFindConvolutionBackwardFilterAlgorithm(
cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
const cudnnTensorDescriptor_t dyDesc,
const cudnnConvolutionDescriptor_t convDesc,
const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount,
int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
return getCudnnPlugin().cudnnFindConvolutionBackwardFilterAlgorithm(
handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount,
returnedAlgoCount, perfResults);
}

cudnnStatus_t cudnnGetConvolutionForwardAlgorithm(
cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
const cudnnFilterDescriptor_t wDesc,
const cudnnConvolutionDescriptor_t convDesc,
const cudnnTensorDescriptor_t yDesc,
cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes,
cudnnConvolutionFwdAlgo_t *algo) {
auto version = getCudnnPlugin().getVersion();
if (std::get<0>(version) < 8) {
return getCudnnPlugin().cudnnGetConvolutionForwardAlgorithm(
handle, xDesc, wDesc, convDesc, yDesc, preference,
memoryLimitInBytes, algo);
} else {
AF_ERROR(
"cudnnGetConvolutionForwardAlgorithm has been removed since cuDNN "
"8",
AF_ERR_NOT_SUPPORTED);
return CUDNN_STATUS_SUCCESS;
}
}

cudnnStatus_t cudnnGetConvolutionBackwardFilterAlgorithm(
Expand All @@ -212,19 +257,30 @@ cudnnStatus_t cudnnGetConvolutionBackwardFilterAlgorithm(
const cudnnFilterDescriptor_t dwDesc,
cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes,
cudnnConvolutionBwdFilterAlgo_t *algo) {
return getCudnnPlugin().cudnnGetConvolutionBackwardFilterAlgorithm(
handle, xDesc, dyDesc, convDesc, dwDesc, preference, memoryLimitInBytes,
algo);
auto version = getCudnnPlugin().getVersion();
if (std::get<0>(version) < 8) {
return getCudnnPlugin().cudnnGetConvolutionBackwardFilterAlgorithm(
handle, xDesc, dyDesc, convDesc, dwDesc, preference,
memoryLimitInBytes, algo);
} else {
AF_ERROR(
"cudnnGetConvolutionBackwardFilterAlgorithm has been removed since "
"cuDNN 8",
AF_ERR_NOT_SUPPORTED);
return CUDNN_STATUS_SUCCESS;
}
}

cudnnStatus_t cudnnGetConvolutionBackwardFilterWorkspaceSize(
cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
const cudnnTensorDescriptor_t dyDesc,
const cudnnConvolutionDescriptor_t convDesc,
const cudnnFilterDescriptor_t gradDesc,
cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) {
return getCudnnPlugin().cudnnGetConvolutionBackwardFilterWorkspaceSize(
handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes);
cudnnStatus_t cudnnConvolutionForward(
cudnnHandle_t handle, const void *alpha,
const cudnnTensorDescriptor_t xDesc, const void *x,
const cudnnFilterDescriptor_t wDesc, const void *w,
const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
const cudnnTensorDescriptor_t yDesc, void *y) {
return getCudnnPlugin().cudnnConvolutionForward(
handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace,
workSpaceSizeInBytes, beta, yDesc, y);
}

cudnnStatus_t cudnnConvolutionBackwardFilter(
Expand Down
Loading
Toggle all file notes Toggle all file annotations

Footer

© 2026 GitHub, Inc.