I’m attempting to develop a custom attention mechanism with the cuDNN graph API, but I’m facing some challenges. My aim is to set up a basic version that links two matrix multiplication operations, avoiding the intermediate steps for the moment.
The issue arises when I try to finalize an execution plan using any of the configurations from the engine heuristic; it consistently returns CUDNN_STATUS_NOT_SUPPORTED
. I’ve verified that the tensor shapes and data types are in accordance with what is needed, but there seems to be an underlying problem.
Here are the dimensions for the tensors I’m dealing with:
query_dims: 4: 1 1 10 64
query_strides: 4: 640 640 64 1
key_dims: 4: 1 1 64 10
key_strides: 4: 640 640 1 64
scores_dims: 4: 1 1 10 10
scores_strides: 4: 100 100 10 1
value_dims: 4: 1 1 10 64
value_strides: 4: 640 640 64 1
result_dims: 4: 1 1 10 64
result_strides: 4: 640 640 64 1
While my implementation involves creating the tensor descriptors and the matrix multiplication procedures, the finalization of the execution plan continually fails. What could be the cause of this issue, and how can I modify it to ensure that at least one engine configuration works?
#include <cudnn.h>
#include <iostream>
#include <vector>
#define CHECK_CUDNN(call) \
{ \
cudnnStatus_t err = call; \
if (err != CUDNN_STATUS_SUCCESS) { \
printf("cuDNN error at %s:%d - %s\n", __FILE__, __LINE__, cudnnGetErrorString(err)); \
exit(1); \
} \
}
void display_dims(const std::vector<int64_t> &dims, std::string label) {
std::cout << label << ": " << dims.size() << ": ";
for (auto d : dims) {
std::cout << d << " ";
}
std::cout << std::endl;
}
std::vector<int64_t> compute_strides(const std::vector<int64_t> &dims) {
return {dims[1] * dims[2] * dims[3], dims[2] * dims[3], dims[3], 1};
}
cudnnBackendDescriptor_t create_tensor_desc(const std::vector<int64_t> &dims,
const std::vector<int64_t> &strides,
int64_t tensor_id,
cudnnDataType_t dtype,
int64_t alignment,
bool virtual_tensor,
bool use_reordering = false) {
cudnnBackendDescriptor_t descriptor;
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_TENSOR_DESCRIPTOR, &descriptor));
CHECK_CUDNN(cudnnBackendSetAttribute(descriptor, CUDNN_ATTR_TENSOR_UNIQUE_ID,
CUDNN_TYPE_INT64, 1, &tensor_id));
CHECK_CUDNN(cudnnBackendSetAttribute(descriptor, CUDNN_ATTR_TENSOR_DATA_TYPE,
CUDNN_TYPE_DATA_TYPE, 1, &dtype));
CHECK_CUDNN(cudnnBackendSetAttribute(descriptor, CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT,
CUDNN_TYPE_INT64, 1, &alignment));
CHECK_CUDNN(cudnnBackendSetAttribute(descriptor, CUDNN_ATTR_TENSOR_DIMENSIONS,
CUDNN_TYPE_INT64, (int64_t)dims.size(), dims.data()));
CHECK_CUDNN(cudnnBackendSetAttribute(descriptor, CUDNN_ATTR_TENSOR_STRIDES,
CUDNN_TYPE_INT64, (int64_t)strides.size(), strides.data()));
CHECK_CUDNN(cudnnBackendSetAttribute(descriptor, CUDNN_ATTR_TENSOR_IS_VIRTUAL,
CUDNN_TYPE_BOOLEAN, 1, &virtual_tensor));
if (use_reordering) {
cudnnBackendTensorReordering_t reorder_mode = CUDNN_TENSOR_REORDERING_F16x16;
CHECK_CUDNN(cudnnBackendSetAttribute(descriptor, CUDNN_ATTR_TENSOR_REORDERING_MODE,
CUDNN_TYPE_TENSOR_REORDERING_MODE, 1, &reorder_mode));
}
CHECK_CUDNN(cudnnBackendFinalize(descriptor));
return descriptor;
}
int main() {
std::vector<int64_t> query_shape = {1, 1, 10, 64};
std::vector<int64_t> query_stride = compute_strides(query_shape);
std::vector<int64_t> key_shape = {1, 1, 10, 64};
std::vector<int64_t> key_stride = compute_strides(key_shape);
std::swap(key_shape[2], key_shape[3]);
std::swap(key_stride[2], key_stride[3]);
std::vector<int64_t> value_shape = {1, 1, 10, 64};
std::vector<int64_t> value_stride = compute_strides(value_shape);
std::vector<int64_t> attention_shape = {query_shape[0], query_shape[1],
query_shape[2], key_shape[3]};
std::vector<int64_t> attention_stride = compute_strides(attention_shape);
std::vector<int64_t> result_shape = {query_shape[0], query_shape[1],
query_shape[2], value_shape[3]};
std::vector<int64_t> result_stride = compute_strides(result_shape);
cudnnHandle_t cudnn_handle;
CHECK_CUDNN(cudnnCreate(&cudnn_handle));
cudnnDataType_t compute_dtype = CUDNN_DATA_FLOAT;
cudnnDataType_t tensor_dtype = CUDNN_DATA_HALF;
int64_t byte_align = 2;
auto query_tensor = create_tensor_desc(query_shape, query_stride, 101, tensor_dtype, byte_align, false);
auto key_tensor = create_tensor_desc(key_shape, key_stride, 102, tensor_dtype, byte_align, false);
auto value_tensor = create_tensor_desc(value_shape, value_stride, 103, tensor_dtype, byte_align, false);
auto attention_tensor = create_tensor_desc(attention_shape, attention_stride, 104, tensor_dtype, byte_align, true, true);
auto result_tensor = create_tensor_desc(result_shape, result_stride, 105, tensor_dtype, byte_align, false);
display_dims(query_shape, "query_dims");
display_dims(query_stride, "query_strides");
display_dims(key_shape, "key_dims");
display_dims(key_stride, "key_strides");
display_dims(attention_shape, "scores_dims");
display_dims(attention_stride, "scores_strides");
display_dims(value_shape, "value_dims");
display_dims(value_stride, "value_strides");
display_dims(result_shape, "result_dims");
display_dims(result_stride, "result_strides");
cudnnBackendDescriptor_t first_matmul_desc;
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_MATMUL_DESCRIPTOR, &first_matmul_desc));
CHECK_CUDNN(cudnnBackendSetAttribute(first_matmul_desc, CUDNN_ATTR_MATMUL_COMP_TYPE,
CUDNN_TYPE_DATA_TYPE, 1, &compute_dtype));
CHECK_CUDNN(cudnnBackendFinalize(first_matmul_desc));
cudnnBackendDescriptor_t first_operation;
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR, &first_operation));
CHECK_CUDNN(cudnnBackendSetAttribute(first_operation, CUDNN_ATTR_OPERATION_MATMUL_DESC,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &first_matmul_desc));
CHECK_CUDNN(cudnnBackendSetAttribute(first_operation, CUDNN_ATTR_OPERATION_MATMUL_ADESC,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &query_tensor));
CHECK_CUDNN(cudnnBackendSetAttribute(first_operation, CUDNN_ATTR_OPERATION_MATMUL_BDESC,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &key_tensor));
CHECK_CUDNN(cudnnBackendSetAttribute(first_operation, CUDNN_ATTR_OPERATION_MATMUL_CDESC,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &attention_tensor));
CHECK_CUDNN(cudnnBackendFinalize(first_operation));
cudnnBackendDescriptor_t second_matmul_desc;
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_MATMUL_DESCRIPTOR, &second_matmul_desc));
CHECK_CUDNN(cudnnBackendSetAttribute(second_matmul_desc, CUDNN_ATTR_MATMUL_COMP_TYPE,
CUDNN_TYPE_DATA_TYPE, 1, &compute_dtype));
CHECK_CUDNN(cudnnBackendFinalize(second_matmul_desc));
cudnnBackendDescriptor_t second_operation;
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR, &second_operation));
CHECK_CUDNN(cudnnBackendSetAttribute(second_operation, CUDNN_ATTR_OPERATION_MATMUL_DESC,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &second_matmul_desc));
CHECK_CUDNN(cudnnBackendSetAttribute(second_operation, CUDNN_ATTR_OPERATION_MATMUL_ADESC,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &attention_tensor));
CHECK_CUDNN(cudnnBackendSetAttribute(second_operation, CUDNN_ATTR_OPERATION_MATMUL_BDESC,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &value_tensor));
CHECK_CUDNN(cudnnBackendSetAttribute(second_operation, CUDNN_ATTR_OPERATION_MATMUL_CDESC,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &result_tensor));
CHECK_CUDNN(cudnnBackendFinalize(second_operation));
cudnnBackendDescriptor_t operation_graph;
cudnnBackendDescriptor_t operations[] = {first_operation, second_operation};
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR, &operation_graph));
CHECK_CUDNN(cudnnBackendSetAttribute(operation_graph, CUDNN_ATTR_OPERATIONGRAPH_OPS,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 2, operations));
CHECK_CUDNN(cudnnBackendSetAttribute(operation_graph, CUDNN_ATTR_OPERATIONGRAPH_HANDLE,
CUDNN_TYPE_HANDLE, 1, &cudnn_handle));
CHECK_CUDNN(cudnnBackendFinalize(operation_graph));
cudnnBackendDescriptor_t heuristic;
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR, &heuristic));
CHECK_CUDNN(cudnnBackendSetAttribute(heuristic, CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &operation_graph));
cudnnBackendHeurMode_t mode = CUDNN_HEUR_MODE_FALLBACK;
CHECK_CUDNN(cudnnBackendSetAttribute(heuristic, CUDNN_ATTR_ENGINEHEUR_MODE,
CUDNN_TYPE_HEUR_MODE, 1, &mode));
CHECK_CUDNN(cudnnBackendFinalize(heuristic));
int64_t config_count = 0;
CHECK_CUDNN(cudnnBackendGetAttribute(heuristic, CUDNN_ATTR_ENGINEHEUR_RESULTS,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 0, &config_count, NULL));
std::vector<cudnnBackendDescriptor_t> engine_configs(config_count);
for (auto &config : engine_configs) {
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, &config));
}
CHECK_CUDNN(cudnnBackendGetAttribute(heuristic, CUDNN_ATTR_ENGINEHEUR_RESULTS,
CUDNN_TYPE_BACKEND_DESCRIPTOR, config_count, nullptr, engine_configs.data()));
for (auto &config : engine_configs) {
cudnnBackendDescriptor_t plan;
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR, &plan));
CHECK_CUDNN(cudnnBackendSetAttribute(plan, CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &config));
CHECK_CUDNN(cudnnBackendSetAttribute(plan, CUDNN_ATTR_EXECUTION_PLAN_HANDLE,
CUDNN_TYPE_HANDLE, 1, &cudnn_handle));
cudnnStatus_t result = cudnnBackendFinalize(plan);
std::cout << cudnnGetErrorString(result) << "\n";
if (result == CUDNN_STATUS_SUCCESS) {
std::cout << "Found working configuration!\n";
}
}
return 0;
}