diff --git a/CUDA/includes/attention.cuh b/CUDA/includes/attention.cuh new file mode 100644 index 0000000..7feac08 --- /dev/null +++ b/CUDA/includes/attention.cuh @@ -0,0 +1,29 @@ +#pragma once + +#include "tensor.cuh" + +#include + +namespace quadtrix { +namespace cuda { + +Status attention_forward( + const TensorView& input_qkv, + TensorView preatt, + TensorView att, + TensorView output, + int num_heads, + cudaStream_t stream = nullptr); + +Status attention_backward( + const TensorView& grad_output, + const TensorView& input_qkv, + const TensorView& att, + TensorView grad_input_qkv, + TensorView grad_preatt, + TensorView grad_att, + int num_heads, + cudaStream_t stream = nullptr); + +} // namespace cuda +} // namespace quadtrix