Skip to content

Commit 0c704ce

Browse files
author
Gopalakrishnan Nallasamy
committed
Harden OneHot operator input validation and output size computation
- Add an overflow check in PrepareOutputShape (an explicit bound check on the output element count, and SafeInt for the prefix_dim_size multiplication) to prevent unbounded allocation when depth or the indices shape would overflow int64
- Guard against division by zero when prefix_dim_size is zero
- Add CUDA int32 range validation before fast_divmod to avoid silent truncation in gsl::narrow_cast for suffix_dim_size and depth_val * suffix_dim_size
- Check for nullptr from Output() in both the CPU and CUDA Compute paths
- Add unit tests: depth overflow (two variants), negative depth, depth=1 edge case, scalar-indices rejection (ONNX spec requires rank>=1), and opset 9 coverage
1 parent 0c3e5fc commit 0c704ce

3 files changed

Lines changed: 111 additions & 3 deletions

File tree

onnxruntime/core/providers/cpu/tensor/onehot.cc

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,12 @@ limitations under the License.
1616

1717
#include "core/providers/cpu/tensor/onehot.h"
1818
#include "core/common/eigen_common_wrapper.h"
19+
#include "core/common/safeint.h"
1920
#include "core/platform/env.h"
2021
#include "core/providers/common.h"
2122

23+
#include <limits>
24+
2225
#ifndef EIGEN_USE_THREADS
2326
#define EIGEN_USE_THREADS
2427
#endif
@@ -100,11 +103,28 @@ Status PrepareOutputShape(const Tensor* indices, const int64_t depth_val, const
100103

101104
output_shape.insert(output_shape.begin() + true_axis, depth_val);
102105

103-
prefix_dim_size = 1;
106+
// Validate that the total output tensor element count does not overflow int64.
107+
{
108+
int64_t total_elements = 1;
109+
for (auto dim : output_shape) {
110+
if (dim > 0 && total_elements > std::numeric_limits<int64_t>::max() / dim) {
111+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
112+
"OneHot: output tensor size would overflow for the given indices shape "
113+
"and depth value (", depth_val, ").");
114+
}
115+
total_elements *= dim;
116+
}
117+
}
118+
119+
// Use SafeInt for prefix_dim_size computation to guard against overflow.
120+
SafeInt<int64_t> safe_prefix = 1;
104121
for (int64_t i = 0; i < true_axis; ++i) {
105-
prefix_dim_size *= indices_dims[onnxruntime::narrow<size_t>(i)];
122+
safe_prefix *= indices_dims[onnxruntime::narrow<size_t>(i)];
106123
}
107-
suffix_dim_size = indices_shape.Size() / prefix_dim_size;
124+
prefix_dim_size = safe_prefix;
125+
126+
// Guard against division by zero when indices have a zero-sized dimension before the axis.
127+
suffix_dim_size = (prefix_dim_size > 0) ? (indices_shape.Size() / prefix_dim_size) : 0;
108128

109129
return Status::OK();
110130
}
@@ -166,6 +186,7 @@ Status OneHotOp<in_type, out_type, depth_type>::Compute(OpKernelContext* p_op_ke
166186
// allocate output
167187
const auto* values_data = values->Data<out_type>();
168188
Tensor* output = p_op_kernel_context->Output(0, TensorShape(output_shape));
189+
ORT_RETURN_IF_NOT(output, "OneHot: failed to allocate output tensor. Output shape may be too large.");
169190

170191
// edge case where we have a dim with a value of 0
171192
if (output->Shape().Size() == 0)

onnxruntime/core/providers/cuda/tensor/onehot.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
#include "core/providers/cuda/tensor/onehot.h"
55

6+
#include <limits>
7+
68
using namespace onnxruntime::common;
79

810
namespace onnxruntime {
@@ -55,11 +57,22 @@ Status OneHotOp<in_type, out_type, depth_type>::ComputeInternal(OpKernelContext*
5557
// allocate output
5658
const auto* values_data = reinterpret_cast<const CudaT_Out*>(values->Data<out_type>());
5759
Tensor* output = ctx->Output(0, TensorShape(output_shape));
60+
ORT_RETURN_IF_NOT(output, "OneHot: failed to allocate output tensor. Output shape may be too large.");
5861

5962
// edge case where we have a dim with a value of 0
6063
if (output->Shape().Size() == 0)
6164
return Status::OK();
6265

66+
// Validate that dimensions used by CUDA kernels fit in int32 range.
67+
// fast_divmod requires int32 operands.
68+
constexpr int64_t kInt32Max = std::numeric_limits<int>::max();
69+
ORT_RETURN_IF_NOT(suffix_dim_size <= kInt32Max,
70+
"OneHot: suffix dimension size (", suffix_dim_size,
71+
") exceeds int32 range supported by the CUDA kernel.");
72+
ORT_RETURN_IF_NOT(depth_val <= kInt32Max / std::max(suffix_dim_size, int64_t{1}),
73+
"OneHot: depth (", depth_val, ") * suffix dimension size (", suffix_dim_size,
74+
") exceeds int32 range supported by the CUDA kernel.");
75+
6376
const fast_divmod fdm_suffix(gsl::narrow_cast<int>(suffix_dim_size));
6477
const auto* indices_data = indices->Data<in_type>();
6578
auto* output_data = reinterpret_cast<CudaT_Out*>(output->MutableData<out_type>());

onnxruntime/test/providers/cpu/tensor/onehot_op_test.cc

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
33

4+
#include <limits>
5+
46
#include "gtest/gtest.h"
57
#include "test/providers/provider_test_utils.h"
68
#include "test/common/trt_op_test_utils.h"
@@ -499,6 +501,78 @@ TEST(OneHotOpTest, DimWithZero) {
499501
test.Run();
500502
}
501503

504+
// Test that extremely large depth values that would cause output tensor size overflow are rejected.
505+
TEST(OneHotOpTest, DepthTooLarge_OutputSizeOverflow) {
506+
OpTester test("OneHot", 11);
507+
// indices shape [2, 3] with depth = INT64_MAX causes output shape [2, 3, INT64_MAX]
508+
// which would overflow when computing total element count.
509+
test.AddInput<int64_t>("indices", {2, 3}, {1, 2, 3, 4, 5, 6});
510+
test.AddInput<int64_t>("depth", {1}, {std::numeric_limits<int64_t>::max()});
511+
test.AddInput<int64_t>("values", {2}, {0, 1});
512+
test.AddOutput<int64_t>("output", {2, 3, 1}, {0, 0, 0, 0, 0, 0});
513+
test.Run(OpTester::ExpectResult::kExpectFailure, "output tensor size would overflow");
514+
}
515+
516+
// Test that a large depth value which overflows only once multiplied by the number of indices (1-D, 1000 elements) is rejected.
517+
TEST(OneHotOpTest, DepthTooLarge_OutputSizeOverflow_LargeIndices) {
518+
OpTester test("OneHot", 11);
519+
// indices shape [1000] with depth = INT64_MAX / 500 causes overflow in element count.
520+
const int64_t large_depth = std::numeric_limits<int64_t>::max() / 500;
521+
std::vector<int64_t> indices(1000, 0);
522+
std::vector<int64_t> dummy_output(1000, 0);
523+
test.AddInput<int64_t>("indices", {1000}, indices);
524+
test.AddInput<int64_t>("depth", {1}, {large_depth});
525+
test.AddInput<int64_t>("values", {2}, {0, 1});
526+
test.AddOutput<int64_t>("output", {1000, 1}, dummy_output);
527+
test.Run(OpTester::ExpectResult::kExpectFailure, "output tensor size would overflow");
528+
}
529+
530+
// Test that a negative depth value is rejected.
531+
TEST(OneHotOpTest, NegativeDepth) {
532+
OpTester test("OneHot", 11);
533+
test.AddInput<int64_t>("indices", {2, 3}, {1, 2, 3, 4, 5, 6});
534+
test.AddInput<int64_t>("depth", {1}, {-5});
535+
test.AddInput<int64_t>("values", {2}, {0, 1});
536+
test.AddOutput<int64_t>("output", {2, 3, 1}, {0, 0, 0, 0, 0, 0});
537+
test.Run(OpTester::ExpectResult::kExpectFailure, "Depth is negative");
538+
}
539+
540+
// Test minimum valid depth value of 1.
541+
TEST(OneHotOpTest, DepthOne) {
542+
OpTester test("OneHot", 11);
543+
test.AddInput<int64_t>("indices", {3}, {0, 0, 0});
544+
test.AddInput<int64_t>("depth", {1}, {1});
545+
test.AddInput<int64_t>("values", {2}, {0, 1});
546+
test.AddOutput<int64_t>("output", {3, 1}, {1, 1, 1});
547+
test.Run();
548+
}
549+
550+
// Test scalar (rank-0) indices are rejected per ONNX spec (indices must have rank >= 1).
551+
TEST(OneHotOpTest, ScalarIndicesRejected) {
552+
OpTester test("OneHot", 11);
553+
test.AddInput<int64_t>("indices", {}, {2});
554+
test.AddInput<int64_t>("depth", {1}, {5});
555+
test.AddInput<int64_t>("values", {2}, {0, 1});
556+
test.AddOutput<int64_t>("output", {5}, {0, 0, 1, 0, 0});
557+
test.Run(OpTester::ExpectResult::kExpectFailure, "Indices tensor must have rank >= 1");
558+
}
559+
560+
// Test the default-axis case under opset 9 (the other tests added here use opset 11).
561+
TEST(OneHotOpTest, DefaultAxis_Opset9) {
562+
OpTester test("OneHot", 9);
563+
test.AddInput<int64_t>("indices", {2, 3}, {1, 9, 8, 2, 4, 6});
564+
test.AddInput<int64_t>("depth", {1}, {10});
565+
test.AddInput<int64_t>("values", {2}, {0, 1});
566+
test.AddOutput<int64_t>("output", {2, 3, 10},
567+
{0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
568+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
569+
0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
570+
0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
571+
0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
572+
0, 0, 0, 0, 0, 0, 1, 0, 0, 0});
573+
test.Run();
574+
}
575+
502576
#ifdef USE_CUDA
503577

504578
TEST(OneHotOpTest, DefaultAxis_int64_MLFloat16_int64 /*indices, output, depth*/) {

0 commit comments

Comments
 (0)