Skip to content

Commit d9ed85f

Browse files
0lai0 and martin-g authored
fix: Support on all-literal RLIKE expression (#3647)
* Native engine crashes on all-literal RLIKE expression
* add test
* address comment
* Update native/spark-expr/src/predicate_funcs/rlike.rs
  Co-authored-by: Martin Grigorov <martin-g@users.noreply.github.com>
* address comment test
* fix fmt
---------
Co-authored-by: Martin Grigorov <martin-g@users.noreply.github.com>
1 parent 1da18dd commit d9ed85f

2 files changed

Lines changed: 70 additions & 4 deletions

File tree

native/spark-expr/src/predicate_funcs/rlike.rs

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use arrow::array::types::Int32Type;
2121
use arrow::array::{Array, BooleanArray, DictionaryArray, RecordBatch, StringArray};
2222
use arrow::compute::take;
2323
use arrow::datatypes::{DataType, Schema};
24-
use datafusion::common::{internal_err, Result};
24+
use datafusion::common::{internal_err, Result, ScalarValue};
2525
use datafusion::physical_expr::PhysicalExpr;
2626
use datafusion::physical_plan::ColumnarValue;
2727
use regex::Regex;
@@ -140,8 +140,24 @@ impl PhysicalExpr for RLike {
140140
let array = self.is_match(inputs);
141141
Ok(ColumnarValue::Array(Arc::new(array)))
142142
}
143-
ColumnarValue::Scalar(_) => {
144-
internal_err!("non scalar regexp patterns are not supported")
143+
ColumnarValue::Scalar(scalar) => {
144+
if scalar.is_null() {
145+
return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
146+
}
147+
148+
let is_match = match scalar {
149+
ScalarValue::Utf8(Some(s))
150+
| ScalarValue::LargeUtf8(Some(s))
151+
| ScalarValue::Utf8View(Some(s)) => self.pattern.is_match(&s),
152+
_ => {
153+
return internal_err!(
154+
"RLike requires string type for input, got {:?}",
155+
scalar.data_type()
156+
);
157+
}
158+
};
159+
160+
Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(is_match))))
145161
}
146162
}
147163
}
@@ -165,3 +181,53 @@ impl PhysicalExpr for RLike {
165181
Display::fmt(self, f)
166182
}
167183
}
184+
185+
#[cfg(test)]
186+
// Unit tests for `RLike::evaluate` when the child expression is a `Literal`
// (a scalar input) instead of a column.
mod tests {
187+
use super::*;
188+
use datafusion::physical_expr::expressions::Literal;
189+
190+
// Each string scalar variant (Utf8, LargeUtf8, Utf8View) holding "Rose" is
// expected to match `R[a-z]+` and yield `Boolean(Some(true))`; a null Utf8
// literal is expected to yield `Boolean(None)`.
#[test]
191+
fn test_rlike_scalar_string_variants() {
192+
let pattern = "R[a-z]+";
193+
let scalars = [
194+
ScalarValue::Utf8(Some("Rose".to_string())),
195+
ScalarValue::LargeUtf8(Some("Rose".to_string())),
196+
ScalarValue::Utf8View(Some("Rose".to_string())),
197+
];
198+
199+
for scalar in scalars {
200+
let expr = RLike::try_new(Arc::new(Literal::new(scalar.clone())), pattern).unwrap();
201+
// The batch is empty with an empty schema, so the result can only come
// from the literal child.
let result = expr
202+
.evaluate(&RecordBatch::new_empty(Arc::new(Schema::empty())))
203+
.unwrap();
204+
// A scalar input is expected to produce a scalar result, not an array.
let ColumnarValue::Scalar(result) = result else {
205+
panic!("expected scalar result");
206+
};
207+
assert_eq!(result, ScalarValue::Boolean(Some(true)));
208+
}
209+
210+
// Null input should produce a null boolean result
211+
let expr =
212+
RLike::try_new(Arc::new(Literal::new(ScalarValue::Utf8(None))), pattern).unwrap();
213+
let result = expr
214+
.evaluate(&RecordBatch::new_empty(Arc::new(Schema::empty())))
215+
.unwrap();
216+
let ColumnarValue::Scalar(result) = result else {
217+
panic!("expected scalar result");
218+
};
219+
assert_eq!(result, ScalarValue::Boolean(None));
220+
}
221+
222+
// A non-string scalar (here a Boolean literal) is expected to make
// `evaluate` return an error instead of a value.
#[test]
223+
fn test_rlike_scalar_non_string_error() {
224+
// Construction is expected to succeed (hence `unwrap`); only evaluation
// should fail.
let expr = RLike::try_new(
225+
Arc::new(Literal::new(ScalarValue::Boolean(Some(true)))),
226+
"R[a-z]+",
227+
)
228+
.unwrap();
229+
230+
let result = expr.evaluate(&RecordBatch::new_empty(Arc::new(Schema::empty())));
231+
assert!(result.is_err());
232+
}
233+
}

spark/src/test/resources/sql-tests/expressions/string/rlike_enabled.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,5 @@ query
3535
SELECT s RLIKE '' FROM test_rlike_enabled
3636

3737
-- literal arguments
38-
query ignore(https://github.com/apache/datafusion-comet/issues/3343)
38+
query
3939
SELECT 'hello' RLIKE '^[a-z]+$', '12345' RLIKE '^[a-z]+$', '' RLIKE '', NULL RLIKE 'a'

0 commit comments

Comments
 (0)