|
| 1 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 2 | +// you may not use this file except in compliance with the License. |
| 3 | +// You may obtain a copy of the License at |
| 4 | +// |
| 5 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 6 | +// |
| 7 | +// Unless required by applicable law or agreed to in writing, software |
| 8 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 9 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 10 | +// See the License for the specific language governing permissions and |
| 11 | +// limitations under the License. |
| 12 | +use log::debug; |
| 13 | + |
| 14 | +use crate::dialect::{Dialect, Precedence}; |
| 15 | +use crate::keywords::Keyword; |
| 16 | +use crate::parser::{Parser, ParserError}; |
| 17 | +use crate::tokenizer::Token; |
| 18 | + |
/// A [`Dialect`] for [Arroyo](https://www.arroyo.dev/).
///
/// This is based on the Postgres dialect.
///
/// Currently the Arroyo dialect differs from Postgres in one respect: it supports
/// Hive/BigQuery/etc. struct syntax (`struct<a INT, b TEXT>`).
#[derive(Debug)]
pub struct ArroyoDialect {}
| 26 | + |
// Numeric precedence levels used by `ArroyoDialect`, listed from the largest
// value (`PERIOD_PREC`) down to the smallest (`OR_PREC`). Every level has a
// distinct value; gaps are left between neighbouring levels.
const PERIOD_PREC: u8 = 200;
const DOUBLE_COLON_PREC: u8 = 140;
const BRACKET_PREC: u8 = 130;
const COLLATE_PREC: u8 = 120;
const AT_TZ_PREC: u8 = 110;
const CARET_PREC: u8 = 100;
const MUL_DIV_MOD_OP_PREC: u8 = 90;
const PLUS_MINUS_PREC: u8 = 80;
// There's no XOR operator in PostgreSQL, but support it here to avoid breaking tests.
const XOR_PREC: u8 = 75;
const PG_OTHER_PREC: u8 = 70;
const BETWEEN_LIKE_PREC: u8 = 60;
const EQ_PREC: u8 = 50;
const IS_PREC: u8 = 40;
const NOT_PREC: u8 = 30;
const AND_PREC: u8 = 20;
const OR_PREC: u8 = 10;
| 44 | + |
| 45 | +impl Dialect for ArroyoDialect { |
| 46 | + fn identifier_quote_style(&self, _identifier: &str) -> Option<char> { |
| 47 | + Some('"') |
| 48 | + } |
| 49 | + |
| 50 | + fn is_delimited_identifier_start(&self, ch: char) -> bool { |
| 51 | + ch == '"' // Postgres does not support backticks to quote identifiers |
| 52 | + } |
| 53 | + |
| 54 | + fn is_identifier_start(&self, ch: char) -> bool { |
| 55 | + // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS |
| 56 | + // We don't yet support identifiers beginning with "letters with |
| 57 | + // diacritical marks" |
| 58 | + ch.is_alphabetic() || ch == '_' |
| 59 | + } |
| 60 | + |
| 61 | + fn is_identifier_part(&self, ch: char) -> bool { |
| 62 | + ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_' |
| 63 | + } |
| 64 | + |
| 65 | + fn supports_unicode_string_literal(&self) -> bool { |
| 66 | + true |
| 67 | + } |
| 68 | + |
| 69 | + /// See <https://www.postgresql.org/docs/current/sql-createoperator.html> |
| 70 | + fn is_custom_operator_part(&self, ch: char) -> bool { |
| 71 | + matches!( |
| 72 | + ch, |
| 73 | + '+' | '-' |
| 74 | + | '*' |
| 75 | + | '/' |
| 76 | + | '<' |
| 77 | + | '>' |
| 78 | + | '=' |
| 79 | + | '~' |
| 80 | + | '!' |
| 81 | + | '@' |
| 82 | + | '#' |
| 83 | + | '%' |
| 84 | + | '^' |
| 85 | + | '&' |
| 86 | + | '|' |
| 87 | + | '`' |
| 88 | + | '?' |
| 89 | + ) |
| 90 | + } |
| 91 | + |
| 92 | + fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> { |
| 93 | + let token = parser.peek_token(); |
| 94 | + debug!("get_next_precedence() {:?}", token); |
| 95 | + |
| 96 | + // we only return some custom value here when the behaviour (not merely the numeric value) differs |
| 97 | + // from the default implementation |
| 98 | + match token.token { |
| 99 | + Token::Word(w) if w.keyword == Keyword::COLLATE => Some(Ok(COLLATE_PREC)), |
| 100 | + Token::LBracket => Some(Ok(BRACKET_PREC)), |
| 101 | + Token::Arrow |
| 102 | + | Token::LongArrow |
| 103 | + | Token::HashArrow |
| 104 | + | Token::HashLongArrow |
| 105 | + | Token::AtArrow |
| 106 | + | Token::ArrowAt |
| 107 | + | Token::HashMinus |
| 108 | + | Token::AtQuestion |
| 109 | + | Token::AtAt |
| 110 | + | Token::Question |
| 111 | + | Token::QuestionAnd |
| 112 | + | Token::QuestionPipe |
| 113 | + | Token::ExclamationMark |
| 114 | + | Token::Overlap |
| 115 | + | Token::CaretAt |
| 116 | + | Token::StringConcat |
| 117 | + | Token::Sharp |
| 118 | + | Token::ShiftRight |
| 119 | + | Token::ShiftLeft |
| 120 | + | Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)), |
| 121 | + _ => None, |
| 122 | + } |
| 123 | + } |
| 124 | + |
| 125 | + |
| 126 | + fn supports_filter_during_aggregation(&self) -> bool { |
| 127 | + true |
| 128 | + } |
| 129 | + |
| 130 | + fn supports_group_by_expr(&self) -> bool { |
| 131 | + true |
| 132 | + } |
| 133 | + |
| 134 | + fn prec_value(&self, prec: Precedence) -> u8 { |
| 135 | + match prec { |
| 136 | + Precedence::Period => PERIOD_PREC, |
| 137 | + Precedence::DoubleColon => DOUBLE_COLON_PREC, |
| 138 | + Precedence::AtTz => AT_TZ_PREC, |
| 139 | + Precedence::MulDivModOp => MUL_DIV_MOD_OP_PREC, |
| 140 | + Precedence::PlusMinus => PLUS_MINUS_PREC, |
| 141 | + Precedence::Xor => XOR_PREC, |
| 142 | + Precedence::Ampersand => PG_OTHER_PREC, |
| 143 | + Precedence::Caret => CARET_PREC, |
| 144 | + Precedence::Pipe => PG_OTHER_PREC, |
| 145 | + Precedence::Between => BETWEEN_LIKE_PREC, |
| 146 | + Precedence::Eq => EQ_PREC, |
| 147 | + Precedence::Like => BETWEEN_LIKE_PREC, |
| 148 | + Precedence::Is => IS_PREC, |
| 149 | + Precedence::PgOther => PG_OTHER_PREC, |
| 150 | + Precedence::UnaryNot => NOT_PREC, |
| 151 | + Precedence::And => AND_PREC, |
| 152 | + Precedence::Or => OR_PREC, |
| 153 | + } |
| 154 | + } |
| 155 | + |
| 156 | + fn allow_extract_custom(&self) -> bool { |
| 157 | + true |
| 158 | + } |
| 159 | + |
| 160 | + fn allow_extract_single_quotes(&self) -> bool { |
| 161 | + true |
| 162 | + } |
| 163 | + |
| 164 | + /// see <https://www.postgresql.org/docs/13/functions-math.html> |
| 165 | + fn supports_factorial_operator(&self) -> bool { |
| 166 | + true |
| 167 | + } |
| 168 | + |
| 169 | + /// see <https://www.postgresql.org/docs/current/sql-comment.html> |
| 170 | + fn supports_comment_on(&self) -> bool { |
| 171 | + true |
| 172 | + } |
| 173 | + |
| 174 | + |
| 175 | + /// Return true if the dialect supports empty projections in SELECT statements |
| 176 | + /// |
| 177 | + /// Example |
| 178 | + /// ```sql |
| 179 | + /// SELECT from table_name |
| 180 | + /// ``` |
| 181 | + fn supports_empty_projections(&self) -> bool { |
| 182 | + true |
| 183 | + } |
| 184 | + |
| 185 | + fn supports_nested_comments(&self) -> bool { |
| 186 | + true |
| 187 | + } |
| 188 | + |
| 189 | + fn supports_string_escape_constant(&self) -> bool { |
| 190 | + true |
| 191 | + } |
| 192 | + |
| 193 | + fn supports_numeric_literal_underscores(&self) -> bool { |
| 194 | + true |
| 195 | + } |
| 196 | + |
| 197 | + /// See: <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-DECLARATION> |
| 198 | + fn supports_array_typedef_with_brackets(&self) -> bool { |
| 199 | + true |
| 200 | + } |
| 201 | + |
| 202 | + fn supports_geometric_types(&self) -> bool { |
| 203 | + true |
| 204 | + } |
| 205 | + |
| 206 | + // arroyo-specific features |
| 207 | + fn supports_partiql(&self) -> bool { |
| 208 | + true |
| 209 | + } |
| 210 | + |
| 211 | + fn supports_struct_literal(&self) -> bool { |
| 212 | + true |
| 213 | + } |
| 214 | +} |
0 commit comments