sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms

from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_delta_to_binary_interval_op,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    remove_from_array_using_filter,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    def _parse(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    # 2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            if is_property_eq:
                if isinstance(expr.this, exp.Identifier):
                    key = self.sql(exp.Literal.string(expr.name))
                else:
                    key = self.sql(expr.this)
            else:
                key = self.sql(exp.Literal.string(f"_{i}"))

            args.append(f"{key}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }
    DATE_PART_MAPPING.pop("WEEKDAY")

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
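            # Illustrative examples (assumed inputs, not part of the original source):
            # a pointer path like '/a/0' or a back-of-list path like '$.list[#-1]'
            # would be returned verbatim below instead of being parsed as a JSONPath.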
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "RESET": TokenType.COMMAND,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Parser(parser.Parser):
        MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True

        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
            "LIST_COSINE_DISTANCE": exp.CosineDistance.from_arg_list,
            "LIST_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "LIST_FILTER": exp.ArrayFilter.from_arg_list,
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_TRANSFORM": exp.Transform.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
        }

        def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
            index = self._index
            if not self._match_text_seq("LAMBDA"):
                return super()._parse_lambda(alias=alias)

            expressions = self._parse_csv(self._parse_lambda_arg)
            if not self._match(TokenType.COLON):
                self._retreat(index)
                return None

            this = self._replace_lambda(self._parse_assignment(), expressions)
            return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")
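
        # A rough usage sketch (assumed inputs, not part of the original module) of
        # the prefix-alias syntax handled by _parse_expression and _parse_table above;
        # both forms are expected to round-trip to ordinary `... AS ...` aliases:
        #
        #   import sqlglot
        #
        #   sqlglot.parse_one("SELECT foo: 1", read="duckdb")         # roughly 1 AS foo
        #   sqlglot.parse_one("SELECT * FROM tbl: t", read="duckdb")  # roughly t AS tbl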

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

        def _parse_primary(self) -> t.Optional[exp.Expression]:
            if self._match_pair(TokenType.HASH, TokenType.NUMBER):
                return exp.PositionalColumn(this=exp.Literal.number(self._prev.text))

            return super()._parse_primary()

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
        NORMALIZE_EXTRACT_DATE_PARTS = True
        SUPPORTS_LIKE_QUANTIFIERS = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.ArrayUniqueAgg: lambda self, e: self.func(
                "LIST", exp.Distinct(expressions=[e.this])
            ),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: date_delta_to_binary_interval_op(),
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: date_delta_to_binary_interval_op(),
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: date_delta_to_binary_interval_op(),
            exp.DatetimeAdd: date_delta_to_binary_interval_op(),
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: date_delta_to_binary_interval_op(),
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: date_delta_to_binary_interval_op(),
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
        PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def lambda_sql(
            self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
        ) -> str:
            if expression.args.get("colon"):
                prefix = "LAMBDA "
                arrow_sep = ":"
                wrap = False
            else:
                prefix = ""

            lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
            return f"{prefix}{lambda_sql}"

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if isinstance(alias, exp.TableAlias):
                    expression.set("alias", None)
                    if alias.columns:
                        alias = exp.TableAlias(this=seq_get(alias.columns, 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            this = expression.this

            if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            if isinstance(this, exp.First):
                this = exp.AnyValue(this=this.this)

            if not isinstance(this, exp.AnyValue):
                self.unsupported("IGNORE NULLS is not supported for non-window functions.")

            return self.sql(this)

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""

        def aliases_sql(self, expression: exp.Aliases) -> str:
            this = expression.this
            if isinstance(this, exp.Posexplode):
                return self.posexplode_sql(this)

            return super().aliases_sql(expression)

        def posexplode_sql(self, expression: exp.Posexplode) -> str:
            this = expression.this
            parent = expression.parent

            # The default Spark aliases are "pos" and "col", unless specified otherwise
            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

            if isinstance(parent, exp.Aliases):
                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
                pos, col = parent.expressions
            elif isinstance(parent, exp.Table):
                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
                alias = parent.args.get("alias")
                if alias:
                    pos, col = alias.columns or [pos, col]
                    alias.pop()

            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
            gen_subscripts = self.sql(
                exp.Alias(
                    this=exp.Anonymous(
                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                    )
                    - exp.Literal.number(1),
                    alias=pos,
                )
            )

            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

            return posexplode_sql

        def addmonths_sql(self, expression: exp.AddMonths) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

            func = self.func(
                "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH"))
            )

            # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
            # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
            # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
            # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
            if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
                return self.sql(exp.Cast(this=func, to=this.type))

            return self.sql(func)
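
A minimal usage sketch (not part of the module itself): the dialect above is exercised
through sqlglot's public API, for example when transpiling BigQuery SQL into DuckDB SQL.
The sample queries are assumptions chosen to hit the transforms defined above
(_struct_sql and _generate_datetime_array_sql).

    import sqlglot

    # BigQuery's inline STRUCT construction should be canonicalized to a ROW(...) cast
    # (see _struct_sql above)
    sqlglot.transpile(
        "SELECT STRUCT<a STRING, b INTEGER>('str', 1)",
        read="bigquery",
        write="duckdb",
    )

    # GENERATE_DATE_ARRAY should become GENERATE_SERIES, cast back to a DATE array
    # (see _generate_datetime_array_sql above)
    sqlglot.transpile(
        "SELECT GENERATE_DATE_ARRAY(DATE '2023-01-01', DATE '2023-01-05')",
        read="bigquery",
        write="duckdb",
    )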
exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 774 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 775 exp.DateBin: rename_func("TIME_BUCKET"), 776 } 777 778 SUPPORTED_JSON_PATH_PARTS = { 779 exp.JSONPathKey, 780 exp.JSONPathRoot, 781 exp.JSONPathSubscript, 782 exp.JSONPathWildcard, 783 } 784 785 TYPE_MAPPING = { 786 **generator.Generator.TYPE_MAPPING, 787 exp.DataType.Type.BINARY: "BLOB", 788 exp.DataType.Type.BPCHAR: "TEXT", 789 exp.DataType.Type.CHAR: "TEXT", 790 exp.DataType.Type.DATETIME: "TIMESTAMP", 791 exp.DataType.Type.FLOAT: "REAL", 792 exp.DataType.Type.JSONB: "JSON", 793 exp.DataType.Type.NCHAR: "TEXT", 794 exp.DataType.Type.NVARCHAR: "TEXT", 795 exp.DataType.Type.UINT: "UINTEGER", 796 exp.DataType.Type.VARBINARY: "BLOB", 797 exp.DataType.Type.ROWVERSION: "BLOB", 798 exp.DataType.Type.VARCHAR: "TEXT", 799 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 800 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 801 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 802 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 803 } 804 805 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 806 RESERVED_KEYWORDS = { 807 "array", 808 "analyse", 809 "union", 810 "all", 811 "when", 812 "in_p", 813 "default", 814 "create_p", 815 "window", 816 "asymmetric", 817 "to", 818 "else", 819 "localtime", 820 "from", 821 "end_p", 822 "select", 823 "current_date", 824 "foreign", 825 "with", 826 "grant", 827 "session_user", 828 "or", 829 "except", 830 "references", 831 "fetch", 832 "limit", 833 "group_p", 834 "leading", 835 "into", 836 "collate", 837 "offset", 838 "do", 839 "then", 840 "localtimestamp", 841 "check_p", 842 "lateral_p", 843 "current_role", 844 "where", 845 "asc_p", 846 "placing", 847 "desc_p", 848 "user", 849 "unique", 850 "initially", 851 "column", 852 "both", 853 "some", 854 "as", 855 "any", 856 "only", 857 "deferrable", 858 "null_p", 859 "current_time", 860 "true_p", 861 "table", 862 "case", 863 "trailing", 864 "variadic", 865 "for", 866 "on", 867 "distinct", 868 "false_p", 869 "not", 870 "constraint", 871 "current_timestamp", 872 "returning", 873 "primary", 874 "intersect", 875 "having", 876 "analyze", 877 "current_user", 878 "and", 879 "cast", 880 "symmetric", 881 "using", 882 "order", 883 "current_catalog", 884 } 885 886 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 887 888 # DuckDB doesn't generally support CREATE TABLE .. properties 889 # https://duckdb.org/docs/sql/statements/create_table.html 890 PROPERTIES_LOCATION = { 891 prop: exp.Properties.Location.UNSUPPORTED 892 for prop in generator.Generator.PROPERTIES_LOCATION 893 } 894 895 # There are a few exceptions (e.g. 
temporary tables) which are supported or 896 # can be transpiled to DuckDB, so we explicitly override them accordingly 897 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 898 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 899 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 900 PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION 901 902 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 903 exp.FirstValue, 904 exp.Lag, 905 exp.LastValue, 906 exp.Lead, 907 exp.NthValue, 908 ) 909 910 def lambda_sql( 911 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 912 ) -> str: 913 if expression.args.get("colon"): 914 prefix = "LAMBDA " 915 arrow_sep = ":" 916 wrap = False 917 else: 918 prefix = "" 919 920 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 921 return f"{prefix}{lambda_sql}" 922 923 def show_sql(self, expression: exp.Show) -> str: 924 return f"SHOW {expression.name}" 925 926 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 927 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 928 929 def strtotime_sql(self, expression: exp.StrToTime) -> str: 930 if expression.args.get("safe"): 931 formatted_time = self.format_time(expression) 932 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 933 return str_to_time_sql(self, expression) 934 935 def strtodate_sql(self, expression: exp.StrToDate) -> str: 936 if expression.args.get("safe"): 937 formatted_time = self.format_time(expression) 938 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 939 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 940 941 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 942 arg = expression.this 943 if expression.args.get("safe"): 944 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 945 return self.func("JSON", arg) 946 947 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 948 nano = expression.args.get("nano") 949 if nano is not None: 950 expression.set( 951 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 952 ) 953 954 return rename_func("MAKE_TIME")(self, expression) 955 956 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 957 sec = expression.args["sec"] 958 959 milli = expression.args.get("milli") 960 if milli is not None: 961 sec += milli.pop() / exp.Literal.number(1000.0) 962 963 nano = expression.args.get("nano") 964 if nano is not None: 965 sec += nano.pop() / exp.Literal.number(1000000000.0) 966 967 if milli or nano: 968 expression.set("sec", sec) 969 970 return rename_func("MAKE_TIMESTAMP")(self, expression) 971 972 def tablesample_sql( 973 self, 974 expression: exp.TableSample, 975 tablesample_keyword: t.Optional[str] = None, 976 ) -> str: 977 if not isinstance(expression.parent, exp.Select): 978 # This sample clause only applies to a single source, not the entire resulting relation 979 tablesample_keyword = "TABLESAMPLE" 980 981 if expression.args.get("size"): 982 method = expression.args.get("method") 983 if method and method.name.upper() != "RESERVOIR": 984 self.unsupported( 985 f"Sampling method {method} is not supported with a discrete sample count, " 986 "defaulting to reservoir sampling" 987 ) 988 expression.set("method", exp.var("RESERVOIR")) 989 990 return 
super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 991 992 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 993 if isinstance(expression.parent, exp.UserDefinedFunction): 994 return self.sql(expression, "this") 995 return super().columndef_sql(expression, sep) 996 997 def join_sql(self, expression: exp.Join) -> str: 998 if ( 999 expression.side == "LEFT" 1000 and not expression.args.get("on") 1001 and isinstance(expression.this, exp.Unnest) 1002 ): 1003 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 1004 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1005 return super().join_sql(expression.on(exp.true())) 1006 1007 return super().join_sql(expression) 1008 1009 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 1010 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 1011 if expression.args.get("is_end_exclusive"): 1012 return rename_func("RANGE")(self, expression) 1013 1014 return self.function_fallback_sql(expression) 1015 1016 def countif_sql(self, expression: exp.CountIf) -> str: 1017 if self.dialect.version >= Version("1.2"): 1018 return self.function_fallback_sql(expression) 1019 1020 # https://github.com/tobymao/sqlglot/pull/4749 1021 return count_if_to_sum(self, expression) 1022 1023 def bracket_sql(self, expression: exp.Bracket) -> str: 1024 if self.dialect.version >= Version("1.2"): 1025 return super().bracket_sql(expression) 1026 1027 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1028 this = expression.this 1029 if isinstance(this, exp.Array): 1030 this.replace(exp.paren(this)) 1031 1032 bracket = super().bracket_sql(expression) 1033 1034 if not expression.args.get("returns_list_for_maps"): 1035 if not this.type: 1036 from sqlglot.optimizer.annotate_types import annotate_types 1037 1038 this = annotate_types(this, dialect=self.dialect) 1039 1040 if this.is_type(exp.DataType.Type.MAP): 1041 bracket = f"({bracket})[1]" 1042 1043 return bracket 1044 1045 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1046 expression_sql = self.sql(expression, "expression") 1047 1048 func = expression.this 1049 if isinstance(func, exp.PERCENTILES): 1050 # Make the order key the first arg and slide the fraction to the right 1051 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1052 order_col = expression.find(exp.Ordered) 1053 if order_col: 1054 func.set("expression", func.this) 1055 func.set("this", order_col.this) 1056 1057 this = self.sql(expression, "this").rstrip(")") 1058 1059 return f"{this}{expression_sql})" 1060 1061 def length_sql(self, expression: exp.Length) -> str: 1062 arg = expression.this 1063 1064 # Dialects like BQ and Snowflake also accept binary values as args, so 1065 # DDB will attempt to infer the type or resort to case/when resolution 1066 if not expression.args.get("binary") or arg.is_string: 1067 return self.func("LENGTH", arg) 1068 1069 if not arg.type: 1070 from sqlglot.optimizer.annotate_types import annotate_types 1071 1072 arg = annotate_types(arg, dialect=self.dialect) 1073 1074 if arg.is_type(*exp.DataType.TEXT_TYPES): 1075 return self.func("LENGTH", arg) 1076 1077 # We need these casts to make duckdb's static type checker happy 1078 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1079 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1080 1081 case = ( 1082 exp.case(self.func("TYPEOF", arg)) 1083 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1084 .else_( 1085 
exp.Anonymous(this="LENGTH", expressions=[varchar]) 1086 ) # anonymous to break length_sql recursion 1087 ) 1088 1089 return self.sql(case) 1090 1091 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1092 this = expression.this 1093 key = expression.args.get("key") 1094 key_sql = key.name if isinstance(key, exp.Expression) else "" 1095 value_sql = self.sql(expression, "value") 1096 1097 kv_sql = f"{key_sql} := {value_sql}" 1098 1099 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1100 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1101 if isinstance(this, exp.Struct) and not this.expressions: 1102 return self.func("STRUCT_PACK", kv_sql) 1103 1104 return self.func("STRUCT_INSERT", this, kv_sql) 1105 1106 def unnest_sql(self, expression: exp.Unnest) -> str: 1107 explode_array = expression.args.get("explode_array") 1108 if explode_array: 1109 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1110 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1111 expression.expressions.append( 1112 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1113 ) 1114 1115 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1116 alias = expression.args.get("alias") 1117 if isinstance(alias, exp.TableAlias): 1118 expression.set("alias", None) 1119 if alias.columns: 1120 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1121 1122 unnest_sql = super().unnest_sql(expression) 1123 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1124 return self.sql(select) 1125 1126 return super().unnest_sql(expression) 1127 1128 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1129 this = expression.this 1130 1131 if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1132 # DuckDB should render IGNORE NULLS only for the general-purpose 1133 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1134 return super().ignorenulls_sql(expression) 1135 1136 if isinstance(this, exp.First): 1137 this = exp.AnyValue(this=this.this) 1138 1139 if not isinstance(this, exp.AnyValue): 1140 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1141 1142 return self.sql(this) 1143 1144 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1145 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1146 # DuckDB should render RESPECT NULLS only for the general-purpose 1147 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1148 return super().respectnulls_sql(expression) 1149 1150 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1151 return self.sql(expression, "this") 1152 1153 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1154 this = self.sql(expression, "this") 1155 null_text = self.sql(expression, "null") 1156 1157 if null_text: 1158 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1159 1160 return self.func("ARRAY_TO_STRING", this, expression.expression) 1161 1162 @unsupported_args("position", "occurrence") 1163 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1164 group = expression.args.get("group") 1165 params = expression.args.get("parameters") 1166 1167 # Do not render group if there is no following argument, 1168 # and it's the default value for this dialect 1169 if ( 1170 not params 1171 and group 1172 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1173 ): 1174 group = None 1175 return self.func( 1176 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1177 ) 1178 1179 @unsupported_args("culture") 1180 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1181 fmt = expression.args.get("format") 1182 if fmt and fmt.is_int: 1183 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1184 1185 self.unsupported("Only integer formats are supported by NumberToStr") 1186 return self.function_fallback_sql(expression) 1187 1188 def autoincrementcolumnconstraint_sql(self, _) -> str: 1189 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1190 return "" 1191 1192 def aliases_sql(self, expression: exp.Aliases) -> str: 1193 this = expression.this 1194 if isinstance(this, exp.Posexplode): 1195 return self.posexplode_sql(this) 1196 1197 return super().aliases_sql(expression) 1198 1199 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1200 this = expression.this 1201 parent = expression.parent 1202 1203 # The default Spark aliases are "pos" and "col", unless specified otherwise 1204 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1205 1206 if isinstance(parent, exp.Aliases): 1207 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1208 pos, col = parent.expressions 1209 elif isinstance(parent, exp.Table): 1210 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1211 alias = parent.args.get("alias") 1212 if alias: 1213 pos, col = alias.columns or [pos, col] 1214 alias.pop() 1215 1216 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1217 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1218 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1219 gen_subscripts = self.sql( 1220 exp.Alias( 1221 this=exp.Anonymous( 1222 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1223 ) 1224 - exp.Literal.number(1), 1225 alias=pos, 1226 ) 1227 ) 1228 1229 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1230 1231 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1232 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1233 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1234 1235 return posexplode_sql 1236 1237 def addmonths_sql(self, expression: exp.AddMonths) -> str: 1238 this = expression.this 1239 1240 if not this.type: 1241 from sqlglot.optimizer.annotate_types import annotate_types 1242 1243 this = 
annotate_types(this, dialect=self.dialect) 1244 1245 if this.is_type(*exp.DataType.TEXT_TYPES): 1246 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1247 1248 func = self.func( 1249 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1250 ) 1251 1252 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1253 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1254 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1255 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1256 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1257 return self.sql(exp.Cast(this=func, to=this.type)) 1258 1259 return self.sql(func)
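To see a few of the generator rules above end to end, here is a minimal sketch (the queries and names are illustrative, and the exact output may vary across sqlglot versions):

    import sqlglot

    # addmonths_sql: DuckDB's DATE_ADD returns a TIMESTAMP even for DATE inputs,
    # so the result is cast back to DATE to preserve Snowflake's ADD_MONTHS semantics.
    print(sqlglot.transpile(
        "SELECT ADD_MONTHS(CAST('2023-01-31' AS DATE), 1)",
        read="snowflake",
        write="duckdb",
    )[0])

    # join_sql: DuckDB requires an ON clause for LEFT JOIN UNNEST(...), so an
    # always-true condition is added when the source dialect omits it.
    print(sqlglot.transpile(
        "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS x",
        read="bigquery",
        write="duckdb",
    )[0])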
Dialect-level attributes overridden by DuckDB:

- NULL_ORDERING: Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
- CONCAT_COALESCE: A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
- SUPPORTS_ORDER_BY_ALL: Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
- SUPPORTS_FIXED_SIZE_ARRAYS: Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
- STRICT_JSON_PATH_SYNTAX: Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
- NUMBERS_CAN_BE_UNDERSCORE_SEPARATED: Whether number literals can include underscores for better readability.
- NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.
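A quick sketch of two of these flags in action under the DuckDB dialect (indicative only; exact output may vary across sqlglot versions, and t/x are placeholder names):

    import sqlglot

    # SUPPORTS_ORDER_BY_ALL: ORDER BY ALL round-trips when both sides are DuckDB.
    print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])

    # SUPPORTS_FIXED_SIZE_ARRAYS: INT[3] is parsed as a fixed-size array cast,
    # not as a subscript into the INT type.
    print(sqlglot.transpile("SELECT CAST(x AS INT[3]) FROM t", read="duckdb", write="duckdb")[0])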
275 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 276 if isinstance(path, exp.Literal): 277 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 278 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 279 # This check ensures we'll avoid trying to parse these as JSON paths, which can 280 # either result in a noisy warning or in an invalid representation of the path. 281 path_text = path.name 282 if path_text.startswith("/") or "[#" in path_text: 283 return path 284 285 return super().to_json_path(path)
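For example (a sketch assuming current sqlglot behavior; col/t are placeholder names), a path literal written in DuckDB's JSON pointer form should survive a round trip untouched rather than being reparsed as a JSONPath:

    import sqlglot

    # The leading '/' marks DuckDB's JSON pointer syntax, so to_json_path
    # returns the literal unchanged instead of parsing it as a JSONPath.
    print(sqlglot.transpile("SELECT col -> '/a/b/0' FROM t", read="duckdb", write="duckdb")[0])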
287 class Tokenizer(tokens.Tokenizer): 288 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 289 HEREDOC_STRINGS = ["$"] 290 291 HEREDOC_TAG_IS_IDENTIFIER = True 292 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 293 294 KEYWORDS = { 295 **tokens.Tokenizer.KEYWORDS, 296 "//": TokenType.DIV, 297 "**": TokenType.DSTAR, 298 "^@": TokenType.CARET_AT, 299 "@>": TokenType.AT_GT, 300 "<@": TokenType.LT_AT, 301 "ATTACH": TokenType.ATTACH, 302 "BINARY": TokenType.VARBINARY, 303 "BITSTRING": TokenType.BIT, 304 "BPCHAR": TokenType.TEXT, 305 "CHAR": TokenType.TEXT, 306 "DATETIME": TokenType.TIMESTAMPNTZ, 307 "DETACH": TokenType.DETACH, 308 "LOGICAL": TokenType.BOOLEAN, 309 "ONLY": TokenType.ONLY, 310 "PIVOT_WIDER": TokenType.PIVOT, 311 "POSITIONAL": TokenType.POSITIONAL, 312 "RESET": TokenType.COMMAND, 313 "SIGNED": TokenType.INT, 314 "STRING": TokenType.TEXT, 315 "SUMMARIZE": TokenType.SUMMARIZE, 316 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 317 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 318 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 319 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 320 "TIMESTAMP_US": TokenType.TIMESTAMP, 321 "UBIGINT": TokenType.UBIGINT, 322 "UINTEGER": TokenType.UINT, 323 "USMALLINT": TokenType.USMALLINT, 324 "UTINYINT": TokenType.UTINYINT, 325 "VARCHAR": TokenType.TEXT, 326 } 327 KEYWORDS.pop("/*+") 328 329 SINGLE_TOKENS = { 330 **tokens.Tokenizer.SINGLE_TOKENS, 331 "$": TokenType.PARAMETER, 332 } 333 334 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
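A small sketch of the custom operator tokens above (assuming standard sqlglot parsing): '**' tokenizes as DSTAR and parses to exp.Pow via the Parser's EXPONENT table, while '//' becomes integer division:

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one("SELECT 2 ** 3, 7 // 2", read="duckdb")

    # The first projection is a Pow node, the second an IntDiv node.
    print(isinstance(ast.selects[0], exp.Pow), isinstance(ast.selects[1], exp.IntDiv))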
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
336 class Parser(parser.Parser): 337 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True 338 339 BITWISE = { 340 **parser.Parser.BITWISE, 341 TokenType.TILDA: exp.RegexpLike, 342 } 343 BITWISE.pop(TokenType.CARET) 344 345 RANGE_PARSERS = { 346 **parser.Parser.RANGE_PARSERS, 347 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 348 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 349 } 350 351 EXPONENT = { 352 **parser.Parser.EXPONENT, 353 TokenType.CARET: exp.Pow, 354 TokenType.DSTAR: exp.Pow, 355 } 356 357 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 358 359 SHOW_PARSERS = { 360 "TABLES": _show_parser("TABLES"), 361 "ALL TABLES": _show_parser("ALL TABLES"), 362 } 363 364 FUNCTIONS = { 365 **parser.Parser.FUNCTIONS, 366 "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)), 367 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 368 "ARRAY_SORT": exp.SortArray.from_arg_list, 369 "DATEDIFF": _build_date_diff, 370 "DATE_DIFF": _build_date_diff, 371 "DATE_TRUNC": date_trunc_to_time, 372 "DATETRUNC": date_trunc_to_time, 373 "DECODE": lambda args: exp.Decode( 374 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 375 ), 376 "EDITDIST3": exp.Levenshtein.from_arg_list, 377 "ENCODE": lambda args: exp.Encode( 378 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 379 ), 380 "EPOCH": exp.TimeToUnix.from_arg_list, 381 "EPOCH_MS": lambda args: exp.UnixToTime( 382 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 383 ), 384 "GENERATE_SERIES": _build_generate_series(), 385 "JSON": exp.ParseJSON.from_arg_list, 386 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 387 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 388 "LIST_CONTAINS": exp.ArrayContains.from_arg_list, 389 "LIST_COSINE_DISTANCE": exp.CosineDistance.from_arg_list, 390 "LIST_DISTANCE": exp.EuclideanDistance.from_arg_list, 391 "LIST_FILTER": exp.ArrayFilter.from_arg_list, 392 "LIST_HAS": exp.ArrayContains.from_arg_list, 393 "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list, 394 "LIST_REVERSE_SORT": _build_sort_array_desc, 395 "LIST_SORT": exp.SortArray.from_arg_list, 396 "LIST_TRANSFORM": exp.Transform.from_arg_list, 397 "LIST_VALUE": lambda args: exp.Array(expressions=args), 398 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 399 "MAKE_TIMESTAMP": _build_make_timestamp, 400 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 401 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 402 "RANGE": _build_generate_series(end_exclusive=True), 403 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 404 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 405 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 406 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 407 this=seq_get(args, 0), 408 expression=seq_get(args, 1), 409 replacement=seq_get(args, 2), 410 modifiers=seq_get(args, 3), 411 ), 412 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 413 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 414 "STRING_SPLIT": exp.Split.from_arg_list, 415 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 416 "STRING_TO_ARRAY": exp.Split.from_arg_list, 417 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 418 "STRUCT_PACK": exp.Struct.from_arg_list, 419 "STR_SPLIT": exp.Split.from_arg_list, 420 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 421 "TIME_BUCKET": exp.DateBin.from_arg_list, 422 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 423 
"UNNEST": exp.Explode.from_arg_list, 424 "XOR": binary_from_function(exp.BitwiseXor), 425 } 426 427 FUNCTIONS.pop("DATE_SUB") 428 FUNCTIONS.pop("GLOB") 429 430 FUNCTION_PARSERS = { 431 **parser.Parser.FUNCTION_PARSERS, 432 **dict.fromkeys( 433 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 434 ), 435 } 436 FUNCTION_PARSERS.pop("DECODE") 437 438 NO_PAREN_FUNCTION_PARSERS = { 439 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 440 "MAP": lambda self: self._parse_map(), 441 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 442 } 443 444 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 445 TokenType.SEMI, 446 TokenType.ANTI, 447 } 448 449 PLACEHOLDER_PARSERS = { 450 **parser.Parser.PLACEHOLDER_PARSERS, 451 TokenType.PARAMETER: lambda self: ( 452 self.expression(exp.Placeholder, this=self._prev.text) 453 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 454 else None 455 ), 456 } 457 458 TYPE_CONVERTERS = { 459 # https://duckdb.org/docs/sql/data_types/numeric 460 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 461 # https://duckdb.org/docs/sql/data_types/text 462 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 463 } 464 465 STATEMENT_PARSERS = { 466 **parser.Parser.STATEMENT_PARSERS, 467 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 468 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 469 TokenType.SHOW: lambda self: self._parse_show(), 470 } 471 472 SET_PARSERS = { 473 **parser.Parser.SET_PARSERS, 474 "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"), 475 } 476 477 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 478 index = self._index 479 if not self._match_text_seq("LAMBDA"): 480 return super()._parse_lambda(alias=alias) 481 482 expressions = self._parse_csv(self._parse_lambda_arg) 483 if not self._match(TokenType.COLON): 484 self._retreat(index) 485 return None 486 487 this = self._replace_lambda(self._parse_assignment(), expressions) 488 return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True) 489 490 def _parse_expression(self) -> t.Optional[exp.Expression]: 491 # DuckDB supports prefix aliases, e.g. foo: 1 492 if self._next and self._next.token_type == TokenType.COLON: 493 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 494 self._match(TokenType.COLON) 495 comments = self._prev_comments or [] 496 497 this = self._parse_assignment() 498 if isinstance(this, exp.Expression): 499 # Moves the comment next to the alias in `alias: expr /* comment */` 500 comments += this.pop_comments() or [] 501 502 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 503 504 return super()._parse_expression() 505 506 def _parse_table( 507 self, 508 schema: bool = False, 509 joins: bool = False, 510 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 511 parse_bracket: bool = False, 512 is_db_reference: bool = False, 513 parse_partition: bool = False, 514 consume_pipe: bool = False, 515 ) -> t.Optional[exp.Expression]: 516 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 517 if self._next and self._next.token_type == TokenType.COLON: 518 alias = self._parse_table_alias( 519 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 520 ) 521 self._match(TokenType.COLON) 522 comments = self._prev_comments or [] 523 else: 524 alias = None 525 comments = [] 526 527 table = super()._parse_table( 528 schema=schema, 529 joins=joins, 530 alias_tokens=alias_tokens, 531 parse_bracket=parse_bracket, 532 is_db_reference=is_db_reference, 533 parse_partition=parse_partition, 534 ) 535 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 536 # Moves the comment next to the alias in `alias: table /* comment */` 537 comments += table.pop_comments() or [] 538 alias.comments = alias.pop_comments() + comments 539 table.set("alias", alias) 540 541 return table 542 543 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 544 # https://duckdb.org/docs/sql/samples.html 545 sample = super()._parse_table_sample(as_modifier=as_modifier) 546 if sample and not sample.args.get("method"): 547 if sample.args.get("size"): 548 sample.set("method", exp.var("RESERVOIR")) 549 else: 550 sample.set("method", exp.var("SYSTEM")) 551 552 return sample 553 554 def _parse_bracket( 555 self, this: t.Optional[exp.Expression] = None 556 ) -> t.Optional[exp.Expression]: 557 bracket = super()._parse_bracket(this) 558 559 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 560 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 561 bracket.set("returns_list_for_maps", True) 562 563 return bracket 564 565 def _parse_map(self) -> exp.ToMap | exp.Map: 566 if self._match(TokenType.L_BRACE, advance=False): 567 return self.expression(exp.ToMap, this=self._parse_bracket()) 568 569 args = self._parse_wrapped_csv(self._parse_assignment) 570 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 571 572 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 573 return self._parse_field_def() 574 575 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 576 if len(aggregations) == 1: 577 return super()._pivot_column_names(aggregations) 578 return pivot_column_names(aggregations, dialect="duckdb") 579 580 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 581 def _parse_attach_option() -> exp.AttachOption: 582 return self.expression( 583 exp.AttachOption, 584 this=self._parse_var(any_token=True), 585 expression=self._parse_field(any_token=True), 586 ) 587 588 self._match(TokenType.DATABASE) 589 exists = self._parse_exists(not_=is_attach) 590 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 591 592 if self._match(TokenType.L_PAREN, advance=False): 593 expressions = self._parse_wrapped_csv(_parse_attach_option) 594 else: 595 expressions = None 596 597 return ( 598 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 599 if is_attach 600 else self.expression(exp.Detach, this=this, exists=exists) 601 ) 602 603 def _parse_show_duckdb(self, this: str) -> exp.Show: 604 return self.expression(exp.Show, this=this) 605 606 def _parse_primary(self) -> t.Optional[exp.Expression]: 607 if self._match_pair(TokenType.HASH, TokenType.NUMBER): 608 return exp.PositionalColumn(this=exp.Literal.number(self._prev.text)) 609 610 return super()._parse_primary()
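Both DuckDB-specific parses above, prefix aliases and Python-style lambdas, should round-trip under the DuckDB dialect; a minimal sketch (names are placeholders):

    import sqlglot

    # Prefix aliases are parsed into ordinary alias nodes:
    # expected to render as `SELECT 1 AS foo FROM tbl AS bar`.
    print(sqlglot.transpile("SELECT foo: 1 FROM bar: tbl", read="duckdb", write="duckdb")[0])

    # LAMBDA syntax is parsed with colon=True, which lambda_sql renders
    # back with the LAMBDA keyword and colon separator.
    print(sqlglot.transpile(
        "SELECT LIST_TRANSFORM([1, 2], LAMBDA x: x + 1)", read="duckdb", write="duckdb"
    )[0])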
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
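A minimal sketch of these options (assuming sqlglot's top-level API, which forwards them to the Parser):

    import sqlglot
    from sqlglot.errors import ErrorLevel

    # With ErrorLevel.WARN the parser logs problems and returns a best-effort
    # tree instead of raising; with RAISE it collects up to max_errors messages.
    sqlglot.transpile("SELECT 1 +", read="duckdb", error_level=ErrorLevel.WARN)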
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
annotate_types(this, dialect=self.dialect) 1244 1245 if this.is_type(*exp.DataType.TEXT_TYPES): 1246 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1247 1248 func = self.func( 1249 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1250 ) 1251 1252 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1253 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1254 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1255 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1256 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1257 return self.sql(exp.Cast(this=func, to=this.type)) 1258 1259 return self.sql(func)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
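As an illustrative aside (not part of the generated docs): these settings are normally supplied through sqlglot's top-level API rather than by instantiating the generator directly. A minimal sketch, assuming a recent sqlglot:

    import sqlglot

    # `pretty` (like the other settings above) flows through to the generator.
    sql = "SELECT a FROM tbl WHERE b > 1"
    print(sqlglot.transpile(sql, write="duckdb", pretty=True)[0])
    # SELECT
    #   a
    # FROM tbl
    # WHERE
    #   b > 1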
910 def lambda_sql( 911 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 912 ) -> str: 913 if expression.args.get("colon"): 914 prefix = "LAMBDA " 915 arrow_sep = ":" 916 wrap = False 917 else: 918 prefix = "" 919 920 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 921 return f"{prefix}{lambda_sql}"
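A hedged round-trip sketch of the arrow-style lambda this method emits (the `colon` variant instead produces DuckDB's `LAMBDA x: ...` form):

    import sqlglot

    # Arrow-style lambdas use "->" as the separator and may be wrapped.
    sql = "SELECT LIST_TRANSFORM([1, 2], x -> x + 1)"
    print(sqlglot.parse_one(sql, read="duckdb").sql(dialect="duckdb"))
    # Expected shape: SELECT LIST_TRANSFORM([1, 2], x -> x + 1)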
935 def strtodate_sql(self, expression: exp.StrToDate) -> str: 936 if expression.args.get("safe"): 937 formatted_time = self.format_time(expression) 938 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 939 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
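For illustration, a hedged example of the safe path (Snowflake's TRY_TO_DATE is one source that sets the `safe` flag; the exact format mapping may vary by version):

    import sqlglot

    # safe=True routes through TRY_STRPTIME instead of STRPTIME.
    sql = "SELECT TRY_TO_DATE('05-2025', 'mm-yyyy')"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # Expected shape: SELECT CAST(TRY_STRPTIME('05-2025', '%m-%Y') AS DATE)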
947 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 948 nano = expression.args.get("nano") 949 if nano is not None: 950 expression.set( 951 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 952 ) 953 954 return rename_func("MAKE_TIME")(self, expression)
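A hedged sketch of the common path, assuming BigQuery's TIME(h, m, s) parses to exp.TimeFromParts:

    import sqlglot

    # TIME built from parts is renamed to DuckDB's MAKE_TIME.
    print(sqlglot.transpile("SELECT TIME(12, 30, 15)", read="bigquery", write="duckdb")[0])
    # Expected shape: SELECT MAKE_TIME(12, 30, 15)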
956 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 957 sec = expression.args["sec"] 958 959 milli = expression.args.get("milli") 960 if milli is not None: 961 sec += milli.pop() / exp.Literal.number(1000.0) 962 963 nano = expression.args.get("nano") 964 if nano is not None: 965 sec += nano.pop() / exp.Literal.number(1000000000.0) 966 967 if milli or nano: 968 expression.set("sec", sec) 969 970 return rename_func("MAKE_TIMESTAMP")(self, expression)
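Similarly, a hedged sketch assuming Snowflake's TIMESTAMP_FROM_PARTS parses to exp.TimestampFromParts:

    import sqlglot

    # milli/nano arguments, when present, are folded into the seconds
    # argument as shown in the method above.
    sql = "SELECT TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5)"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # Expected shape: SELECT MAKE_TIMESTAMP(2024, 1, 2, 3, 4, 5)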
972 def tablesample_sql( 973 self, 974 expression: exp.TableSample, 975 tablesample_keyword: t.Optional[str] = None, 976 ) -> str: 977 if not isinstance(expression.parent, exp.Select): 978 # This sample clause only applies to a single source, not the entire resulting relation 979 tablesample_keyword = "TABLESAMPLE" 980 981 if expression.args.get("size"): 982 method = expression.args.get("method") 983 if method and method.name.upper() != "RESERVOIR": 984 self.unsupported( 985 f"Sampling method {method} is not supported with a discrete sample count, " 986 "defaulting to reservoir sampling" 987 ) 988 expression.set("method", exp.var("RESERVOIR")) 989 990 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
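A hedged example of the reservoir fallback for discrete sample counts (Snowflake's BERNOULLI row sampling is one trigger):

    import sqlglot

    # A non-reservoir method combined with a row count is coerced to
    # RESERVOIR, emitting an "unsupported" warning along the way.
    sql = "SELECT * FROM tbl TABLESAMPLE BERNOULLI (5 ROWS)"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # Expected shape: SELECT * FROM tbl TABLESAMPLE RESERVOIR (5 ROWS)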
997 def join_sql(self, expression: exp.Join) -> str: 998 if ( 999 expression.side == "LEFT" 1000 and not expression.args.get("on") 1001 and isinstance(expression.this, exp.Unnest) 1002 ): 1003 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 1004 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1005 return super().join_sql(expression.on(exp.true())) 1006 1007 return super().join_sql(expression)
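A hedged sketch of the dummy ON clause (BigQuery allows this join without one):

    import sqlglot

    # DuckDB requires an ON clause here, so an always-true one is added.
    sql = "SELECT * FROM t LEFT JOIN UNNEST(t.arr)"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # Expected shape: SELECT * FROM t LEFT JOIN UNNEST(t.arr) ON TRUE
    # (the UNNEST itself may be further rewritten, depending on version)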
1023 def bracket_sql(self, expression: exp.Bracket) -> str: 1024 if self.dialect.version >= Version("1.2"): 1025 return super().bracket_sql(expression) 1026 1027 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1028 this = expression.this 1029 if isinstance(this, exp.Array): 1030 this.replace(exp.paren(this)) 1031 1032 bracket = super().bracket_sql(expression) 1033 1034 if not expression.args.get("returns_list_for_maps"): 1035 if not this.type: 1036 from sqlglot.optimizer.annotate_types import annotate_types 1037 1038 this = annotate_types(this, dialect=self.dialect) 1039 1040 if this.is_type(exp.DataType.Type.MAP): 1041 bracket = f"({bracket})[1]" 1042 1043 return bracket
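A hedged sketch of the pre-1.2 rewrite; this assumes the dialect name string accepts a `version` setting (e.g. "duckdb, version=1.0"), the general mechanism sqlglot uses for dialect settings:

    import sqlglot

    # Targeting an old DuckDB wraps array literals before indexing them;
    # MAP subscripts additionally get the (...)[1] scalar rewrite.
    sql = "SELECT [1, 2][1]"
    print(sqlglot.transpile(sql, read="duckdb", write="duckdb, version=1.0")[0])
    # Expected shape: SELECT ([1, 2])[1]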
1045 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1046 expression_sql = self.sql(expression, "expression") 1047 1048 func = expression.this 1049 if isinstance(func, exp.PERCENTILES): 1050 # Make the order key the first arg and slide the fraction to the right 1051 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1052 order_col = expression.find(exp.Ordered) 1053 if order_col: 1054 func.set("expression", func.this) 1055 func.set("this", order_col.this) 1056 1057 this = self.sql(expression, "this").rstrip(")") 1058 1059 return f"{this}{expression_sql})"
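For illustration, a hedged example of the ordered-set rewrite (DuckDB's transforms also rename PERCENTILE_CONT to QUANTILE_CONT):

    import sqlglot

    # The ORDER BY key becomes the first argument; the fraction slides right.
    sql = "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t"
    print(sqlglot.transpile(sql, read="postgres", write="duckdb")[0])
    # Expected shape: SELECT QUANTILE_CONT(x, 0.5) FROM t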
1061 def length_sql(self, expression: exp.Length) -> str: 1062 arg = expression.this 1063 1064 # Dialects like BQ and Snowflake also accept binary values as args, so 1065 # DDB will attempt to infer the type or resort to case/when resolution 1066 if not expression.args.get("binary") or arg.is_string: 1067 return self.func("LENGTH", arg) 1068 1069 if not arg.type: 1070 from sqlglot.optimizer.annotate_types import annotate_types 1071 1072 arg = annotate_types(arg, dialect=self.dialect) 1073 1074 if arg.is_type(*exp.DataType.TEXT_TYPES): 1075 return self.func("LENGTH", arg) 1076 1077 # We need these casts to make duckdb's static type checker happy 1078 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1079 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1080 1081 case = ( 1082 exp.case(self.func("TYPEOF", arg)) 1083 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1084 .else_( 1085 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1086 ) # anonymous to break length_sql recursion 1087 ) 1088 1089 return self.sql(case)
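A hedged sketch of the untyped-argument path (BigQuery's LENGTH also accepts BYTES, which sets the `binary` flag):

    import sqlglot

    # With no known type, a CASE over TYPEOF dispatches between
    # OCTET_LENGTH (BLOB) and LENGTH (text).
    sql = "SELECT LENGTH(col) FROM t"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # Expected shape:
    # SELECT CASE TYPEOF(col) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(col AS BLOB))
    # ELSE LENGTH(CAST(col AS TEXT)) END FROM t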
1091 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1092 this = expression.this 1093 key = expression.args.get("key") 1094 key_sql = key.name if isinstance(key, exp.Expression) else "" 1095 value_sql = self.sql(expression, "value") 1096 1097 kv_sql = f"{key_sql} := {value_sql}" 1098 1099 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1100 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1101 if isinstance(this, exp.Struct) and not this.expressions: 1102 return self.func("STRUCT_PACK", kv_sql) 1103 1104 return self.func("STRUCT_INSERT", this, kv_sql)
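A hedged example of the empty-struct special case described in the comment above:

    import sqlglot

    # Inserting into an empty object builds the struct from scratch.
    sql = "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 'v')"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # Expected shape: SELECT STRUCT_PACK(k := 'v')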
1106 def unnest_sql(self, expression: exp.Unnest) -> str: 1107 explode_array = expression.args.get("explode_array") 1108 if explode_array: 1109 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1110 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1111 expression.expressions.append( 1112 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1113 ) 1114 1115 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1116 alias = expression.args.get("alias") 1117 if isinstance(alias, exp.TableAlias): 1118 expression.set("alias", None) 1119 if alias.columns: 1120 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1121 1122 unnest_sql = super().unnest_sql(expression) 1123 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1124 return self.sql(select) 1125 1126 return super().unnest_sql(expression)
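A hedged sketch of the BigQuery explode-array path:

    import sqlglot

    # The top-level explosion is emulated with max_depth => 2 inside a
    # subquery, and the column alias becomes a table alias.
    sql = "SELECT * FROM UNNEST([STRUCT(1 AS a)]) AS t"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # Expected shape:
    # SELECT * FROM (SELECT UNNEST([{'a': 1}], max_depth => 2)) AS t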
1128 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1129 this = expression.this 1130 1131 if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1132 # DuckDB should render IGNORE NULLS only for the general-purpose 1133 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1134 return super().ignorenulls_sql(expression) 1135 1136 if isinstance(this, exp.First): 1137 this = exp.AnyValue(this=this.this) 1138 1139 if not isinstance(this, exp.AnyValue): 1140 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1141 1142 return self.sql(this)
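A hedged example of the window-function path (RESPECT NULLS is handled symmetrically by respectnulls_sql below):

    import sqlglot

    # FIRST_VALUE accepts IGNORE NULLS in DuckDB, so it is preserved.
    sql = "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # Expected shape: SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t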
1144 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1145 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1146 # DuckDB should render RESPECT NULLS only for the general-purpose 1147 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1148 return super().respectnulls_sql(expression) 1149 1150 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1151 return self.sql(expression, "this")
1153 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1154 this = self.sql(expression, "this") 1155 null_text = self.sql(expression, "null") 1156 1157 if null_text: 1158 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1159 1160 return self.func("ARRAY_TO_STRING", this, expression.expression)
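A hedged example of the NULL-replacement argument (supported by e.g. BigQuery's ARRAY_TO_STRING):

    import sqlglot

    # The third argument is emulated with LIST_TRANSFORM + COALESCE.
    sql = "SELECT ARRAY_TO_STRING(arr, ',', 'NULL')"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # Expected shape:
    # SELECT ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(x, 'NULL')), ',')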
1162 @unsupported_args("position", "occurrence") 1163 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1164 group = expression.args.get("group") 1165 params = expression.args.get("parameters") 1166 1167 # Do not render group if there is no following argument, 1168 # and it's the default value for this dialect 1169 if ( 1170 not params 1171 and group 1172 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1173 ): 1174 group = None 1175 return self.func( 1176 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1177 )
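For illustration, a hedged example where the group index must be kept (BigQuery extracts capturing group 1 by default, while DuckDB defaults to group 0):

    import sqlglot

    # The group survives because it differs from DuckDB's default of 0.
    sql = "SELECT REGEXP_EXTRACT('abc', 'a(b)c')"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # Expected shape: SELECT REGEXP_EXTRACT('abc', 'a(b)c', 1)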
1179 @unsupported_args("culture") 1180 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1181 fmt = expression.args.get("format") 1182 if fmt and fmt.is_int: 1183 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1184 1185 self.unsupported("Only integer formats are supported by NumberToStr") 1186 return self.function_fallback_sql(expression)
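A hedged sketch, assuming MySQL's FORMAT(x, d) parses to exp.NumberToStr:

    import sqlglot

    # Integer precision maps onto a Python-style format string.
    sql = "SELECT FORMAT(1234.5678, 2)"
    print(sqlglot.transpile(sql, read="mysql", write="duckdb")[0])
    # Expected shape: SELECT FORMAT('{:,.2f}', 1234.5678)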
1199 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1200 this = expression.this 1201 parent = expression.parent 1202 1203 # The default Spark aliases are "pos" and "col", unless specified otherwise 1204 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1205 1206 if isinstance(parent, exp.Aliases): 1207 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1208 pos, col = parent.expressions 1209 elif isinstance(parent, exp.Table): 1210 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1211 alias = parent.args.get("alias") 1212 if alias: 1213 pos, col = alias.columns or [pos, col] 1214 alias.pop() 1215 1216 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1217 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1218 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1219 gen_subscripts = self.sql( 1220 exp.Alias( 1221 this=exp.Anonymous( 1222 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1223 ) 1224 - exp.Literal.number(1), 1225 alias=pos, 1226 ) 1227 ) 1228 1229 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1230 1231 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1232 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1233 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1234 1235 return posexplode_sql
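A hedged example of the column case with Spark's default aliases:

    import sqlglot

    # pos is 1-indexed in DuckDB, hence the trailing "- 1".
    sql = "SELECT POSEXPLODE(arr) FROM t"
    print(sqlglot.transpile(sql, read="spark", write="duckdb")[0])
    # Expected shape:
    # SELECT GENERATE_SUBSCRIPTS(arr, 1) - 1 AS pos, UNNEST(arr) AS col FROM t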
1237 def addmonths_sql(self, expression: exp.AddMonths) -> str: 1238 this = expression.this 1239 1240 if not this.type: 1241 from sqlglot.optimizer.annotate_types import annotate_types 1242 1243 this = annotate_types(this, dialect=self.dialect) 1244 1245 if this.is_type(*exp.DataType.TEXT_TYPES): 1246 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1247 1248 func = self.func( 1249 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1250 ) 1251 1252 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1253 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1254 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1255 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1256 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1257 return self.sql(exp.Cast(this=func, to=this.type)) 1258 1259 return self.sql(func)
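Finally, a hedged example of the type-preserving cast for DATE inputs:

    import sqlglot

    # The DATE_ADD result is cast back to DATE to mirror Snowflake's
    # ADD_MONTHS semantics, which preserve the input type.
    sql = "SELECT ADD_MONTHS(CAST('2023-01-31' AS DATE), 1)"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # Expected shape: a DATE_ADD(..., INTERVAL ... MONTH) wrapped in CAST(... AS DATE)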
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- SUPPORTS_BETWEEN_FLAGS
- MATCH_AGAINST_TABLE_PREFIX
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql