sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
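
# Illustrative sketch, not part of the upstream module: a doctest-style look at
# two of the builders above. build_like swaps its arguments because the function
# form LIKE(pattern, value) corresponds to `value LIKE pattern`:
#
#     >>> like = build_like([exp.Literal.string("%a%"), exp.column("x")])
#     >>> like.this.sql(), like.expression.sql()
#     ('x', "'%a%'")
#
# build_var_map pairs up a flat key/value argument list:
#
#     >>> m = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> isinstance(m, exp.VarMap)
#     True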


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )
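
# Illustrative sketch, not part of the upstream module: build_mod parenthesizes
# binary operands so operator precedence survives the rewrite from the function
# form, e.g. MOD(a + 1, 7):
#
#     >>> build_mod([exp.column("a") + 1, exp.Literal.number(7)]).sql()
#     '(a + 1) % 7'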


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
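
    # Illustrative sketch, not part of the upstream module: dialect parsers
    # typically extend this table by copying it and overriding entries, e.g.
    # (hypothetical subclass):
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "FOO": lambda args: exp.Anonymous(this="FOO", expressions=args),
    #         }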

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }
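
    # Illustrative sketch, not part of the upstream module: TYPE_TOKENS drives
    # _parse_types, which is what turns the target of a CAST into a DataType:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CAST(x AS DECIMAL(10, 2))").to.sql()
    #     'DECIMAL(10, 2)'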

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)
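
    # Illustrative sketch, not part of the upstream module: because these
    # keywords are in ID_VAR_TOKENS, they can still be parsed as plain
    # identifiers when the context calls for one:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT filter FROM update").sql()
    #     'SELECT filter FROM update'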

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
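
    # Illustrative sketch, not part of the upstream module: the operator tables
    # above (CONJUNCTION, DISJUNCTION, ..., FACTOR) drive a classic precedence
    # climb, so lower-precedence operators end up closer to the root:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("1 + 2 * 3").key, sqlglot.parse_one("a OR b AND c").key
    #     ('add', 'or')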

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
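
    # Illustrative sketch, not part of the upstream module: EXPRESSION_PARSERS
    # is the dispatch table behind parse_into / parse_one(..., into=...):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("a AND b", into=exp.Condition).key
    #     'and'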

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }
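
    # Illustrative sketch, not part of the upstream module: top-level statements
    # dispatch on their first token through STATEMENT_PARSERS:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("USE db").key
    #     'use'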

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
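
    # Illustrative sketch, not part of the upstream module: RANGE_PARSERS above
    # handles the postfix predicates that follow a parsed operand:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("x BETWEEN 1 AND 2").key
    #     'between'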

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
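
    # Illustrative sketch, not part of the upstream module: CONSTRAINT_PARSERS
    # is consulted while parsing column definitions, e.g.:
    #
    #     >>> import sqlglot
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL)")
    #     >>> ddl.find(exp.NotNullColumnConstraint) is not None
    #     True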

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have a parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1244 "GAP_FILL": lambda self: self._parse_gap_fill(), 1245 "JSON_OBJECT": lambda self: self._parse_json_object(), 1246 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1247 "JSON_TABLE": lambda self: self._parse_json_table(), 1248 "MATCH": lambda self: self._parse_match_against(), 1249 "NORMALIZE": lambda self: self._parse_normalize(), 1250 "OPENJSON": lambda self: self._parse_open_json(), 1251 "OVERLAY": lambda self: self._parse_overlay(), 1252 "POSITION": lambda self: self._parse_position(), 1253 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1254 "STRING_AGG": lambda self: self._parse_string_agg(), 1255 "SUBSTRING": lambda self: self._parse_substring(), 1256 "TRIM": lambda self: self._parse_trim(), 1257 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1258 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1259 "XMLELEMENT": lambda self: self.expression( 1260 exp.XMLElement, 1261 this=self._match_text_seq("NAME") and self._parse_id_var(), 1262 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1263 ), 1264 "XMLTABLE": lambda self: self._parse_xml_table(), 1265 } 1266 1267 QUERY_MODIFIER_PARSERS = { 1268 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1269 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1270 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1271 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1272 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1273 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1274 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1275 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1276 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1277 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1278 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1279 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1280 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1281 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1282 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1283 TokenType.CLUSTER_BY: lambda self: ( 1284 "cluster", 1285 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1286 ), 1287 TokenType.DISTRIBUTE_BY: lambda self: ( 1288 "distribute", 1289 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1290 ), 1291 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1292 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1293 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1294 } 1295 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1296 1297 SET_PARSERS = { 1298 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1299 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1300 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1301 "TRANSACTION": lambda self: self._parse_set_transaction(), 1302 } 1303 1304 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1305 1306 TYPE_LITERAL_PARSERS = { 1307 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1308 } 1309 1310 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
1617 """ 1618 return self._parse( 1619 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1620 ) 1621 1622 def parse_into( 1623 self, 1624 expression_types: exp.IntoType, 1625 raw_tokens: t.List[Token], 1626 sql: t.Optional[str] = None, 1627 ) -> t.List[t.Optional[exp.Expression]]: 1628 """ 1629 Parses a list of tokens into a given Expression type. If a collection of Expression 1630 types is given instead, this method will try to parse the token list into each one 1631 of them, stopping at the first for which the parsing succeeds. 1632 1633 Args: 1634 expression_types: The expression type(s) to try and parse the token list into. 1635 raw_tokens: The list of tokens. 1636 sql: The original SQL string, used to produce helpful debug messages. 1637 1638 Returns: 1639 The target Expression. 1640 """ 1641 errors = [] 1642 for expression_type in ensure_list(expression_types): 1643 parser = self.EXPRESSION_PARSERS.get(expression_type) 1644 if not parser: 1645 raise TypeError(f"No parser registered for {expression_type}") 1646 1647 try: 1648 return self._parse(parser, raw_tokens, sql) 1649 except ParseError as e: 1650 e.errors[0]["into_expression"] = expression_type 1651 errors.append(e) 1652 1653 raise ParseError( 1654 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1655 errors=merge_errors(errors), 1656 ) from errors[-1] 1657 1658 def _parse( 1659 self, 1660 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1661 raw_tokens: t.List[Token], 1662 sql: t.Optional[str] = None, 1663 ) -> t.List[t.Optional[exp.Expression]]: 1664 self.reset() 1665 self.sql = sql or "" 1666 1667 total = len(raw_tokens) 1668 chunks: t.List[t.List[Token]] = [[]] 1669 1670 for i, token in enumerate(raw_tokens): 1671 if token.token_type == TokenType.SEMICOLON: 1672 if token.comments: 1673 chunks.append([token]) 1674 1675 if i < total - 1: 1676 chunks.append([]) 1677 else: 1678 chunks[-1].append(token) 1679 1680 expressions = [] 1681 1682 for tokens in chunks: 1683 self._index = -1 1684 self._tokens = tokens 1685 self._advance() 1686 1687 expressions.append(parse_method(self)) 1688 1689 if self._index < len(self._tokens): 1690 self.raise_error("Invalid expression / Unexpected token") 1691 1692 self.check_errors() 1693 1694 return expressions 1695 1696 def check_errors(self) -> None: 1697 """Logs or raises any found errors, depending on the chosen error level setting.""" 1698 if self.error_level == ErrorLevel.WARN: 1699 for error in self.errors: 1700 logger.error(str(error)) 1701 elif self.error_level == ErrorLevel.RAISE and self.errors: 1702 raise ParseError( 1703 concat_messages(self.errors, self.max_errors), 1704 errors=merge_errors(self.errors), 1705 ) 1706 1707 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1708 """ 1709 Appends an error in the list of recorded errors or raises it, depending on the chosen 1710 error level setting. 1711 """ 1712 token = token or self._curr or self._prev or Token.string("") 1713 start = token.start 1714 end = token.end + 1 1715 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1716 highlight = self.sql[start:end] 1717 end_context = self.sql[end : end + self.error_message_context] 1718 1719 error = ParseError.new( 1720 f"{message}. 
Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
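    # A minimal illustration of the backtracking helper below (hedged sketch;
    # `_parse_some_optional_clause` is a hypothetical method name):
    #
    #     clause = self._try_parse(self._parse_some_optional_clause)
    #     if clause is None:
    #         ...  # token position was restored, so another rule can be tried
    #
    # i.e. a ParseError raised inside the attempted rule yields None instead of
    # surfacing, regardless of the configured ErrorLevel.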
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return
self._parse_query_modifiers(expression) 1930 1931 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1932 start = self._prev 1933 temporary = self._match(TokenType.TEMPORARY) 1934 materialized = self._match_text_seq("MATERIALIZED") 1935 1936 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1937 if not kind: 1938 return self._parse_as_command(start) 1939 1940 concurrently = self._match_text_seq("CONCURRENTLY") 1941 if_exists = exists or self._parse_exists() 1942 1943 if kind == "COLUMN": 1944 this = self._parse_column() 1945 else: 1946 this = self._parse_table_parts( 1947 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1948 ) 1949 1950 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1951 1952 if self._match(TokenType.L_PAREN, advance=False): 1953 expressions = self._parse_wrapped_csv(self._parse_types) 1954 else: 1955 expressions = None 1956 1957 return self.expression( 1958 exp.Drop, 1959 exists=if_exists, 1960 this=this, 1961 expressions=expressions, 1962 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1963 temporary=temporary, 1964 materialized=materialized, 1965 cascade=self._match_text_seq("CASCADE"), 1966 constraints=self._match_text_seq("CONSTRAINTS"), 1967 purge=self._match_text_seq("PURGE"), 1968 cluster=cluster, 1969 concurrently=concurrently, 1970 ) 1971 1972 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1973 return ( 1974 self._match_text_seq("IF") 1975 and (not not_ or self._match(TokenType.NOT)) 1976 and self._match(TokenType.EXISTS) 1977 ) 1978 1979 def _parse_create(self) -> exp.Create | exp.Command: 1980 # Note: this can't be None because we've matched a statement parser 1981 start = self._prev 1982 1983 replace = ( 1984 start.token_type == TokenType.REPLACE 1985 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1986 or self._match_pair(TokenType.OR, TokenType.ALTER) 1987 ) 1988 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1989 1990 unique = self._match(TokenType.UNIQUE) 1991 1992 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1993 clustered = True 1994 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1995 "COLUMNSTORE" 1996 ): 1997 clustered = False 1998 else: 1999 clustered = None 2000 2001 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2002 self._advance() 2003 2004 properties = None 2005 create_token = self._match_set(self.CREATABLES) and self._prev 2006 2007 if not create_token: 2008 # exp.Properties.Location.POST_CREATE 2009 properties = self._parse_properties() 2010 create_token = self._match_set(self.CREATABLES) and self._prev 2011 2012 if not properties or not create_token: 2013 return self._parse_as_command(start) 2014 2015 concurrently = self._match_text_seq("CONCURRENTLY") 2016 exists = self._parse_exists(not_=True) 2017 this = None 2018 expression: t.Optional[exp.Expression] = None 2019 indexes = None 2020 no_schema_binding = None 2021 begin = None 2022 end = None 2023 clone = None 2024 2025 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2026 nonlocal properties 2027 if properties and temp_props: 2028 properties.expressions.extend(temp_props.expressions) 2029 elif temp_props: 2030 properties = temp_props 2031 2032 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2033 this = self._parse_user_defined_function(kind=create_token.token_type) 2034 2035 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2036 extend_props(self._parse_properties()) 2037 2038 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2039 extend_props(self._parse_properties()) 2040 2041 if not expression: 2042 if self._match(TokenType.COMMAND): 2043 expression = self._parse_as_command(self._prev) 2044 else: 2045 begin = self._match(TokenType.BEGIN) 2046 return_ = self._match_text_seq("RETURN") 2047 2048 if self._match(TokenType.STRING, advance=False): 2049 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2050 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2051 expression = self._parse_string() 2052 extend_props(self._parse_properties()) 2053 else: 2054 expression = self._parse_user_defined_function_expression() 2055 2056 end = self._match_text_seq("END") 2057 2058 if return_: 2059 expression = self.expression(exp.Return, this=expression) 2060 elif create_token.token_type == TokenType.INDEX: 2061 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2062 if not self._match(TokenType.ON): 2063 index = self._parse_id_var() 2064 anonymous = False 2065 else: 2066 index = None 2067 anonymous = True 2068 2069 this = self._parse_index(index=index, anonymous=anonymous) 2070 elif create_token.token_type in self.DB_CREATABLES: 2071 table_parts = self._parse_table_parts( 2072 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2073 ) 2074 2075 # exp.Properties.Location.POST_NAME 2076 self._match(TokenType.COMMA) 2077 extend_props(self._parse_properties(before=True)) 2078 2079 this = self._parse_schema(this=table_parts) 2080 2081 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2082 extend_props(self._parse_properties()) 2083 2084 has_alias = self._match(TokenType.ALIAS) 2085 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2086 # exp.Properties.Location.POST_ALIAS 2087 extend_props(self._parse_properties()) 2088 2089 if create_token.token_type == TokenType.SEQUENCE: 2090 expression = self._parse_types() 2091 props = self._parse_properties() 2092 if props: 2093 sequence_props = exp.SequenceProperties() 2094 options = [] 2095 for prop in props: 2096 if isinstance(prop, exp.SequenceProperties): 2097 for arg, value in prop.args.items(): 2098 if arg == "options": 2099 options.extend(value) 2100 else: 2101 sequence_props.set(arg, value) 2102 prop.pop() 2103 2104 if options: 2105 sequence_props.set("options", options) 2106 2107 props.append("expressions", sequence_props) 2108 extend_props(props) 2109 else: 2110 expression = self._parse_ddl_select() 2111 2112 # Some dialects also support using a table as an alias instead of a SELECT. 2113 # Here we fallback to this as an alternative. 
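            # (For example, "CREATE TABLE t1 AS t2" -- the part after AS is a bare
            # table reference rather than a query, so _parse_table_parts is attempted
            # with backtracking via _try_parse.)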
2114 if not expression and has_alias: 2115 expression = self._try_parse(self._parse_table_parts) 2116 2117 if create_token.token_type == TokenType.TABLE: 2118 # exp.Properties.Location.POST_EXPRESSION 2119 extend_props(self._parse_properties()) 2120 2121 indexes = [] 2122 while True: 2123 index = self._parse_index() 2124 2125 # exp.Properties.Location.POST_INDEX 2126 extend_props(self._parse_properties()) 2127 if not index: 2128 break 2129 else: 2130 self._match(TokenType.COMMA) 2131 indexes.append(index) 2132 elif create_token.token_type == TokenType.VIEW: 2133 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2134 no_schema_binding = True 2135 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2136 extend_props(self._parse_properties()) 2137 2138 shallow = self._match_text_seq("SHALLOW") 2139 2140 if self._match_texts(self.CLONE_KEYWORDS): 2141 copy = self._prev.text.lower() == "copy" 2142 clone = self.expression( 2143 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2144 ) 2145 2146 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2147 return self._parse_as_command(start) 2148 2149 create_kind_text = create_token.text.upper() 2150 return self.expression( 2151 exp.Create, 2152 this=this, 2153 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2154 replace=replace, 2155 refresh=refresh, 2156 unique=unique, 2157 expression=expression, 2158 exists=exists, 2159 properties=properties, 2160 indexes=indexes, 2161 no_schema_binding=no_schema_binding, 2162 begin=begin, 2163 end=end, 2164 clone=clone, 2165 concurrently=concurrently, 2166 clustered=clustered, 2167 ) 2168 2169 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2170 seq = exp.SequenceProperties() 2171 2172 options = [] 2173 index = self._index 2174 2175 while self._curr: 2176 self._match(TokenType.COMMA) 2177 if self._match_text_seq("INCREMENT"): 2178 self._match_text_seq("BY") 2179 self._match_text_seq("=") 2180 seq.set("increment", self._parse_term()) 2181 elif self._match_text_seq("MINVALUE"): 2182 seq.set("minvalue", self._parse_term()) 2183 elif self._match_text_seq("MAXVALUE"): 2184 seq.set("maxvalue", self._parse_term()) 2185 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2186 self._match_text_seq("=") 2187 seq.set("start", self._parse_term()) 2188 elif self._match_text_seq("CACHE"): 2189 # T-SQL allows empty CACHE which is initialized dynamically 2190 seq.set("cache", self._parse_number() or True) 2191 elif self._match_text_seq("OWNED", "BY"): 2192 # "OWNED BY NONE" is the default 2193 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2194 else: 2195 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2196 if opt: 2197 options.append(opt) 2198 else: 2199 break 2200 2201 seq.set("options", options if options else None) 2202 return None if self._index == index else seq 2203 2204 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2205 # only used for teradata currently 2206 self._match(TokenType.COMMA) 2207 2208 kwargs = { 2209 "no": self._match_text_seq("NO"), 2210 "dual": self._match_text_seq("DUAL"), 2211 "before": self._match_text_seq("BEFORE"), 2212 "default": self._match_text_seq("DEFAULT"), 2213 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2214 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2215 "after": self._match_text_seq("AFTER"), 2216 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2217 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2218 } 2219 2220 if self._match_texts(self.PROPERTY_PARSERS): 2221 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2222 try: 2223 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2224 except TypeError: 2225 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2226 2227 return None 2228 2229 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2230 return self._parse_wrapped_csv(self._parse_property) 2231 2232 def _parse_property(self) -> t.Optional[exp.Expression]: 2233 if self._match_texts(self.PROPERTY_PARSERS): 2234 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2235 2236 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2237 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2238 2239 if self._match_text_seq("COMPOUND", "SORTKEY"): 2240 return self._parse_sortkey(compound=True) 2241 2242 if self._match_text_seq("SQL", "SECURITY"): 2243 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2244 2245 index = self._index 2246 2247 seq_props = self._parse_sequence_properties() 2248 if seq_props: 2249 return seq_props 2250 2251 self._retreat(index) 2252 key = self._parse_column() 2253 2254 if not self._match(TokenType.EQ): 2255 self._retreat(index) 2256 return None 2257 2258 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2259 if isinstance(key, exp.Column): 2260 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2261 2262 value = self._parse_bitwise() or self._parse_var(any_token=True) 2263 2264 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2265 if isinstance(value, exp.Column): 2266 value = exp.var(value.name) 2267 2268 return self.expression(exp.Property, this=key, value=value) 2269 2270 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2271 if self._match_text_seq("BY"): 2272 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2273 2274 self._match(TokenType.ALIAS) 2275 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2276 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2277 2278 return self.expression( 2279 exp.FileFormatProperty, 2280 this=( 2281 self.expression( 2282 exp.InputOutputFormat, 2283 input_format=input_format, 2284 output_format=output_format, 2285 ) 2286 if input_format or output_format 2287 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2288 ), 2289 hive_format=True, 2290 ) 2291 2292 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2293 field = self._parse_field() 2294 if isinstance(field, exp.Identifier) and not field.quoted: 2295 field = exp.var(field) 2296 2297 return field 2298 2299 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2300 self._match(TokenType.EQ) 2301 self._match(TokenType.ALIAS) 2302 2303 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2304 2305 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2306 properties = [] 2307 while True: 2308 if before: 2309 prop = self._parse_property_before() 2310 else: 2311 prop = self._parse_property() 2312 if not prop: 2313 break 2314 for p in ensure_list(prop): 2315 properties.append(p) 
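        # Every property parsed above is collected here; a non-empty list is wrapped
        # into a single exp.Properties container, so roughly:
        #     "x=1 y=2" -> Properties(expressions=[Property(x, 1), Property(y, 2)])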
2316 2317 if properties: 2318 return self.expression(exp.Properties, expressions=properties) 2319 2320 return None 2321 2322 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2323 return self.expression( 2324 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2325 ) 2326 2327 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2328 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2329 security_specifier = self._prev.text.upper() 2330 return self.expression(exp.SecurityProperty, this=security_specifier) 2331 return None 2332 2333 def _parse_settings_property(self) -> exp.SettingsProperty: 2334 return self.expression( 2335 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2336 ) 2337 2338 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2339 if self._index >= 2: 2340 pre_volatile_token = self._tokens[self._index - 2] 2341 else: 2342 pre_volatile_token = None 2343 2344 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2345 return exp.VolatileProperty() 2346 2347 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2348 2349 def _parse_retention_period(self) -> exp.Var: 2350 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2351 number = self._parse_number() 2352 number_str = f"{number} " if number else "" 2353 unit = self._parse_var(any_token=True) 2354 return exp.var(f"{number_str}{unit}") 2355 2356 def _parse_system_versioning_property( 2357 self, with_: bool = False 2358 ) -> exp.WithSystemVersioningProperty: 2359 self._match(TokenType.EQ) 2360 prop = self.expression( 2361 exp.WithSystemVersioningProperty, 2362 **{ # type: ignore 2363 "on": True, 2364 "with": with_, 2365 }, 2366 ) 2367 2368 if self._match_text_seq("OFF"): 2369 prop.set("on", False) 2370 return prop 2371 2372 self._match(TokenType.ON) 2373 if self._match(TokenType.L_PAREN): 2374 while self._curr and not self._match(TokenType.R_PAREN): 2375 if self._match_text_seq("HISTORY_TABLE", "="): 2376 prop.set("this", self._parse_table_parts()) 2377 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2378 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2379 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2380 prop.set("retention_period", self._parse_retention_period()) 2381 2382 self._match(TokenType.COMMA) 2383 2384 return prop 2385 2386 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2387 self._match(TokenType.EQ) 2388 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2389 prop = self.expression(exp.DataDeletionProperty, on=on) 2390 2391 if self._match(TokenType.L_PAREN): 2392 while self._curr and not self._match(TokenType.R_PAREN): 2393 if self._match_text_seq("FILTER_COLUMN", "="): 2394 prop.set("filter_column", self._parse_column()) 2395 elif self._match_text_seq("RETENTION_PERIOD", "="): 2396 prop.set("retention_period", self._parse_retention_period()) 2397 2398 self._match(TokenType.COMMA) 2399 2400 return prop 2401 2402 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2403 kind = "HASH" 2404 expressions: t.Optional[t.List[exp.Expression]] = None 2405 if self._match_text_seq("BY", "HASH"): 2406 expressions = self._parse_wrapped_csv(self._parse_id_var) 2407 elif self._match_text_seq("BY", "RANDOM"): 2408 kind = "RANDOM" 2409 2410 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2411 
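        # (e.g. Doris/StarRocks-style DDL: "DISTRIBUTED BY HASH(k1) BUCKETS 10"
        # vs. "DISTRIBUTED BY HASH(k1) BUCKETS AUTO", where the latter leaves
        # `buckets` unset)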
buckets: t.Optional[exp.Expression] = None 2412 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2413 buckets = self._parse_number() 2414 2415 return self.expression( 2416 exp.DistributedByProperty, 2417 expressions=expressions, 2418 kind=kind, 2419 buckets=buckets, 2420 order=self._parse_order(), 2421 ) 2422 2423 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2424 self._match_text_seq("KEY") 2425 expressions = self._parse_wrapped_id_vars() 2426 return self.expression(expr_type, expressions=expressions) 2427 2428 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2429 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2430 prop = self._parse_system_versioning_property(with_=True) 2431 self._match_r_paren() 2432 return prop 2433 2434 if self._match(TokenType.L_PAREN, advance=False): 2435 return self._parse_wrapped_properties() 2436 2437 if self._match_text_seq("JOURNAL"): 2438 return self._parse_withjournaltable() 2439 2440 if self._match_texts(self.VIEW_ATTRIBUTES): 2441 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2442 2443 if self._match_text_seq("DATA"): 2444 return self._parse_withdata(no=False) 2445 elif self._match_text_seq("NO", "DATA"): 2446 return self._parse_withdata(no=True) 2447 2448 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2449 return self._parse_serde_properties(with_=True) 2450 2451 if self._match(TokenType.SCHEMA): 2452 return self.expression( 2453 exp.WithSchemaBindingProperty, 2454 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2455 ) 2456 2457 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2458 return self.expression( 2459 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2460 ) 2461 2462 if not self._next: 2463 return None 2464 2465 return self._parse_withisolatedloading() 2466 2467 def _parse_procedure_option(self) -> exp.Expression | None: 2468 if self._match_text_seq("EXECUTE", "AS"): 2469 return self.expression( 2470 exp.ExecuteAsProperty, 2471 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2472 or self._parse_string(), 2473 ) 2474 2475 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2476 2477 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2478 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2479 self._match(TokenType.EQ) 2480 2481 user = self._parse_id_var() 2482 self._match(TokenType.PARAMETER) 2483 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2484 2485 if not user or not host: 2486 return None 2487 2488 return exp.DefinerProperty(this=f"{user}@{host}") 2489 2490 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2491 self._match(TokenType.TABLE) 2492 self._match(TokenType.EQ) 2493 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2494 2495 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2496 return self.expression(exp.LogProperty, no=no) 2497 2498 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2499 return self.expression(exp.JournalProperty, **kwargs) 2500 2501 def _parse_checksum(self) -> exp.ChecksumProperty: 2502 self._match(TokenType.EQ) 2503 2504 on = None 2505 if self._match(TokenType.ON): 2506 on = True 2507 elif self._match_text_seq("OFF"): 2508 on = False 2509 2510 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2511 2512 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2513 return self.expression( 2514 exp.Cluster, 2515 expressions=( 2516 self._parse_wrapped_csv(self._parse_ordered) 2517 if wrapped 2518 else self._parse_csv(self._parse_ordered) 2519 ), 2520 ) 2521 2522 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2523 self._match_text_seq("BY") 2524 2525 self._match_l_paren() 2526 expressions = self._parse_csv(self._parse_column) 2527 self._match_r_paren() 2528 2529 if self._match_text_seq("SORTED", "BY"): 2530 self._match_l_paren() 2531 sorted_by = self._parse_csv(self._parse_ordered) 2532 self._match_r_paren() 2533 else: 2534 sorted_by = None 2535 2536 self._match(TokenType.INTO) 2537 buckets = self._parse_number() 2538 self._match_text_seq("BUCKETS") 2539 2540 return self.expression( 2541 exp.ClusteredByProperty, 2542 expressions=expressions, 2543 sorted_by=sorted_by, 2544 buckets=buckets, 2545 ) 2546 2547 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2548 if not self._match_text_seq("GRANTS"): 2549 self._retreat(self._index - 1) 2550 return None 2551 2552 return self.expression(exp.CopyGrantsProperty) 2553 2554 def _parse_freespace(self) -> exp.FreespaceProperty: 2555 self._match(TokenType.EQ) 2556 return self.expression( 2557 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2558 ) 2559 2560 def _parse_mergeblockratio( 2561 self, no: bool = False, default: bool = False 2562 ) -> exp.MergeBlockRatioProperty: 2563 if self._match(TokenType.EQ): 2564 return self.expression( 2565 exp.MergeBlockRatioProperty, 2566 this=self._parse_number(), 2567 percent=self._match(TokenType.PERCENT), 2568 ) 2569 2570 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2571 2572 def _parse_datablocksize( 2573 self, 2574 default: t.Optional[bool] = None, 2575 minimum: t.Optional[bool] = None, 2576 maximum: t.Optional[bool] = None, 2577 ) -> exp.DataBlocksizeProperty: 2578 self._match(TokenType.EQ) 2579 size = self._parse_number() 2580 2581 units = None 2582 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2583 units = self._prev.text 2584 2585 return self.expression( 2586 exp.DataBlocksizeProperty, 2587 size=size, 2588 units=units, 2589 default=default, 2590 minimum=minimum, 2591 maximum=maximum, 2592 ) 2593 2594 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2595 self._match(TokenType.EQ) 2596 always = self._match_text_seq("ALWAYS") 2597 manual = self._match_text_seq("MANUAL") 2598 never = self._match_text_seq("NEVER") 2599 default = self._match_text_seq("DEFAULT") 2600 2601 autotemp = None 2602 if self._match_text_seq("AUTOTEMP"): 2603 autotemp = self._parse_schema() 2604 2605 return self.expression( 2606 exp.BlockCompressionProperty, 2607 always=always, 2608 manual=manual, 2609 never=never, 2610 default=default, 2611 autotemp=autotemp, 2612 ) 2613 2614 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2615 index = self._index 2616 no = self._match_text_seq("NO") 2617 concurrent = self._match_text_seq("CONCURRENT") 2618 2619 if not self._match_text_seq("ISOLATED", "LOADING"): 2620 self._retreat(index) 2621 return None 2622 2623 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2624 return self.expression( 2625 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2626 ) 2627 2628 def _parse_locking(self) -> exp.LockingProperty: 2629 if self._match(TokenType.TABLE): 2630 kind = "TABLE" 2631 elif 
self._match(TokenType.VIEW): 2632 kind = "VIEW" 2633 elif self._match(TokenType.ROW): 2634 kind = "ROW" 2635 elif self._match_text_seq("DATABASE"): 2636 kind = "DATABASE" 2637 else: 2638 kind = None 2639 2640 if kind in ("DATABASE", "TABLE", "VIEW"): 2641 this = self._parse_table_parts() 2642 else: 2643 this = None 2644 2645 if self._match(TokenType.FOR): 2646 for_or_in = "FOR" 2647 elif self._match(TokenType.IN): 2648 for_or_in = "IN" 2649 else: 2650 for_or_in = None 2651 2652 if self._match_text_seq("ACCESS"): 2653 lock_type = "ACCESS" 2654 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2655 lock_type = "EXCLUSIVE" 2656 elif self._match_text_seq("SHARE"): 2657 lock_type = "SHARE" 2658 elif self._match_text_seq("READ"): 2659 lock_type = "READ" 2660 elif self._match_text_seq("WRITE"): 2661 lock_type = "WRITE" 2662 elif self._match_text_seq("CHECKSUM"): 2663 lock_type = "CHECKSUM" 2664 else: 2665 lock_type = None 2666 2667 override = self._match_text_seq("OVERRIDE") 2668 2669 return self.expression( 2670 exp.LockingProperty, 2671 this=this, 2672 kind=kind, 2673 for_or_in=for_or_in, 2674 lock_type=lock_type, 2675 override=override, 2676 ) 2677 2678 def _parse_partition_by(self) -> t.List[exp.Expression]: 2679 if self._match(TokenType.PARTITION_BY): 2680 return self._parse_csv(self._parse_assignment) 2681 return [] 2682 2683 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2684 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2685 if self._match_text_seq("MINVALUE"): 2686 return exp.var("MINVALUE") 2687 if self._match_text_seq("MAXVALUE"): 2688 return exp.var("MAXVALUE") 2689 return self._parse_bitwise() 2690 2691 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2692 expression = None 2693 from_expressions = None 2694 to_expressions = None 2695 2696 if self._match(TokenType.IN): 2697 this = self._parse_wrapped_csv(self._parse_bitwise) 2698 elif self._match(TokenType.FROM): 2699 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2700 self._match_text_seq("TO") 2701 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2702 elif self._match_text_seq("WITH", "(", "MODULUS"): 2703 this = self._parse_number() 2704 self._match_text_seq(",", "REMAINDER") 2705 expression = self._parse_number() 2706 self._match_r_paren() 2707 else: 2708 self.raise_error("Failed to parse partition bound spec.") 2709 2710 return self.expression( 2711 exp.PartitionBoundSpec, 2712 this=this, 2713 expression=expression, 2714 from_expressions=from_expressions, 2715 to_expressions=to_expressions, 2716 ) 2717 2718 # https://www.postgresql.org/docs/current/sql-createtable.html 2719 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2720 if not self._match_text_seq("OF"): 2721 self._retreat(self._index - 1) 2722 return None 2723 2724 this = self._parse_table(schema=True) 2725 2726 if self._match(TokenType.DEFAULT): 2727 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2728 elif self._match_text_seq("FOR", "VALUES"): 2729 expression = self._parse_partition_bound_spec() 2730 else: 2731 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2732 2733 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2734 2735 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2736 self._match(TokenType.EQ) 2737 return self.expression( 2738 exp.PartitionedByProperty, 2739 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2740 ) 2741 2742 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2743 if self._match_text_seq("AND", "STATISTICS"): 2744 statistics = True 2745 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2746 statistics = False 2747 else: 2748 statistics = None 2749 2750 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2751 2752 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2753 if self._match_text_seq("SQL"): 2754 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2755 return None 2756 2757 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2758 if self._match_text_seq("SQL", "DATA"): 2759 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2760 return None 2761 2762 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2763 if self._match_text_seq("PRIMARY", "INDEX"): 2764 return exp.NoPrimaryIndexProperty() 2765 if self._match_text_seq("SQL"): 2766 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2767 return None 2768 2769 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2770 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2771 return exp.OnCommitProperty() 2772 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2773 return exp.OnCommitProperty(delete=True) 2774 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2775 2776 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2777 if self._match_text_seq("SQL", "DATA"): 2778 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2779 return None 2780 2781 def _parse_distkey(self) -> exp.DistKeyProperty: 2782 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2783 2784 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2785 table = self._parse_table(schema=True) 2786 2787 options = [] 2788 while self._match_texts(("INCLUDING", "EXCLUDING")): 2789 this = self._prev.text.upper() 2790 2791 id_var = self._parse_id_var() 2792 if not id_var: 2793 return None 2794 2795 options.append( 2796 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2797 ) 2798 2799 return self.expression(exp.LikeProperty, this=table, expressions=options) 2800 2801 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2802 return self.expression( 2803 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2804 ) 2805 2806 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2807 self._match(TokenType.EQ) 2808 return self.expression( 2809 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2810 ) 2811 2812 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2813 self._match_text_seq("WITH", "CONNECTION") 2814 return self.expression( 2815 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2816 ) 2817 2818 def _parse_returns(self) -> exp.ReturnsProperty: 2819 value: t.Optional[exp.Expression] 2820 null = None 2821 is_table = self._match(TokenType.TABLE) 2822 2823 if is_table: 2824 if self._match(TokenType.LT): 2825 value = self.expression( 2826 exp.Schema, 2827 this="TABLE", 2828 expressions=self._parse_csv(self._parse_struct_types), 2829 ) 2830 if not self._match(TokenType.GT): 2831 self.raise_error("Expecting >") 2832 else: 2833 value = self._parse_schema(exp.var("TABLE")) 2834 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
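            # e.g. T-SQL scalar UDFs: "... WITH RETURNS NULL ON NULL INPUT" returns
            # NULL without executing the body when any argument is NULL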
2835 null = True 2836 value = None 2837 else: 2838 value = self._parse_types() 2839 2840 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2841 2842 def _parse_describe(self) -> exp.Describe: 2843 kind = self._match_set(self.CREATABLES) and self._prev.text 2844 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2845 if self._match(TokenType.DOT): 2846 style = None 2847 self._retreat(self._index - 2) 2848 2849 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2850 2851 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2852 this = self._parse_statement() 2853 else: 2854 this = self._parse_table(schema=True) 2855 2856 properties = self._parse_properties() 2857 expressions = properties.expressions if properties else None 2858 partition = self._parse_partition() 2859 return self.expression( 2860 exp.Describe, 2861 this=this, 2862 style=style, 2863 kind=kind, 2864 expressions=expressions, 2865 partition=partition, 2866 format=format, 2867 ) 2868 2869 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2870 kind = self._prev.text.upper() 2871 expressions = [] 2872 2873 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2874 if self._match(TokenType.WHEN): 2875 expression = self._parse_disjunction() 2876 self._match(TokenType.THEN) 2877 else: 2878 expression = None 2879 2880 else_ = self._match(TokenType.ELSE) 2881 2882 if not self._match(TokenType.INTO): 2883 return None 2884 2885 return self.expression( 2886 exp.ConditionalInsert, 2887 this=self.expression( 2888 exp.Insert, 2889 this=self._parse_table(schema=True), 2890 expression=self._parse_derived_table_values(), 2891 ), 2892 expression=expression, 2893 else_=else_, 2894 ) 2895 2896 expression = parse_conditional_insert() 2897 while expression is not None: 2898 expressions.append(expression) 2899 expression = parse_conditional_insert() 2900 2901 return self.expression( 2902 exp.MultitableInserts, 2903 kind=kind, 2904 comments=comments, 2905 expressions=expressions, 2906 source=self._parse_table(), 2907 ) 2908 2909 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2910 comments = [] 2911 hint = self._parse_hint() 2912 overwrite = self._match(TokenType.OVERWRITE) 2913 ignore = self._match(TokenType.IGNORE) 2914 local = self._match_text_seq("LOCAL") 2915 alternative = None 2916 is_function = None 2917 2918 if self._match_text_seq("DIRECTORY"): 2919 this: t.Optional[exp.Expression] = self.expression( 2920 exp.Directory, 2921 this=self._parse_var_or_string(), 2922 local=local, 2923 row_format=self._parse_row_format(match_row=True), 2924 ) 2925 else: 2926 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2927 comments += ensure_list(self._prev_comments) 2928 return self._parse_multitable_inserts(comments) 2929 2930 if self._match(TokenType.OR): 2931 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2932 2933 self._match(TokenType.INTO) 2934 comments += ensure_list(self._prev_comments) 2935 self._match(TokenType.TABLE) 2936 is_function = self._match(TokenType.FUNCTION) 2937 2938 this = ( 2939 self._parse_table(schema=True, parse_partition=True) 2940 if not is_function 2941 else self._parse_function() 2942 ) 2943 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2944 this.set("alias", self._parse_table_alias()) 2945 2946 returning = self._parse_returning() 2947 2948 return self.expression( 2949 
exp.Insert, 2950 comments=comments, 2951 hint=hint, 2952 is_function=is_function, 2953 this=this, 2954 stored=self._match_text_seq("STORED") and self._parse_stored(), 2955 by_name=self._match_text_seq("BY", "NAME"), 2956 exists=self._parse_exists(), 2957 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2958 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2959 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2960 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2961 conflict=self._parse_on_conflict(), 2962 returning=returning or self._parse_returning(), 2963 overwrite=overwrite, 2964 alternative=alternative, 2965 ignore=ignore, 2966 source=self._match(TokenType.TABLE) and self._parse_table(), 2967 ) 2968 2969 def _parse_kill(self) -> exp.Kill: 2970 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2971 2972 return self.expression( 2973 exp.Kill, 2974 this=self._parse_primary(), 2975 kind=kind, 2976 ) 2977 2978 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2979 conflict = self._match_text_seq("ON", "CONFLICT") 2980 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2981 2982 if not conflict and not duplicate: 2983 return None 2984 2985 conflict_keys = None 2986 constraint = None 2987 2988 if conflict: 2989 if self._match_text_seq("ON", "CONSTRAINT"): 2990 constraint = self._parse_id_var() 2991 elif self._match(TokenType.L_PAREN): 2992 conflict_keys = self._parse_csv(self._parse_id_var) 2993 self._match_r_paren() 2994 2995 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2996 if self._prev.token_type == TokenType.UPDATE: 2997 self._match(TokenType.SET) 2998 expressions = self._parse_csv(self._parse_equality) 2999 else: 3000 expressions = None 3001 3002 return self.expression( 3003 exp.OnConflict, 3004 duplicate=duplicate, 3005 expressions=expressions, 3006 action=action, 3007 conflict_keys=conflict_keys, 3008 constraint=constraint, 3009 where=self._parse_where(), 3010 ) 3011 3012 def _parse_returning(self) -> t.Optional[exp.Returning]: 3013 if not self._match(TokenType.RETURNING): 3014 return None 3015 return self.expression( 3016 exp.Returning, 3017 expressions=self._parse_csv(self._parse_expression), 3018 into=self._match(TokenType.INTO) and self._parse_table_part(), 3019 ) 3020 3021 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3022 if not self._match(TokenType.FORMAT): 3023 return None 3024 return self._parse_row_format() 3025 3026 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3027 index = self._index 3028 with_ = with_ or self._match_text_seq("WITH") 3029 3030 if not self._match(TokenType.SERDE_PROPERTIES): 3031 self._retreat(index) 3032 return None 3033 return self.expression( 3034 exp.SerdeProperties, 3035 **{ # type: ignore 3036 "expressions": self._parse_wrapped_properties(), 3037 "with": with_, 3038 }, 3039 ) 3040 3041 def _parse_row_format( 3042 self, match_row: bool = False 3043 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3044 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3045 return None 3046 3047 if self._match_text_seq("SERDE"): 3048 this = self._parse_string() 3049 3050 serde_properties = self._parse_serde_properties() 3051 3052 return self.expression( 3053 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3054 ) 3055 3056 self._match_text_seq("DELIMITED") 3057 3058 kwargs = {} 3059 3060 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3061 kwargs["fields"] = self._parse_string() 3062 if self._match_text_seq("ESCAPED", "BY"): 3063 kwargs["escaped"] = self._parse_string() 3064 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3065 kwargs["collection_items"] = self._parse_string() 3066 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3067 kwargs["map_keys"] = self._parse_string() 3068 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3069 kwargs["lines"] = self._parse_string() 3070 if self._match_text_seq("NULL", "DEFINED", "AS"): 3071 kwargs["null"] = self._parse_string() 3072 3073 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3074 3075 def _parse_load(self) -> exp.LoadData | exp.Command: 3076 if self._match_text_seq("DATA"): 3077 local = self._match_text_seq("LOCAL") 3078 self._match_text_seq("INPATH") 3079 inpath = self._parse_string() 3080 overwrite = self._match(TokenType.OVERWRITE) 3081 self._match_pair(TokenType.INTO, TokenType.TABLE) 3082 3083 return self.expression( 3084 exp.LoadData, 3085 this=self._parse_table(schema=True), 3086 local=local, 3087 overwrite=overwrite, 3088 inpath=inpath, 3089 partition=self._parse_partition(), 3090 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3091 serde=self._match_text_seq("SERDE") and self._parse_string(), 3092 ) 3093 return self._parse_as_command(self._prev) 3094 3095 def _parse_delete(self) -> exp.Delete: 3096 # This handles MySQL's "Multiple-Table Syntax" 3097 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3098 tables = None 3099 if not self._match(TokenType.FROM, advance=False): 3100 tables = self._parse_csv(self._parse_table) or None 3101 3102 returning = self._parse_returning() 3103 3104 return self.expression( 3105 exp.Delete, 3106 tables=tables, 3107 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3108 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3109 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3110 where=self._parse_where(), 3111 returning=returning or self._parse_returning(), 3112 limit=self._parse_limit(), 3113 ) 3114 3115 def _parse_update(self) -> exp.Update: 3116 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3117 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3118 returning = self._parse_returning() 3119 return self.expression( 3120 exp.Update, 3121 **{ # type: ignore 3122 "this": this, 3123 "expressions": expressions, 3124 "from": self._parse_from(joins=True), 3125 "where": self._parse_where(), 3126 "returning": returning or self._parse_returning(), 3127 "order": self._parse_order(), 3128 "limit": self._parse_limit(), 3129 }, 3130 ) 3131 3132 def _parse_use(self) -> exp.Use: 3133 return self.expression( 3134 exp.Use, 3135 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3136 this=self._parse_table(schema=False), 3137 ) 3138 3139 def _parse_uncache(self) -> exp.Uncache: 3140 if not self._match(TokenType.TABLE): 3141 self.raise_error("Expecting TABLE after UNCACHE") 3142 3143 return self.expression( 3144 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3145 ) 3146 3147 def _parse_cache(self) -> exp.Cache: 3148 lazy = self._match_text_seq("LAZY") 3149 self._match(TokenType.TABLE) 3150 table = 
self._parse_table(schema=True) 3151 3152 options = [] 3153 if self._match_text_seq("OPTIONS"): 3154 self._match_l_paren() 3155 k = self._parse_string() 3156 self._match(TokenType.EQ) 3157 v = self._parse_string() 3158 options = [k, v] 3159 self._match_r_paren() 3160 3161 self._match(TokenType.ALIAS) 3162 return self.expression( 3163 exp.Cache, 3164 this=table, 3165 lazy=lazy, 3166 options=options, 3167 expression=self._parse_select(nested=True), 3168 ) 3169 3170 def _parse_partition(self) -> t.Optional[exp.Partition]: 3171 if not self._match_texts(self.PARTITION_KEYWORDS): 3172 return None 3173 3174 return self.expression( 3175 exp.Partition, 3176 subpartition=self._prev.text.upper() == "SUBPARTITION", 3177 expressions=self._parse_wrapped_csv(self._parse_assignment), 3178 ) 3179 3180 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3181 def _parse_value_expression() -> t.Optional[exp.Expression]: 3182 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3183 return exp.var(self._prev.text.upper()) 3184 return self._parse_expression() 3185 3186 if self._match(TokenType.L_PAREN): 3187 expressions = self._parse_csv(_parse_value_expression) 3188 self._match_r_paren() 3189 return self.expression(exp.Tuple, expressions=expressions) 3190 3191 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3192 expression = self._parse_expression() 3193 if expression: 3194 return self.expression(exp.Tuple, expressions=[expression]) 3195 return None 3196 3197 def _parse_projections(self) -> t.List[exp.Expression]: 3198 return self._parse_expressions() 3199 3200 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3201 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3202 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3203 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3204 ) 3205 elif self._match(TokenType.FROM): 3206 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3207 # Support parentheses for duckdb FROM-first syntax 3208 select = self._parse_select() 3209 if select: 3210 select.set("from", from_) 3211 this = select 3212 else: 3213 this = exp.select("*").from_(t.cast(exp.From, from_)) 3214 else: 3215 this = ( 3216 self._parse_table(consume_pipe=True) 3217 if table 3218 else self._parse_select(nested=True, parse_set_operation=False) 3219 ) 3220 3221 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3222 # in case a modifier (e.g. 
join) is following 3223 if table and isinstance(this, exp.Values) and this.alias: 3224 alias = this.args["alias"].pop() 3225 this = exp.Table(this=this, alias=alias) 3226 3227 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3228 3229 return this 3230 3231 def _parse_select( 3232 self, 3233 nested: bool = False, 3234 table: bool = False, 3235 parse_subquery_alias: bool = True, 3236 parse_set_operation: bool = True, 3237 consume_pipe: bool = True, 3238 ) -> t.Optional[exp.Expression]: 3239 query = self._parse_select_query( 3240 nested=nested, 3241 table=table, 3242 parse_subquery_alias=parse_subquery_alias, 3243 parse_set_operation=parse_set_operation, 3244 ) 3245 3246 if ( 3247 consume_pipe 3248 and self._match(TokenType.PIPE_GT, advance=False) 3249 and isinstance(query, exp.Query) 3250 ): 3251 query = self._parse_pipe_syntax_query(query) 3252 query = query.subquery(copy=False) if query and table else query 3253 3254 return query 3255 3256 def _parse_select_query( 3257 self, 3258 nested: bool = False, 3259 table: bool = False, 3260 parse_subquery_alias: bool = True, 3261 parse_set_operation: bool = True, 3262 ) -> t.Optional[exp.Expression]: 3263 cte = self._parse_with() 3264 3265 if cte: 3266 this = self._parse_statement() 3267 3268 if not this: 3269 self.raise_error("Failed to parse any statement following CTE") 3270 return cte 3271 3272 if "with" in this.arg_types: 3273 this.set("with", cte) 3274 else: 3275 self.raise_error(f"{this.key} does not support CTE") 3276 this = cte 3277 3278 return this 3279 3280 # duckdb supports leading with FROM x 3281 from_ = ( 3282 self._parse_from(consume_pipe=True) 3283 if self._match(TokenType.FROM, advance=False) 3284 else None 3285 ) 3286 3287 if self._match(TokenType.SELECT): 3288 comments = self._prev_comments 3289 3290 hint = self._parse_hint() 3291 3292 if self._next and not self._next.token_type == TokenType.DOT: 3293 all_ = self._match(TokenType.ALL) 3294 distinct = self._match_set(self.DISTINCT_TOKENS) 3295 else: 3296 all_, distinct = None, None 3297 3298 kind = ( 3299 self._match(TokenType.ALIAS) 3300 and self._match_texts(("STRUCT", "VALUE")) 3301 and self._prev.text.upper() 3302 ) 3303 3304 if distinct: 3305 distinct = self.expression( 3306 exp.Distinct, 3307 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3308 ) 3309 3310 if all_ and distinct: 3311 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3312 3313 operation_modifiers = [] 3314 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3315 operation_modifiers.append(exp.var(self._prev.text.upper())) 3316 3317 limit = self._parse_limit(top=True) 3318 projections = self._parse_projections() 3319 3320 this = self.expression( 3321 exp.Select, 3322 kind=kind, 3323 hint=hint, 3324 distinct=distinct, 3325 expressions=projections, 3326 limit=limit, 3327 operation_modifiers=operation_modifiers or None, 3328 ) 3329 this.comments = comments 3330 3331 into = self._parse_into() 3332 if into: 3333 this.set("into", into) 3334 3335 if not from_: 3336 from_ = self._parse_from() 3337 3338 if from_: 3339 this.set("from", from_) 3340 3341 this = self._parse_query_modifiers(this) 3342 elif (table or nested) and self._match(TokenType.L_PAREN): 3343 this = self._parse_wrapped_select(table=table) 3344 3345 # We return early here so that the UNION isn't attached to the subquery by the 3346 # following call to _parse_set_operations, but instead becomes the parent node 3347 self._match_r_paren() 3348 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3349 elif self._match(TokenType.VALUES, advance=False): 3350 this = self._parse_derived_table_values() 3351 elif from_: 3352 this = exp.select("*").from_(from_.this, copy=False) 3353 elif self._match(TokenType.SUMMARIZE): 3354 table = self._match(TokenType.TABLE) 3355 this = self._parse_select() or self._parse_string() or self._parse_table() 3356 return self.expression(exp.Summarize, this=this, table=table) 3357 elif self._match(TokenType.DESCRIBE): 3358 this = self._parse_describe() 3359 elif self._match_text_seq("STREAM"): 3360 this = self._parse_function() 3361 if this: 3362 this = self.expression(exp.Stream, this=this) 3363 else: 3364 self._retreat(self._index - 1) 3365 else: 3366 this = None 3367 3368 return self._parse_set_operations(this) if parse_set_operation else this 3369 3370 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3371 self._match_text_seq("SEARCH") 3372 3373 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3374 3375 if not kind: 3376 return None 3377 3378 self._match_text_seq("FIRST", "BY") 3379 3380 return self.expression( 3381 exp.RecursiveWithSearch, 3382 kind=kind, 3383 this=self._parse_id_var(), 3384 expression=self._match_text_seq("SET") and self._parse_id_var(), 3385 using=self._match_text_seq("USING") and self._parse_id_var(), 3386 ) 3387 3388 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3389 if not skip_with_token and not self._match(TokenType.WITH): 3390 return None 3391 3392 comments = self._prev_comments 3393 recursive = self._match(TokenType.RECURSIVE) 3394 3395 last_comments = None 3396 expressions = [] 3397 while True: 3398 cte = self._parse_cte() 3399 if isinstance(cte, exp.CTE): 3400 expressions.append(cte) 3401 if last_comments: 3402 cte.add_comments(last_comments) 3403 3404 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3405 break 3406 else: 3407 self._match(TokenType.WITH) 3408 3409 last_comments = self._prev_comments 3410 3411 return self.expression( 3412 exp.With, 3413 comments=comments, 3414 expressions=expressions, 3415 recursive=recursive, 3416 search=self._parse_recursive_with_search(), 3417 ) 3418 3419 def _parse_cte(self) -> t.Optional[exp.CTE]: 3420 index = self._index 3421 3422 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3423 if not alias or not alias.this: 3424 self.raise_error("Expected CTE to have alias") 3425 3426 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3427 self._retreat(index) 3428 return None 3429 3430 comments = self._prev_comments 3431 3432 if self._match_text_seq("NOT", "MATERIALIZED"): 3433 materialized = False 3434 elif self._match_text_seq("MATERIALIZED"): 3435 materialized = True 3436 else: 3437 materialized = None 3438 3439 cte = self.expression( 3440 exp.CTE, 3441 this=self._parse_wrapped(self._parse_statement), 3442 alias=alias, 3443 materialized=materialized, 3444 comments=comments, 3445 ) 3446 3447 values = cte.this 3448 if isinstance(values, exp.Values): 3449 if values.alias: 3450 cte.set("this", exp.select("*").from_(values)) 3451 else: 3452 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3453 3454 return cte 3455 3456 def _parse_table_alias( 3457 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3458 ) -> t.Optional[exp.TableAlias]: 3459 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3460 # so this section tries to parse the clause 
version and if it fails, it treats the token 3461 # as an identifier (alias) 3462 if self._can_parse_limit_or_offset(): 3463 return None 3464 3465 any_token = self._match(TokenType.ALIAS) 3466 alias = ( 3467 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3468 or self._parse_string_as_identifier() 3469 ) 3470 3471 index = self._index 3472 if self._match(TokenType.L_PAREN): 3473 columns = self._parse_csv(self._parse_function_parameter) 3474 self._match_r_paren() if columns else self._retreat(index) 3475 else: 3476 columns = None 3477 3478 if not alias and not columns: 3479 return None 3480 3481 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3482 3483 # We bubble up comments from the Identifier to the TableAlias 3484 if isinstance(alias, exp.Identifier): 3485 table_alias.add_comments(alias.pop_comments()) 3486 3487 return table_alias 3488 3489 def _parse_subquery( 3490 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3491 ) -> t.Optional[exp.Subquery]: 3492 if not this: 3493 return None 3494 3495 return self.expression( 3496 exp.Subquery, 3497 this=this, 3498 pivots=self._parse_pivots(), 3499 alias=self._parse_table_alias() if parse_alias else None, 3500 sample=self._parse_table_sample(), 3501 ) 3502 3503 def _implicit_unnests_to_explicit(self, this: E) -> E: 3504 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3505 3506 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3507 for i, join in enumerate(this.args.get("joins") or []): 3508 table = join.this 3509 normalized_table = table.copy() 3510 normalized_table.meta["maybe_column"] = True 3511 normalized_table = _norm(normalized_table, dialect=self.dialect) 3512 3513 if isinstance(table, exp.Table) and not join.args.get("on"): 3514 if normalized_table.parts[0].name in refs: 3515 table_as_column = table.to_column() 3516 unnest = exp.Unnest(expressions=[table_as_column]) 3517 3518 # Table.to_column creates a parent Alias node that we want to convert to 3519 # a TableAlias and attach to the Unnest, so it matches the parser's output 3520 if isinstance(table.args.get("alias"), exp.TableAlias): 3521 table_as_column.replace(table_as_column.this) 3522 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3523 3524 table.replace(unnest) 3525 3526 refs.add(normalized_table.alias_or_name) 3527 3528 return this 3529 3530 def _parse_query_modifiers( 3531 self, this: t.Optional[exp.Expression] 3532 ) -> t.Optional[exp.Expression]: 3533 if isinstance(this, self.MODIFIABLES): 3534 for join in self._parse_joins(): 3535 this.append("joins", join) 3536 for lateral in iter(self._parse_lateral, None): 3537 this.append("laterals", lateral) 3538 3539 while True: 3540 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3541 modifier_token = self._curr 3542 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3543 key, expression = parser(self) 3544 3545 if expression: 3546 if this.args.get(key): 3547 self.raise_error( 3548 f"Found multiple '{modifier_token.text.upper()}' clauses", 3549 token=modifier_token, 3550 ) 3551 3552 this.set(key, expression) 3553 if key == "limit": 3554 offset = expression.args.pop("offset", None) 3555 3556 if offset: 3557 offset = exp.Offset(expression=offset) 3558 this.set("offset", offset) 3559 3560 limit_by_expressions = expression.expressions 3561 expression.set("expressions", None) 3562 offset.set("expressions", limit_by_expressions) 3563 continue 
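# Illustrative sketch of the LIMIT/OFFSET splitting handled just above: a dialect
# like MySQL writes `LIMIT 5, 10`, _parse_limit stashes the 5 in the Limit node's
# "offset" arg, and this modifier loop pops it back out into a standalone
# exp.Offset. The transpiled string below is an expectation, not taken from
# this file:
#
#     import sqlglot
#     sqlglot.transpile("SELECT x FROM t LIMIT 5, 10", read="mysql", write="postgres")
#     # expected: ['SELECT x FROM t LIMIT 10 OFFSET 5']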
3564 break 3565 3566 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3567 this = self._implicit_unnests_to_explicit(this) 3568 3569 return this 3570 3571 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3572 start = self._curr 3573 while self._curr: 3574 self._advance() 3575 3576 end = self._tokens[self._index - 1] 3577 return exp.Hint(expressions=[self._find_sql(start, end)]) 3578 3579 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3580 return self._parse_function_call() 3581 3582 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3583 start_index = self._index 3584 should_fallback_to_string = False 3585 3586 hints = [] 3587 try: 3588 for hint in iter( 3589 lambda: self._parse_csv( 3590 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3591 ), 3592 [], 3593 ): 3594 hints.extend(hint) 3595 except ParseError: 3596 should_fallback_to_string = True 3597 3598 if should_fallback_to_string or self._curr: 3599 self._retreat(start_index) 3600 return self._parse_hint_fallback_to_string() 3601 3602 return self.expression(exp.Hint, expressions=hints) 3603 3604 def _parse_hint(self) -> t.Optional[exp.Hint]: 3605 if self._match(TokenType.HINT) and self._prev_comments: 3606 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3607 3608 return None 3609 3610 def _parse_into(self) -> t.Optional[exp.Into]: 3611 if not self._match(TokenType.INTO): 3612 return None 3613 3614 temp = self._match(TokenType.TEMPORARY) 3615 unlogged = self._match_text_seq("UNLOGGED") 3616 self._match(TokenType.TABLE) 3617 3618 return self.expression( 3619 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3620 ) 3621 3622 def _parse_from( 3623 self, 3624 joins: bool = False, 3625 skip_from_token: bool = False, 3626 consume_pipe: bool = False, 3627 ) -> t.Optional[exp.From]: 3628 if not skip_from_token and not self._match(TokenType.FROM): 3629 return None 3630 3631 return self.expression( 3632 exp.From, 3633 comments=self._prev_comments, 3634 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3635 ) 3636 3637 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3638 return self.expression( 3639 exp.MatchRecognizeMeasure, 3640 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3641 this=self._parse_expression(), 3642 ) 3643 3644 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3645 if not self._match(TokenType.MATCH_RECOGNIZE): 3646 return None 3647 3648 self._match_l_paren() 3649 3650 partition = self._parse_partition_by() 3651 order = self._parse_order() 3652 3653 measures = ( 3654 self._parse_csv(self._parse_match_recognize_measure) 3655 if self._match_text_seq("MEASURES") 3656 else None 3657 ) 3658 3659 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3660 rows = exp.var("ONE ROW PER MATCH") 3661 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3662 text = "ALL ROWS PER MATCH" 3663 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3664 text += " SHOW EMPTY MATCHES" 3665 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3666 text += " OMIT EMPTY MATCHES" 3667 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3668 text += " WITH UNMATCHED ROWS" 3669 rows = exp.var(text) 3670 else: 3671 rows = None 3672 3673 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3674 text = "AFTER MATCH SKIP" 3675 if self._match_text_seq("PAST", "LAST", "ROW"): 3676 text += " PAST LAST ROW" 3677 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 3678 text += " TO NEXT ROW" 3679 elif self._match_text_seq("TO", "FIRST"): 3680 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3681 elif self._match_text_seq("TO", "LAST"): 3682 text += f" TO LAST {self._advance_any().text}" # type: ignore 3683 after = exp.var(text) 3684 else: 3685 after = None 3686 3687 if self._match_text_seq("PATTERN"): 3688 self._match_l_paren() 3689 3690 if not self._curr: 3691 self.raise_error("Expecting )", self._curr) 3692 3693 paren = 1 3694 start = self._curr 3695 3696 while self._curr and paren > 0: 3697 if self._curr.token_type == TokenType.L_PAREN: 3698 paren += 1 3699 if self._curr.token_type == TokenType.R_PAREN: 3700 paren -= 1 3701 3702 end = self._prev 3703 self._advance() 3704 3705 if paren > 0: 3706 self.raise_error("Expecting )", self._curr) 3707 3708 pattern = exp.var(self._find_sql(start, end)) 3709 else: 3710 pattern = None 3711 3712 define = ( 3713 self._parse_csv(self._parse_name_as_expression) 3714 if self._match_text_seq("DEFINE") 3715 else None 3716 ) 3717 3718 self._match_r_paren() 3719 3720 return self.expression( 3721 exp.MatchRecognize, 3722 partition_by=partition, 3723 order=order, 3724 measures=measures, 3725 rows=rows, 3726 after=after, 3727 pattern=pattern, 3728 define=define, 3729 alias=self._parse_table_alias(), 3730 ) 3731 3732 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3733 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3734 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3735 cross_apply = False 3736 3737 if cross_apply is not None: 3738 this = self._parse_select(table=True) 3739 view = None 3740 outer = None 3741 elif self._match(TokenType.LATERAL): 3742 this = self._parse_select(table=True) 3743 view = self._match(TokenType.VIEW) 3744 outer = self._match(TokenType.OUTER) 3745 else: 3746 return None 3747 3748 if not this: 3749 this = ( 3750 self._parse_unnest() 3751 or self._parse_function() 3752 or self._parse_id_var(any_token=False) 3753 ) 3754 3755 while self._match(TokenType.DOT): 3756 this = exp.Dot( 3757 this=this, 3758 expression=self._parse_function() or self._parse_id_var(any_token=False), 3759 ) 3760 3761 ordinality: t.Optional[bool] = None 3762 3763 if view: 3764 table = self._parse_id_var(any_token=False) 3765 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3766 table_alias: t.Optional[exp.TableAlias] = self.expression( 3767 exp.TableAlias, this=table, columns=columns 3768 ) 3769 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3770 # We move the alias from the lateral's child node to the lateral itself 3771 table_alias = this.args["alias"].pop() 3772 else: 3773 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3774 table_alias = self._parse_table_alias() 3775 3776 return self.expression( 3777 exp.Lateral, 3778 this=this, 3779 view=view, 3780 outer=outer, 3781 alias=table_alias, 3782 cross_apply=cross_apply, 3783 ordinality=ordinality, 3784 ) 3785 3786 def _parse_join_parts( 3787 self, 3788 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3789 return ( 3790 self._match_set(self.JOIN_METHODS) and self._prev, 3791 self._match_set(self.JOIN_SIDES) and self._prev, 3792 self._match_set(self.JOIN_KINDS) and self._prev, 3793 ) 3794 3795 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3796 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3797 this = self._parse_column() 3798 if 
isinstance(this, exp.Column): 3799 return this.this 3800 return this 3801 3802 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3803 3804 def _parse_join( 3805 self, skip_join_token: bool = False, parse_bracket: bool = False 3806 ) -> t.Optional[exp.Join]: 3807 if self._match(TokenType.COMMA): 3808 table = self._try_parse(self._parse_table) 3809 cross_join = self.expression(exp.Join, this=table) if table else None 3810 3811 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3812 cross_join.set("kind", "CROSS") 3813 3814 return cross_join 3815 3816 index = self._index 3817 method, side, kind = self._parse_join_parts() 3818 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3819 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3820 join_comments = self._prev_comments 3821 3822 if not skip_join_token and not join: 3823 self._retreat(index) 3824 kind = None 3825 method = None 3826 side = None 3827 3828 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3829 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3830 3831 if not skip_join_token and not join and not outer_apply and not cross_apply: 3832 return None 3833 3834 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3835 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3836 kwargs["expressions"] = self._parse_csv( 3837 lambda: self._parse_table(parse_bracket=parse_bracket) 3838 ) 3839 3840 if method: 3841 kwargs["method"] = method.text 3842 if side: 3843 kwargs["side"] = side.text 3844 if kind: 3845 kwargs["kind"] = kind.text 3846 if hint: 3847 kwargs["hint"] = hint 3848 3849 if self._match(TokenType.MATCH_CONDITION): 3850 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3851 3852 if self._match(TokenType.ON): 3853 kwargs["on"] = self._parse_assignment() 3854 elif self._match(TokenType.USING): 3855 kwargs["using"] = self._parse_using_identifiers() 3856 elif ( 3857 not method 3858 and not (outer_apply or cross_apply) 3859 and not isinstance(kwargs["this"], exp.Unnest) 3860 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3861 ): 3862 index = self._index 3863 joins: t.Optional[list] = list(self._parse_joins()) 3864 3865 if joins and self._match(TokenType.ON): 3866 kwargs["on"] = self._parse_assignment() 3867 elif joins and self._match(TokenType.USING): 3868 kwargs["using"] = self._parse_using_identifiers() 3869 else: 3870 joins = None 3871 self._retreat(index) 3872 3873 kwargs["this"].set("joins", joins if joins else None) 3874 3875 kwargs["pivots"] = self._parse_pivots() 3876 3877 comments = [c for token in (method, side, kind) if token for c in token.comments] 3878 comments = (join_comments or []) + comments 3879 return self.expression(exp.Join, comments=comments, **kwargs) 3880 3881 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3882 this = self._parse_assignment() 3883 3884 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3885 return this 3886 3887 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3888 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3889 3890 return this 3891 3892 def _parse_index_params(self) -> exp.IndexParameters: 3893 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3894 3895 if self._match(TokenType.L_PAREN, advance=False): 3896 columns = 
self._parse_wrapped_csv(self._parse_with_operator) 3897 else: 3898 columns = None 3899 3900 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3901 partition_by = self._parse_partition_by() 3902 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3903 tablespace = ( 3904 self._parse_var(any_token=True) 3905 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3906 else None 3907 ) 3908 where = self._parse_where() 3909 3910 on = self._parse_field() if self._match(TokenType.ON) else None 3911 3912 return self.expression( 3913 exp.IndexParameters, 3914 using=using, 3915 columns=columns, 3916 include=include, 3917 partition_by=partition_by, 3918 where=where, 3919 with_storage=with_storage, 3920 tablespace=tablespace, 3921 on=on, 3922 ) 3923 3924 def _parse_index( 3925 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3926 ) -> t.Optional[exp.Index]: 3927 if index or anonymous: 3928 unique = None 3929 primary = None 3930 amp = None 3931 3932 self._match(TokenType.ON) 3933 self._match(TokenType.TABLE) # hive 3934 table = self._parse_table_parts(schema=True) 3935 else: 3936 unique = self._match(TokenType.UNIQUE) 3937 primary = self._match_text_seq("PRIMARY") 3938 amp = self._match_text_seq("AMP") 3939 3940 if not self._match(TokenType.INDEX): 3941 return None 3942 3943 index = self._parse_id_var() 3944 table = None 3945 3946 params = self._parse_index_params() 3947 3948 return self.expression( 3949 exp.Index, 3950 this=index, 3951 table=table, 3952 unique=unique, 3953 primary=primary, 3954 amp=amp, 3955 params=params, 3956 ) 3957 3958 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3959 hints: t.List[exp.Expression] = [] 3960 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3961 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3962 hints.append( 3963 self.expression( 3964 exp.WithTableHint, 3965 expressions=self._parse_csv( 3966 lambda: self._parse_function() or self._parse_var(any_token=True) 3967 ), 3968 ) 3969 ) 3970 self._match_r_paren() 3971 else: 3972 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3973 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3974 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3975 3976 self._match_set((TokenType.INDEX, TokenType.KEY)) 3977 if self._match(TokenType.FOR): 3978 hint.set("target", self._advance_any() and self._prev.text.upper()) 3979 3980 hint.set("expressions", self._parse_wrapped_id_vars()) 3981 hints.append(hint) 3982 3983 return hints or None 3984 3985 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3986 return ( 3987 (not schema and self._parse_function(optional_parens=False)) 3988 or self._parse_id_var(any_token=False) 3989 or self._parse_string_as_identifier() 3990 or self._parse_placeholder() 3991 ) 3992 3993 def _parse_table_parts( 3994 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3995 ) -> exp.Table: 3996 catalog = None 3997 db = None 3998 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3999 4000 while self._match(TokenType.DOT): 4001 if catalog: 4002 # This allows nesting the table in arbitrarily many dot expressions if needed 4003 table = self.expression( 4004 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4005 ) 4006 else: 4007 catalog = db 4008 db = table 4009 # "" used for tsql FROM a..b case 4010 table = 
self._parse_table_part(schema=schema) or "" 4011 4012 if ( 4013 wildcard 4014 and self._is_connected() 4015 and (isinstance(table, exp.Identifier) or not table) 4016 and self._match(TokenType.STAR) 4017 ): 4018 if isinstance(table, exp.Identifier): 4019 table.args["this"] += "*" 4020 else: 4021 table = exp.Identifier(this="*") 4022 4023 # We bubble up comments from the Identifier to the Table 4024 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4025 4026 if is_db_reference: 4027 catalog = db 4028 db = table 4029 table = None 4030 4031 if not table and not is_db_reference: 4032 self.raise_error(f"Expected table name but got {self._curr}") 4033 if not db and is_db_reference: 4034 self.raise_error(f"Expected database name but got {self._curr}") 4035 4036 table = self.expression( 4037 exp.Table, 4038 comments=comments, 4039 this=table, 4040 db=db, 4041 catalog=catalog, 4042 ) 4043 4044 changes = self._parse_changes() 4045 if changes: 4046 table.set("changes", changes) 4047 4048 at_before = self._parse_historical_data() 4049 if at_before: 4050 table.set("when", at_before) 4051 4052 pivots = self._parse_pivots() 4053 if pivots: 4054 table.set("pivots", pivots) 4055 4056 return table 4057 4058 def _parse_table( 4059 self, 4060 schema: bool = False, 4061 joins: bool = False, 4062 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4063 parse_bracket: bool = False, 4064 is_db_reference: bool = False, 4065 parse_partition: bool = False, 4066 consume_pipe: bool = False, 4067 ) -> t.Optional[exp.Expression]: 4068 lateral = self._parse_lateral() 4069 if lateral: 4070 return lateral 4071 4072 unnest = self._parse_unnest() 4073 if unnest: 4074 return unnest 4075 4076 values = self._parse_derived_table_values() 4077 if values: 4078 return values 4079 4080 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4081 if subquery: 4082 if not subquery.args.get("pivots"): 4083 subquery.set("pivots", self._parse_pivots()) 4084 return subquery 4085 4086 bracket = parse_bracket and self._parse_bracket(None) 4087 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4088 4089 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4090 self._parse_table 4091 ) 4092 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4093 4094 only = self._match(TokenType.ONLY) 4095 4096 this = t.cast( 4097 exp.Expression, 4098 bracket 4099 or rows_from 4100 or self._parse_bracket( 4101 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4102 ), 4103 ) 4104 4105 if only: 4106 this.set("only", only) 4107 4108 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4109 self._match_text_seq("*") 4110 4111 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4112 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4113 this.set("partition", self._parse_partition()) 4114 4115 if schema: 4116 return self._parse_schema(this=this) 4117 4118 version = self._parse_version() 4119 4120 if version: 4121 this.set("version", version) 4122 4123 if self.dialect.ALIAS_POST_TABLESAMPLE: 4124 this.set("sample", self._parse_table_sample()) 4125 4126 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4127 if alias: 4128 this.set("alias", alias) 4129 4130 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4131 return self.expression( 4132 exp.AtIndex, this=this.to_column(copy=False), 
expression=self._parse_id_var() 4133 ) 4134 4135 this.set("hints", self._parse_table_hints()) 4136 4137 if not this.args.get("pivots"): 4138 this.set("pivots", self._parse_pivots()) 4139 4140 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4141 this.set("sample", self._parse_table_sample()) 4142 4143 if joins: 4144 for join in self._parse_joins(): 4145 this.append("joins", join) 4146 4147 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4148 this.set("ordinality", True) 4149 this.set("alias", self._parse_table_alias()) 4150 4151 return this 4152 4153 def _parse_version(self) -> t.Optional[exp.Version]: 4154 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4155 this = "TIMESTAMP" 4156 elif self._match(TokenType.VERSION_SNAPSHOT): 4157 this = "VERSION" 4158 else: 4159 return None 4160 4161 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4162 kind = self._prev.text.upper() 4163 start = self._parse_bitwise() 4164 self._match_texts(("TO", "AND")) 4165 end = self._parse_bitwise() 4166 expression: t.Optional[exp.Expression] = self.expression( 4167 exp.Tuple, expressions=[start, end] 4168 ) 4169 elif self._match_text_seq("CONTAINED", "IN"): 4170 kind = "CONTAINED IN" 4171 expression = self.expression( 4172 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4173 ) 4174 elif self._match(TokenType.ALL): 4175 kind = "ALL" 4176 expression = None 4177 else: 4178 self._match_text_seq("AS", "OF") 4179 kind = "AS OF" 4180 expression = self._parse_type() 4181 4182 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4183 4184 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4185 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4186 index = self._index 4187 historical_data = None 4188 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4189 this = self._prev.text.upper() 4190 kind = ( 4191 self._match(TokenType.L_PAREN) 4192 and self._match_texts(self.HISTORICAL_DATA_KIND) 4193 and self._prev.text.upper() 4194 ) 4195 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4196 4197 if expression: 4198 self._match_r_paren() 4199 historical_data = self.expression( 4200 exp.HistoricalData, this=this, kind=kind, expression=expression 4201 ) 4202 else: 4203 self._retreat(index) 4204 4205 return historical_data 4206 4207 def _parse_changes(self) -> t.Optional[exp.Changes]: 4208 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4209 return None 4210 4211 information = self._parse_var(any_token=True) 4212 self._match_r_paren() 4213 4214 return self.expression( 4215 exp.Changes, 4216 information=information, 4217 at_before=self._parse_historical_data(), 4218 end=self._parse_historical_data(), 4219 ) 4220 4221 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4222 if not self._match(TokenType.UNNEST): 4223 return None 4224 4225 expressions = self._parse_wrapped_csv(self._parse_equality) 4226 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4227 4228 alias = self._parse_table_alias() if with_alias else None 4229 4230 if alias: 4231 if self.dialect.UNNEST_COLUMN_ONLY: 4232 if alias.args.get("columns"): 4233 self.raise_error("Unexpected extra column alias in unnest.") 4234 4235 alias.set("columns", [alias.this]) 4236 alias.set("this", None) 4237 4238 columns = alias.args.get("columns") or [] 4239 if offset and len(expressions) < len(columns): 4240 offset = columns.pop() 4241 4242 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4243 
self._match(TokenType.ALIAS) 4244 offset = self._parse_id_var( 4245 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4246 ) or exp.to_identifier("offset") 4247 4248 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4249 4250 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4251 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4252 if not is_derived and not ( 4253 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4254 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4255 ): 4256 return None 4257 4258 expressions = self._parse_csv(self._parse_value) 4259 alias = self._parse_table_alias() 4260 4261 if is_derived: 4262 self._match_r_paren() 4263 4264 return self.expression( 4265 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4266 ) 4267 4268 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4269 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4270 as_modifier and self._match_text_seq("USING", "SAMPLE") 4271 ): 4272 return None 4273 4274 bucket_numerator = None 4275 bucket_denominator = None 4276 bucket_field = None 4277 percent = None 4278 size = None 4279 seed = None 4280 4281 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4282 matched_l_paren = self._match(TokenType.L_PAREN) 4283 4284 if self.TABLESAMPLE_CSV: 4285 num = None 4286 expressions = self._parse_csv(self._parse_primary) 4287 else: 4288 expressions = None 4289 num = ( 4290 self._parse_factor() 4291 if self._match(TokenType.NUMBER, advance=False) 4292 else self._parse_primary() or self._parse_placeholder() 4293 ) 4294 4295 if self._match_text_seq("BUCKET"): 4296 bucket_numerator = self._parse_number() 4297 self._match_text_seq("OUT", "OF") 4298 bucket_denominator = self._parse_number() 4299 self._match(TokenType.ON) 4300 bucket_field = self._parse_field() 4301 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4302 percent = num 4303 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4304 size = num 4305 else: 4306 percent = num 4307 4308 if matched_l_paren: 4309 self._match_r_paren() 4310 4311 if self._match(TokenType.L_PAREN): 4312 method = self._parse_var(upper=True) 4313 seed = self._match(TokenType.COMMA) and self._parse_number() 4314 self._match_r_paren() 4315 elif self._match_texts(("SEED", "REPEATABLE")): 4316 seed = self._parse_wrapped(self._parse_number) 4317 4318 if not method and self.DEFAULT_SAMPLING_METHOD: 4319 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4320 4321 return self.expression( 4322 exp.TableSample, 4323 expressions=expressions, 4324 method=method, 4325 bucket_numerator=bucket_numerator, 4326 bucket_denominator=bucket_denominator, 4327 bucket_field=bucket_field, 4328 percent=percent, 4329 size=size, 4330 seed=seed, 4331 ) 4332 4333 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4334 return list(iter(self._parse_pivot, None)) or None 4335 4336 def _parse_joins(self) -> t.Iterator[exp.Join]: 4337 return iter(self._parse_join, None) 4338 4339 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4340 if not self._match(TokenType.INTO): 4341 return None 4342 4343 return self.expression( 4344 exp.UnpivotColumns, 4345 this=self._match_text_seq("NAME") and self._parse_column(), 4346 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4347 ) 4348 4349 # https://duckdb.org/docs/sql/statements/pivot 4350
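# Usage sketch for the simplified DuckDB syntax handled below (illustrative; the
# statement form follows the DuckDB docs linked above, and the exact node shape
# is an assumption):
#
#     import sqlglot
#     pivot = sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")
#     # expected: a query whose exp.Pivot carries `this` (the pivoted table), the
#     # ON expressions, and the USING aggregations assembled by the method below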
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4351 def _parse_on() -> t.Optional[exp.Expression]: 4352 this = self._parse_bitwise() 4353 4354 if self._match(TokenType.IN): 4355 # PIVOT ... ON col IN (row_val1, row_val2) 4356 return self._parse_in(this) 4357 if self._match(TokenType.ALIAS, advance=False): 4358 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4359 return self._parse_alias(this) 4360 4361 return this 4362 4363 this = self._parse_table() 4364 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4365 into = self._parse_unpivot_columns() 4366 using = self._match(TokenType.USING) and self._parse_csv( 4367 lambda: self._parse_alias(self._parse_function()) 4368 ) 4369 group = self._parse_group() 4370 4371 return self.expression( 4372 exp.Pivot, 4373 this=this, 4374 expressions=expressions, 4375 using=using, 4376 group=group, 4377 unpivot=is_unpivot, 4378 into=into, 4379 ) 4380 4381 def _parse_pivot_in(self) -> exp.In: 4382 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4383 this = self._parse_select_or_expression() 4384 4385 self._match(TokenType.ALIAS) 4386 alias = self._parse_bitwise() 4387 if alias: 4388 if isinstance(alias, exp.Column) and not alias.db: 4389 alias = alias.this 4390 return self.expression(exp.PivotAlias, this=this, alias=alias) 4391 4392 return this 4393 4394 value = self._parse_column() 4395 4396 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4397 self.raise_error("Expecting IN (") 4398 4399 if self._match(TokenType.ANY): 4400 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4401 else: 4402 exprs = self._parse_csv(_parse_aliased_expression) 4403 4404 self._match_r_paren() 4405 return self.expression(exp.In, this=value, expressions=exprs) 4406 4407 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4408 func = self._parse_function() 4409 if not func: 4410 self.raise_error("Expecting an aggregation function in PIVOT") 4411 4412 return self._parse_alias(func) 4413 4414 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4415 index = self._index 4416 include_nulls = None 4417 4418 if self._match(TokenType.PIVOT): 4419 unpivot = False 4420 elif self._match(TokenType.UNPIVOT): 4421 unpivot = True 4422 4423 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4424 if self._match_text_seq("INCLUDE", "NULLS"): 4425 include_nulls = True 4426 elif self._match_text_seq("EXCLUDE", "NULLS"): 4427 include_nulls = False 4428 else: 4429 return None 4430 4431 expressions = [] 4432 4433 if not self._match(TokenType.L_PAREN): 4434 self._retreat(index) 4435 return None 4436 4437 if unpivot: 4438 expressions = self._parse_csv(self._parse_column) 4439 else: 4440 expressions = self._parse_csv(self._parse_pivot_aggregation) 4441 4442 if not expressions: 4443 self.raise_error("Failed to parse PIVOT's aggregation list") 4444 4445 if not self._match(TokenType.FOR): 4446 self.raise_error("Expecting FOR") 4447 4448 fields = [] 4449 while True: 4450 field = self._try_parse(self._parse_pivot_in) 4451 if not field: 4452 break 4453 fields.append(field) 4454 4455 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4456 self._parse_bitwise 4457 ) 4458 4459 group = self._parse_group() 4460 4461 self._match_r_paren() 4462 4463 pivot = self.expression( 4464 exp.Pivot, 4465 expressions=expressions, 4466 fields=fields, 4467 unpivot=unpivot, 4468 include_nulls=include_nulls, 4469 
default_on_null=default_on_null, 4470 group=group, 4471 ) 4472 4473 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4474 pivot.set("alias", self._parse_table_alias()) 4475 4476 if not unpivot: 4477 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4478 4479 columns: t.List[exp.Expression] = [] 4480 all_fields = [] 4481 for pivot_field in pivot.fields: 4482 pivot_field_expressions = pivot_field.expressions 4483 4484 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4485 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4486 continue 4487 4488 all_fields.append( 4489 [ 4490 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4491 for fld in pivot_field_expressions 4492 ] 4493 ) 4494 4495 if all_fields: 4496 if names: 4497 all_fields.append(names) 4498 4499 # Generate all possible combinations of the pivot columns 4500 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4501 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4502 for fld_parts_tuple in itertools.product(*all_fields): 4503 fld_parts = list(fld_parts_tuple) 4504 4505 if names and self.PREFIXED_PIVOT_COLUMNS: 4506 # Move the "name" to the front of the list 4507 fld_parts.insert(0, fld_parts.pop(-1)) 4508 4509 columns.append(exp.to_identifier("_".join(fld_parts))) 4510 4511 pivot.set("columns", columns) 4512 4513 return pivot 4514 4515 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4516 return [agg.alias for agg in aggregations if agg.alias] 4517 4518 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4519 if not skip_where_token and not self._match(TokenType.PREWHERE): 4520 return None 4521 4522 return self.expression( 4523 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4524 ) 4525 4526 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4527 if not skip_where_token and not self._match(TokenType.WHERE): 4528 return None 4529 4530 return self.expression( 4531 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4532 ) 4533 4534 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4535 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4536 return None 4537 comments = self._prev_comments 4538 4539 elements: t.Dict[str, t.Any] = defaultdict(list) 4540 4541 if self._match(TokenType.ALL): 4542 elements["all"] = True 4543 elif self._match(TokenType.DISTINCT): 4544 elements["all"] = False 4545 4546 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4547 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4548 4549 while True: 4550 index = self._index 4551 4552 elements["expressions"].extend( 4553 self._parse_csv( 4554 lambda: None 4555 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4556 else self._parse_assignment() 4557 ) 4558 ) 4559 4560 before_with_index = self._index 4561 with_prefix = self._match(TokenType.WITH) 4562 4563 if self._match(TokenType.ROLLUP): 4564 elements["rollup"].append( 4565 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4566 ) 4567 elif self._match(TokenType.CUBE): 4568 elements["cube"].append( 4569 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4570 ) 4571 elif self._match(TokenType.GROUPING_SETS): 4572 elements["grouping_sets"].append( 4573 
self.expression( 4574 exp.GroupingSets, 4575 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4576 ) 4577 ) 4578 elif self._match_text_seq("TOTALS"): 4579 elements["totals"] = True # type: ignore 4580 4581 if before_with_index <= self._index <= before_with_index + 1: 4582 self._retreat(before_with_index) 4583 break 4584 4585 if index == self._index: 4586 break 4587 4588 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4589 4590 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4591 return self.expression( 4592 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4593 ) 4594 4595 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4596 if self._match(TokenType.L_PAREN): 4597 grouping_set = self._parse_csv(self._parse_column) 4598 self._match_r_paren() 4599 return self.expression(exp.Tuple, expressions=grouping_set) 4600 4601 return self._parse_column() 4602 4603 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4604 if not skip_having_token and not self._match(TokenType.HAVING): 4605 return None 4606 return self.expression( 4607 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4608 ) 4609 4610 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4611 if not self._match(TokenType.QUALIFY): 4612 return None 4613 return self.expression(exp.Qualify, this=self._parse_assignment()) 4614 4615 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4616 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4617 exp.Prior, this=self._parse_bitwise() 4618 ) 4619 connect = self._parse_assignment() 4620 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4621 return connect 4622 4623 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4624 if skip_start_token: 4625 start = None 4626 elif self._match(TokenType.START_WITH): 4627 start = self._parse_assignment() 4628 else: 4629 return None 4630 4631 self._match(TokenType.CONNECT_BY) 4632 nocycle = self._match_text_seq("NOCYCLE") 4633 connect = self._parse_connect_with_prior() 4634 4635 if not start and self._match(TokenType.START_WITH): 4636 start = self._parse_assignment() 4637 4638 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4639 4640 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4641 this = self._parse_id_var(any_token=True) 4642 if self._match(TokenType.ALIAS): 4643 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4644 return this 4645 4646 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4647 if self._match_text_seq("INTERPOLATE"): 4648 return self._parse_wrapped_csv(self._parse_name_as_expression) 4649 return None 4650 4651 def _parse_order( 4652 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4653 ) -> t.Optional[exp.Expression]: 4654 siblings = None 4655 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4656 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4657 return this 4658 4659 siblings = True 4660 4661 return self.expression( 4662 exp.Order, 4663 comments=self._prev_comments, 4664 this=this, 4665 expressions=self._parse_csv(self._parse_ordered), 4666 siblings=siblings, 4667 ) 4668 4669 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4670 if not self._match(token): 4671 return None 4672 return self.expression(exp_class, 
expressions=self._parse_csv(self._parse_ordered)) 4673 4674 def _parse_ordered( 4675 self, parse_method: t.Optional[t.Callable] = None 4676 ) -> t.Optional[exp.Ordered]: 4677 this = parse_method() if parse_method else self._parse_assignment() 4678 if not this: 4679 return None 4680 4681 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4682 this = exp.var("ALL") 4683 4684 asc = self._match(TokenType.ASC) 4685 desc = self._match(TokenType.DESC) or (asc and False) 4686 4687 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4688 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4689 4690 nulls_first = is_nulls_first or False 4691 explicitly_null_ordered = is_nulls_first or is_nulls_last 4692 4693 if ( 4694 not explicitly_null_ordered 4695 and ( 4696 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4697 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4698 ) 4699 and self.dialect.NULL_ORDERING != "nulls_are_last" 4700 ): 4701 nulls_first = True 4702 4703 if self._match_text_seq("WITH", "FILL"): 4704 with_fill = self.expression( 4705 exp.WithFill, 4706 **{ # type: ignore 4707 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4708 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4709 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4710 "interpolate": self._parse_interpolate(), 4711 }, 4712 ) 4713 else: 4714 with_fill = None 4715 4716 return self.expression( 4717 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4718 ) 4719 4720 def _parse_limit_options(self) -> exp.LimitOptions: 4721 percent = self._match(TokenType.PERCENT) 4722 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4723 self._match_text_seq("ONLY") 4724 with_ties = self._match_text_seq("WITH", "TIES") 4725 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4726 4727 def _parse_limit( 4728 self, 4729 this: t.Optional[exp.Expression] = None, 4730 top: bool = False, 4731 skip_limit_token: bool = False, 4732 ) -> t.Optional[exp.Expression]: 4733 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4734 comments = self._prev_comments 4735 if top: 4736 limit_paren = self._match(TokenType.L_PAREN) 4737 expression = self._parse_term() if limit_paren else self._parse_number() 4738 4739 if limit_paren: 4740 self._match_r_paren() 4741 4742 limit_options = self._parse_limit_options() 4743 else: 4744 limit_options = None 4745 expression = self._parse_term() 4746 4747 if self._match(TokenType.COMMA): 4748 offset = expression 4749 expression = self._parse_term() 4750 else: 4751 offset = None 4752 4753 limit_exp = self.expression( 4754 exp.Limit, 4755 this=this, 4756 expression=expression, 4757 offset=offset, 4758 comments=comments, 4759 limit_options=limit_options, 4760 expressions=self._parse_limit_by(), 4761 ) 4762 4763 return limit_exp 4764 4765 if self._match(TokenType.FETCH): 4766 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4767 direction = self._prev.text.upper() if direction else "FIRST" 4768 4769 count = self._parse_field(tokens=self.FETCH_TOKENS) 4770 4771 return self.expression( 4772 exp.Fetch, 4773 direction=direction, 4774 count=count, 4775 limit_options=self._parse_limit_options(), 4776 ) 4777 4778 return this 4779 4780 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4781 if not self._match(TokenType.OFFSET): 4782 return this 4783 4784 count = self._parse_term() 4785 
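# The ROW/ROWS keyword consumed next is an ANSI noise word, so FETCH-style
# offsets normalize to a bare OFFSET. A hedged example (the rendered string is
# an expectation, not taken from this file):
#
#     import sqlglot
#     sqlglot.parse_one("SELECT x FROM t OFFSET 5 ROWS").sql()
#     # expected: 'SELECT x FROM t OFFSET 5'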
self._match_set((TokenType.ROW, TokenType.ROWS)) 4786 4787 return self.expression( 4788 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4789 ) 4790 4791 def _can_parse_limit_or_offset(self) -> bool: 4792 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4793 return False 4794 4795 index = self._index 4796 result = bool( 4797 self._try_parse(self._parse_limit, retreat=True) 4798 or self._try_parse(self._parse_offset, retreat=True) 4799 ) 4800 self._retreat(index) 4801 return result 4802 4803 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4804 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4805 4806 def _parse_locks(self) -> t.List[exp.Lock]: 4807 locks = [] 4808 while True: 4809 update, key = None, None 4810 if self._match_text_seq("FOR", "UPDATE"): 4811 update = True 4812 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4813 "LOCK", "IN", "SHARE", "MODE" 4814 ): 4815 update = False 4816 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4817 update, key = False, True 4818 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4819 update, key = True, True 4820 else: 4821 break 4822 4823 expressions = None 4824 if self._match_text_seq("OF"): 4825 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4826 4827 wait: t.Optional[bool | exp.Expression] = None 4828 if self._match_text_seq("NOWAIT"): 4829 wait = True 4830 elif self._match_text_seq("WAIT"): 4831 wait = self._parse_primary() 4832 elif self._match_text_seq("SKIP", "LOCKED"): 4833 wait = False 4834 4835 locks.append( 4836 self.expression( 4837 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4838 ) 4839 ) 4840 4841 return locks 4842 4843 def parse_set_operation( 4844 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4845 ) -> t.Optional[exp.Expression]: 4846 start = self._index 4847 _, side_token, kind_token = self._parse_join_parts() 4848 4849 side = side_token.text if side_token else None 4850 kind = kind_token.text if kind_token else None 4851 4852 if not self._match_set(self.SET_OPERATIONS): 4853 self._retreat(start) 4854 return None 4855 4856 token_type = self._prev.token_type 4857 4858 if token_type == TokenType.UNION: 4859 operation: t.Type[exp.SetOperation] = exp.Union 4860 elif token_type == TokenType.EXCEPT: 4861 operation = exp.Except 4862 else: 4863 operation = exp.Intersect 4864 4865 comments = self._prev.comments 4866 4867 if self._match(TokenType.DISTINCT): 4868 distinct: t.Optional[bool] = True 4869 elif self._match(TokenType.ALL): 4870 distinct = False 4871 else: 4872 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4873 if distinct is None: 4874 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4875 4876 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4877 "STRICT", "CORRESPONDING" 4878 ) 4879 if self._match_text_seq("CORRESPONDING"): 4880 by_name = True 4881 if not side and not kind: 4882 kind = "INNER" 4883 4884 on_column_list = None 4885 if by_name and self._match_texts(("ON", "BY")): 4886 on_column_list = self._parse_wrapped_csv(self._parse_column) 4887 4888 expression = self._parse_select( 4889 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4890 ) 4891 4892 return self.expression( 4893 operation, 4894 comments=comments, 4895 this=this, 4896 distinct=distinct, 4897 by_name=by_name, 4898 expression=expression, 4899 side=side, 4900 kind=kind, 4901 on=on_column_list, 4902 ) 4903 
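# Usage sketch tying parse_set_operation (above) to _parse_set_operations (below):
# each operator yields one exp.Union / exp.Except / exp.Intersect node, with
# `distinct` resolved from DISTINCT/ALL or the dialect default, and chains fold
# left-to-right into a left-deep tree:
#
#     import sqlglot
#     q = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
#     assert isinstance(q, sqlglot.exp.Union) and q.args["distinct"] is False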
4904 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4905 while this: 4906 setop = self.parse_set_operation(this) 4907 if not setop: 4908 break 4909 this = setop 4910 4911 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4912 expression = this.expression 4913 4914 if expression: 4915 for arg in self.SET_OP_MODIFIERS: 4916 expr = expression.args.get(arg) 4917 if expr: 4918 this.set(arg, expr.pop()) 4919 4920 return this 4921 4922 def _parse_expression(self) -> t.Optional[exp.Expression]: 4923 return self._parse_alias(self._parse_assignment()) 4924 4925 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4926 this = self._parse_disjunction() 4927 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4928 # This allows us to parse <non-identifier token> := <expr> 4929 this = exp.column( 4930 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4931 ) 4932 4933 while self._match_set(self.ASSIGNMENT): 4934 if isinstance(this, exp.Column) and len(this.parts) == 1: 4935 this = this.this 4936 4937 this = self.expression( 4938 self.ASSIGNMENT[self._prev.token_type], 4939 this=this, 4940 comments=self._prev_comments, 4941 expression=self._parse_assignment(), 4942 ) 4943 4944 return this 4945 4946 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4947 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4948 4949 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4950 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4951 4952 def _parse_equality(self) -> t.Optional[exp.Expression]: 4953 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4954 4955 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4956 return self._parse_tokens(self._parse_range, self.COMPARISON) 4957 4958 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4959 this = this or self._parse_bitwise() 4960 negate = self._match(TokenType.NOT) 4961 4962 if self._match_set(self.RANGE_PARSERS): 4963 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4964 if not expression: 4965 return this 4966 4967 this = expression 4968 elif self._match(TokenType.ISNULL): 4969 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4970 4971 # Postgres supports ISNULL and NOTNULL for conditions. 
4972 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4973 if self._match(TokenType.NOTNULL): 4974 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4975 this = self.expression(exp.Not, this=this) 4976 4977 if negate: 4978 this = self._negate_range(this) 4979 4980 if self._match(TokenType.IS): 4981 this = self._parse_is(this) 4982 4983 return this 4984 4985 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4986 if not this: 4987 return this 4988 4989 return self.expression(exp.Not, this=this) 4990 4991 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4992 index = self._index - 1 4993 negate = self._match(TokenType.NOT) 4994 4995 if self._match_text_seq("DISTINCT", "FROM"): 4996 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4997 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4998 4999 if self._match(TokenType.JSON): 5000 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5001 5002 if self._match_text_seq("WITH"): 5003 _with = True 5004 elif self._match_text_seq("WITHOUT"): 5005 _with = False 5006 else: 5007 _with = None 5008 5009 unique = self._match(TokenType.UNIQUE) 5010 self._match_text_seq("KEYS") 5011 expression: t.Optional[exp.Expression] = self.expression( 5012 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5013 ) 5014 else: 5015 expression = self._parse_primary() or self._parse_null() 5016 if not expression: 5017 self._retreat(index) 5018 return None 5019 5020 this = self.expression(exp.Is, this=this, expression=expression) 5021 return self.expression(exp.Not, this=this) if negate else this 5022 5023 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5024 unnest = self._parse_unnest(with_alias=False) 5025 if unnest: 5026 this = self.expression(exp.In, this=this, unnest=unnest) 5027 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5028 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5029 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5030 5031 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5032 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5033 else: 5034 this = self.expression(exp.In, this=this, expressions=expressions) 5035 5036 if matched_l_paren: 5037 self._match_r_paren(this) 5038 elif not self._match(TokenType.R_BRACKET, expression=this): 5039 self.raise_error("Expecting ]") 5040 else: 5041 this = self.expression(exp.In, this=this, field=self._parse_column()) 5042 5043 return this 5044 5045 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5046 symmetric = None 5047 if self._match_text_seq("SYMMETRIC"): 5048 symmetric = True 5049 elif self._match_text_seq("ASYMMETRIC"): 5050 symmetric = False 5051 5052 low = self._parse_bitwise() 5053 self._match(TokenType.AND) 5054 high = self._parse_bitwise() 5055 5056 return self.expression( 5057 exp.Between, 5058 this=this, 5059 low=low, 5060 high=high, 5061 symmetric=symmetric, 5062 ) 5063 5064 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5065 if not self._match(TokenType.ESCAPE): 5066 return this 5067 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5068 5069 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5070 index = self._index 5071 5072 if not 
self._match(TokenType.INTERVAL) and match_interval: 5073 return None 5074 5075 if self._match(TokenType.STRING, advance=False): 5076 this = self._parse_primary() 5077 else: 5078 this = self._parse_term() 5079 5080 if not this or ( 5081 isinstance(this, exp.Column) 5082 and not this.table 5083 and not this.this.quoted 5084 and this.name.upper() == "IS" 5085 ): 5086 self._retreat(index) 5087 return None 5088 5089 unit = self._parse_function() or ( 5090 not self._match(TokenType.ALIAS, advance=False) 5091 and self._parse_var(any_token=True, upper=True) 5092 ) 5093 5094 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5095 # each INTERVAL expression into this canonical form so it's easy to transpile 5096 if this and this.is_number: 5097 this = exp.Literal.string(this.to_py()) 5098 elif this and this.is_string: 5099 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5100 if parts and unit: 5101 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5102 unit = None 5103 self._retreat(self._index - 1) 5104 5105 if len(parts) == 1: 5106 this = exp.Literal.string(parts[0][0]) 5107 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5108 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5109 unit = self.expression( 5110 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5111 ) 5112 5113 interval = self.expression(exp.Interval, this=this, unit=unit) 5114 5115 index = self._index 5116 self._match(TokenType.PLUS) 5117 5118 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5119 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5120 return self.expression( 5121 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5122 ) 5123 5124 self._retreat(index) 5125 return interval 5126 5127 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5128 this = self._parse_term() 5129 5130 while True: 5131 if self._match_set(self.BITWISE): 5132 this = self.expression( 5133 self.BITWISE[self._prev.token_type], 5134 this=this, 5135 expression=self._parse_term(), 5136 ) 5137 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5138 this = self.expression( 5139 exp.DPipe, 5140 this=this, 5141 expression=self._parse_term(), 5142 safe=not self.dialect.STRICT_STRING_CONCAT, 5143 ) 5144 elif self._match(TokenType.DQMARK): 5145 this = self.expression( 5146 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5147 ) 5148 elif self._match_pair(TokenType.LT, TokenType.LT): 5149 this = self.expression( 5150 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5151 ) 5152 elif self._match_pair(TokenType.GT, TokenType.GT): 5153 this = self.expression( 5154 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5155 ) 5156 else: 5157 break 5158 5159 return this 5160 5161 def _parse_term(self) -> t.Optional[exp.Expression]: 5162 this = self._parse_factor() 5163 5164 while self._match_set(self.TERM): 5165 klass = self.TERM[self._prev.token_type] 5166 comments = self._prev_comments 5167 expression = self._parse_factor() 5168 5169 this = self.expression(klass, this=this, comments=comments, expression=expression) 5170 5171 if isinstance(this, exp.Collate): 5172 expr = this.expression 5173 5174 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5175 # fallback to Identifier / Var 5176 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5177 ident = expr.this 5178 if 
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns,
            # otherwise fall back to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
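            # For example (an illustrative sketch): in a Snowflake query like
            # SELECT decimal FROM t, "DECIMAL" tokenizes as a type keyword and the
            # TYPE_CONVERTERS callable expands it to DECIMAL(38, 0), but since no
            # precision/scale tokens were actually consumed (index2 - index == 1),
            # we retreat and re-parse it as a column named "decimal" instead.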
5263 if data_type.expressions and index2 - index > 1: 5264 self._retreat(index2) 5265 return self._parse_column_ops(data_type) 5266 5267 self._retreat(index) 5268 5269 if fallback_to_identifier: 5270 return self._parse_id_var() 5271 5272 this = self._parse_column() 5273 return this and self._parse_column_ops(this) 5274 5275 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5276 this = self._parse_type() 5277 if not this: 5278 return None 5279 5280 if isinstance(this, exp.Column) and not this.table: 5281 this = exp.var(this.name.upper()) 5282 5283 return self.expression( 5284 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5285 ) 5286 5287 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5288 type_name = identifier.name 5289 5290 while self._match(TokenType.DOT): 5291 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5292 5293 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5294 5295 def _parse_types( 5296 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5297 ) -> t.Optional[exp.Expression]: 5298 index = self._index 5299 5300 this: t.Optional[exp.Expression] = None 5301 prefix = self._match_text_seq("SYSUDTLIB", ".") 5302 5303 if self._match_set(self.TYPE_TOKENS): 5304 type_token = self._prev.token_type 5305 else: 5306 type_token = None 5307 identifier = allow_identifiers and self._parse_id_var( 5308 any_token=False, tokens=(TokenType.VAR,) 5309 ) 5310 if isinstance(identifier, exp.Identifier): 5311 try: 5312 tokens = self.dialect.tokenize(identifier.name) 5313 except TokenError: 5314 tokens = None 5315 5316 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5317 type_token = tokens[0].token_type 5318 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5319 this = self._parse_user_defined_type(identifier) 5320 else: 5321 self._retreat(self._index - 1) 5322 return None 5323 else: 5324 return None 5325 5326 if type_token == TokenType.PSEUDO_TYPE: 5327 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5328 5329 if type_token == TokenType.OBJECT_IDENTIFIER: 5330 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5331 5332 # https://materialize.com/docs/sql/types/map/ 5333 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5334 key_type = self._parse_types( 5335 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5336 ) 5337 if not self._match(TokenType.FARROW): 5338 self._retreat(index) 5339 return None 5340 5341 value_type = self._parse_types( 5342 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5343 ) 5344 if not self._match(TokenType.R_BRACKET): 5345 self._retreat(index) 5346 return None 5347 5348 return exp.DataType( 5349 this=exp.DataType.Type.MAP, 5350 expressions=[key_type, value_type], 5351 nested=True, 5352 prefix=prefix, 5353 ) 5354 5355 nested = type_token in self.NESTED_TYPE_TOKENS 5356 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5357 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5358 expressions = None 5359 maybe_func = False 5360 5361 if self._match(TokenType.L_PAREN): 5362 if is_struct: 5363 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5364 elif nested: 5365 expressions = self._parse_csv( 5366 lambda: self._parse_types( 5367 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5368 ) 5369 ) 5370 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5371 this = expressions[0] 5372 this.set("nullable", True) 5373 self._match_r_paren() 5374 return this 5375 elif type_token in self.ENUM_TYPE_TOKENS: 5376 expressions = self._parse_csv(self._parse_equality) 5377 elif is_aggregate: 5378 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5379 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5380 ) 5381 if not func_or_ident: 5382 return None 5383 expressions = [func_or_ident] 5384 if self._match(TokenType.COMMA): 5385 expressions.extend( 5386 self._parse_csv( 5387 lambda: self._parse_types( 5388 check_func=check_func, 5389 schema=schema, 5390 allow_identifiers=allow_identifiers, 5391 ) 5392 ) 5393 ) 5394 else: 5395 expressions = self._parse_csv(self._parse_type_size) 5396 5397 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5398 if type_token == TokenType.VECTOR and len(expressions) == 2: 5399 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5400 5401 if not self._match(TokenType.R_PAREN): 5402 self._retreat(index) 5403 return None 5404 5405 maybe_func = True 5406 5407 values: t.Optional[t.List[exp.Expression]] = None 5408 5409 if nested and self._match(TokenType.LT): 5410 if is_struct: 5411 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5412 else: 5413 expressions = self._parse_csv( 5414 lambda: self._parse_types( 5415 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5416 ) 5417 ) 5418 5419 if not self._match(TokenType.GT): 5420 self.raise_error("Expecting >") 5421 5422 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5423 values = self._parse_csv(self._parse_assignment) 5424 if not values and is_struct: 5425 values = None 5426 self._retreat(self._index - 1) 5427 else: 5428 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5429 5430 if type_token in self.TIMESTAMPS: 5431 if self._match_text_seq("WITH", "TIME", "ZONE"): 5432 maybe_func = False 5433 tz_type = ( 5434 exp.DataType.Type.TIMETZ 5435 if type_token in self.TIMES 5436 else exp.DataType.Type.TIMESTAMPTZ 5437 ) 5438 this = exp.DataType(this=tz_type, expressions=expressions) 5439 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5440 maybe_func = False 5441 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5442 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5443 maybe_func = False 5444 elif type_token == TokenType.INTERVAL: 5445 unit = self._parse_var(upper=True) 5446 if unit: 5447 if self._match_text_seq("TO"): 5448 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5449 5450 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5451 else: 5452 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5453 elif type_token == TokenType.VOID: 5454 this = exp.DataType(this=exp.DataType.Type.NULL) 5455 5456 if maybe_func and check_func: 5457 index2 = self._index 5458 peek = self._parse_string() 5459 5460 if not peek: 5461 self._retreat(index) 5462 return None 5463 5464 self._retreat(index2) 5465 5466 if not this: 5467 if self._match_text_seq("UNSIGNED"): 5468 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5469 if not unsigned_type_token: 5470 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5471 5472 type_token = unsigned_type_token or type_token 5473 5474 this = exp.DataType( 5475 this=exp.DataType.Type[type_token.value], 5476 
expressions=expressions, 5477 nested=nested, 5478 prefix=prefix, 5479 ) 5480 5481 # Empty arrays/structs are allowed 5482 if values is not None: 5483 cls = exp.Struct if is_struct else exp.Array 5484 this = exp.cast(cls(expressions=values), this, copy=False) 5485 5486 elif expressions: 5487 this.set("expressions", expressions) 5488 5489 # https://materialize.com/docs/sql/types/list/#type-name 5490 while self._match(TokenType.LIST): 5491 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5492 5493 index = self._index 5494 5495 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5496 matched_array = self._match(TokenType.ARRAY) 5497 5498 while self._curr: 5499 datatype_token = self._prev.token_type 5500 matched_l_bracket = self._match(TokenType.L_BRACKET) 5501 5502 if (not matched_l_bracket and not matched_array) or ( 5503 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5504 ): 5505 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5506 # not to be confused with the fixed size array parsing 5507 break 5508 5509 matched_array = False 5510 values = self._parse_csv(self._parse_assignment) or None 5511 if ( 5512 values 5513 and not schema 5514 and ( 5515 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5516 ) 5517 ): 5518 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5519 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5520 self._retreat(index) 5521 break 5522 5523 this = exp.DataType( 5524 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5525 ) 5526 self._match(TokenType.R_BRACKET) 5527 5528 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5529 converter = self.TYPE_CONVERTERS.get(this.this) 5530 if converter: 5531 this = converter(t.cast(exp.DataType, this)) 5532 5533 return this 5534 5535 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5536 index = self._index 5537 5538 if ( 5539 self._curr 5540 and self._next 5541 and self._curr.token_type in self.TYPE_TOKENS 5542 and self._next.token_type in self.TYPE_TOKENS 5543 ): 5544 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5545 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5546 this = self._parse_id_var() 5547 else: 5548 this = ( 5549 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5550 or self._parse_id_var() 5551 ) 5552 5553 self._match(TokenType.COLON) 5554 5555 if ( 5556 type_required 5557 and not isinstance(this, exp.DataType) 5558 and not self._match_set(self.TYPE_TOKENS, advance=False) 5559 ): 5560 self._retreat(index) 5561 return self._parse_types() 5562 5563 return self._parse_column_def(this) 5564 5565 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5566 if not self._match_text_seq("AT", "TIME", "ZONE"): 5567 return this 5568 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5569 5570 def _parse_column(self) -> t.Optional[exp.Expression]: 5571 this = self._parse_column_reference() 5572 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5573 5574 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5575 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5576 5577 return column 5578 5579 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5580 this = self._parse_field() 5581 if ( 5582 not this 5583 and self._match(TokenType.VALUES, advance=False) 5584 and self.VALUES_FOLLOWED_BY_PAREN 5585 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5586 ): 5587 this = self._parse_id_var() 5588 5589 if isinstance(this, exp.Identifier): 5590 # We bubble up comments from the Identifier to the Column 5591 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5592 5593 return this 5594 5595 def _parse_colon_as_variant_extract( 5596 self, this: t.Optional[exp.Expression] 5597 ) -> t.Optional[exp.Expression]: 5598 casts = [] 5599 json_path = [] 5600 escape = None 5601 5602 while self._match(TokenType.COLON): 5603 start_index = self._index 5604 5605 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5606 path = self._parse_column_ops( 5607 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5608 ) 5609 5610 # The cast :: operator has a lower precedence than the extraction operator :, so 5611 # we rearrange the AST appropriately to avoid casting the JSON path 5612 while isinstance(path, exp.Cast): 5613 casts.append(path.to) 5614 path = path.this 5615 5616 if casts: 5617 dcolon_offset = next( 5618 i 5619 for i, t in enumerate(self._tokens[start_index:]) 5620 if t.token_type == TokenType.DCOLON 5621 ) 5622 end_token = self._tokens[start_index + dcolon_offset - 1] 5623 else: 5624 end_token = self._prev 5625 5626 if path: 5627 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5628 # it'll roundtrip to a string literal in GET_PATH 5629 if isinstance(path, exp.Identifier) and path.quoted: 5630 escape = True 5631 5632 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5633 5634 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5635 # Databricks transforms it back to the colon/dot notation 5636 if json_path: 5637 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5638 5639 if json_path_expr: 5640 json_path_expr.set("escape", escape) 5641 5642 this = self.expression( 5643 exp.JSONExtract, 5644 this=this, 5645 expression=json_path_expr, 5646 variant_extract=True, 5647 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5648 ) 5649 5650 while casts: 5651 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5652 5653 return this 5654 5655 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5656 return self._parse_types() 5657 5658 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5659 this = self._parse_bracket(this) 5660 5661 while self._match_set(self.COLUMN_OPERATORS): 5662 op_token = self._prev.token_type 5663 op = self.COLUMN_OPERATORS.get(op_token) 5664 5665 if op_token in self.CAST_COLUMN_OPERATORS: 5666 field = self._parse_dcolon() 5667 if not field: 5668 self.raise_error("Expected type") 5669 elif op and self._curr: 5670 field = self._parse_column_reference() or self._parse_bracket() 5671 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5672 field = self._parse_column_ops(field) 5673 else: 5674 field = self._parse_field(any_token=True, anonymous_func=True) 5675 5676 # Function calls can be qualified, e.g., x.y.FOO() 5677 # This converts the final AST to a series of Dots leading to the function call 5678 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5679 if isinstance(field, (exp.Func, exp.Window)) and this: 5680 this = this.transform( 5681 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5682 ) 5683 5684 if op: 5685 this = op(self, this, field) 5686 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5687 this = self.expression( 5688 exp.Column, 5689 comments=this.comments, 5690 this=field, 5691 table=this.this, 5692 db=this.args.get("table"), 5693 catalog=this.args.get("db"), 5694 ) 5695 elif isinstance(field, exp.Window): 5696 # Move the exp.Dot's to the window's function 5697 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5698 field.set("this", window_func) 5699 this = field 5700 else: 5701 this = self.expression(exp.Dot, this=this, expression=field) 5702 5703 if field and field.comments: 5704 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5705 5706 this = self._parse_bracket(this) 5707 5708 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5709 5710 def _parse_paren(self) -> t.Optional[exp.Expression]: 5711 if not self._match(TokenType.L_PAREN): 5712 return None 5713 5714 comments = self._prev_comments 5715 query = self._parse_select() 5716 5717 if query: 5718 expressions = [query] 5719 else: 5720 expressions = self._parse_expressions() 5721 5722 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5723 5724 if not this and self._match(TokenType.R_PAREN, advance=False): 5725 this = self.expression(exp.Tuple) 5726 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5727 this = self._parse_subquery(this=this, parse_alias=False) 5728 elif isinstance(this, exp.Subquery): 5729 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5730 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5731 this = self.expression(exp.Tuple, expressions=expressions) 5732 else: 5733 this = self.expression(exp.Paren, this=this) 5734 5735 if this: 5736 this.add_comments(comments) 5737 5738 self._match_r_paren(expression=this) 5739 return this 5740 5741 def _parse_primary(self) -> t.Optional[exp.Expression]: 5742 if self._match_set(self.PRIMARY_PARSERS): 5743 token_type = self._prev.token_type 5744 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5745 5746 if token_type == TokenType.STRING: 5747 expressions = [primary] 5748 while self._match(TokenType.STRING): 5749 expressions.append(exp.Literal.string(self._prev.text)) 5750 5751 if len(expressions) > 1: 5752 return self.expression(exp.Concat, expressions=expressions) 5753 5754 return primary 5755 5756 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5757 return exp.Literal.number(f"0.{self._prev.text}") 5758 5759 return self._parse_paren() 5760 5761 def _parse_field( 5762 self, 5763 any_token: bool = False, 5764 tokens: t.Optional[t.Collection[TokenType]] = None, 5765 anonymous_func: bool = False, 5766 ) -> t.Optional[exp.Expression]: 5767 if anonymous_func: 5768 field = ( 5769 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5770 or self._parse_primary() 5771 ) 5772 else: 5773 field = self._parse_primary() or self._parse_function( 5774 anonymous=anonymous_func, any_token=any_token 5775 ) 5776 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5777 5778 def _parse_function( 5779 self, 5780 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5781 anonymous: bool = False, 5782 optional_parens: bool = True, 5783 any_token: bool = False, 5784 ) -> t.Optional[exp.Expression]: 5785 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5786 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5787 fn_syntax = False 5788 if ( 5789 self._match(TokenType.L_BRACE, advance=False) 5790 and self._next 5791 and self._next.text.upper() == "FN" 5792 ): 5793 self._advance(2) 5794 fn_syntax = True 5795 5796 func = self._parse_function_call( 5797 functions=functions, 5798 anonymous=anonymous, 5799 optional_parens=optional_parens, 5800 any_token=any_token, 5801 ) 5802 5803 if fn_syntax: 5804 self._match(TokenType.R_BRACE) 5805 5806 return func 5807 5808 def _parse_function_call( 5809 self, 5810 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5811 anonymous: bool = False, 5812 optional_parens: bool = True, 5813 any_token: bool = False, 5814 ) -> t.Optional[exp.Expression]: 5815 if not self._curr: 5816 return None 5817 5818 comments = self._curr.comments 5819 prev = self._prev 5820 token = self._curr 5821 token_type = self._curr.token_type 5822 this = self._curr.text 5823 upper = this.upper() 5824 5825 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5826 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5827 self._advance() 5828 return self._parse_window(parser(self)) 5829 5830 if not self._next or self._next.token_type != TokenType.L_PAREN: 5831 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5832 self._advance() 5833 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5834 5835 return None 5836 5837 if 
any_token: 5838 if token_type in self.RESERVED_TOKENS: 5839 return None 5840 elif token_type not in self.FUNC_TOKENS: 5841 return None 5842 5843 self._advance(2) 5844 5845 parser = self.FUNCTION_PARSERS.get(upper) 5846 if parser and not anonymous: 5847 this = parser(self) 5848 else: 5849 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5850 5851 if subquery_predicate: 5852 expr = None 5853 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5854 expr = self._parse_select() 5855 self._match_r_paren() 5856 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5857 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5858 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5859 self._advance(-1) 5860 expr = self._parse_bitwise() 5861 5862 if expr: 5863 return self.expression(subquery_predicate, comments=comments, this=expr) 5864 5865 if functions is None: 5866 functions = self.FUNCTIONS 5867 5868 function = functions.get(upper) 5869 known_function = function and not anonymous 5870 5871 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5872 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5873 5874 post_func_comments = self._curr and self._curr.comments 5875 if known_function and post_func_comments: 5876 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5877 # call we'll construct it as exp.Anonymous, even if it's "known" 5878 if any( 5879 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5880 for comment in post_func_comments 5881 ): 5882 known_function = False 5883 5884 if alias and known_function: 5885 args = self._kv_to_prop_eq(args) 5886 5887 if known_function: 5888 func_builder = t.cast(t.Callable, function) 5889 5890 if "dialect" in func_builder.__code__.co_varnames: 5891 func = func_builder(args, dialect=self.dialect) 5892 else: 5893 func = func_builder(args) 5894 5895 func = self.validate_expression(func, args) 5896 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5897 func.meta["name"] = this 5898 5899 this = func 5900 else: 5901 if token_type == TokenType.IDENTIFIER: 5902 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5903 5904 this = self.expression(exp.Anonymous, this=this, expressions=args) 5905 this = this.update_positions(token) 5906 5907 if isinstance(this, exp.Expression): 5908 this.add_comments(comments) 5909 5910 self._match_r_paren(this) 5911 return self._parse_window(this) 5912 5913 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5914 return expression 5915 5916 def _kv_to_prop_eq( 5917 self, expressions: t.List[exp.Expression], parse_map: bool = False 5918 ) -> t.List[exp.Expression]: 5919 transformed = [] 5920 5921 for index, e in enumerate(expressions): 5922 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5923 if isinstance(e, exp.Alias): 5924 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5925 5926 if not isinstance(e, exp.PropertyEQ): 5927 e = self.expression( 5928 exp.PropertyEQ, 5929 this=e.this if parse_map else exp.to_identifier(e.this.name), 5930 expression=e.expression, 5931 ) 5932 5933 if isinstance(e.this, exp.Column): 5934 e.this.replace(e.this.this) 5935 else: 5936 e = self._to_prop_eq(e, index) 5937 5938 transformed.append(e) 5939 5940 return transformed 5941 5942 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5943 return self._parse_statement() 5944 5945 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5946 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5947 5948 def _parse_user_defined_function( 5949 self, kind: t.Optional[TokenType] = None 5950 ) -> t.Optional[exp.Expression]: 5951 this = self._parse_table_parts(schema=True) 5952 5953 if not self._match(TokenType.L_PAREN): 5954 return this 5955 5956 expressions = self._parse_csv(self._parse_function_parameter) 5957 self._match_r_paren() 5958 return self.expression( 5959 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5960 ) 5961 5962 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5963 literal = self._parse_primary() 5964 if literal: 5965 return self.expression(exp.Introducer, this=token.text, expression=literal) 5966 5967 return self._identifier_expression(token) 5968 5969 def _parse_session_parameter(self) -> exp.SessionParameter: 5970 kind = None 5971 this = self._parse_id_var() or self._parse_primary() 5972 5973 if this and self._match(TokenType.DOT): 5974 kind = this.name 5975 this = self._parse_var() or self._parse_primary() 5976 5977 return self.expression(exp.SessionParameter, this=this, kind=kind) 5978 5979 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5980 return self._parse_id_var() 5981 5982 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5983 index = self._index 5984 5985 if self._match(TokenType.L_PAREN): 5986 expressions = t.cast( 5987 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5988 ) 5989 5990 if not self._match(TokenType.R_PAREN): 5991 self._retreat(index) 5992 else: 5993 expressions = [self._parse_lambda_arg()] 5994 5995 if self._match_set(self.LAMBDAS): 5996 return self.LAMBDAS[self._prev.token_type](self, expressions) 5997 5998 self._retreat(index) 5999 6000 this: t.Optional[exp.Expression] 6001 6002 if self._match(TokenType.DISTINCT): 6003 this = self.expression( 6004 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6005 ) 6006 else: 6007 this = self._parse_select_or_expression(alias=alias) 6008 6009 return self._parse_limit( 6010 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6011 ) 6012 6013 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6014 index = self._index 6015 if not self._match(TokenType.L_PAREN): 6016 return this 6017 6018 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6019 # expr can be of both types 6020 if self._match_set(self.SELECT_START_TOKENS): 6021 self._retreat(index) 6022 return this 6023 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6024 self._match_r_paren() 6025 return self.expression(exp.Schema, this=this, expressions=args) 6026 6027 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6028 return self._parse_column_def(self._parse_field(any_token=True)) 6029 6030 def _parse_column_def( 6031 self, this: t.Optional[exp.Expression], computed_column: bool = True 6032 ) -> t.Optional[exp.Expression]: 6033 # column defs are not really columns, they're identifiers 6034 if isinstance(this, exp.Column): 6035 this = this.this 6036 6037 if not computed_column: 6038 self._match(TokenType.ALIAS) 6039 6040 kind = self._parse_types(schema=True) 6041 6042 if self._match_text_seq("FOR", "ORDINALITY"): 6043 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6044 6045 constraints: t.List[exp.Expression] = [] 6046 6047 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6048 ("ALIAS", "MATERIALIZED") 6049 ): 6050 persisted = self._prev.text.upper() == "MATERIALIZED" 6051 constraint_kind = exp.ComputedColumnConstraint( 6052 this=self._parse_assignment(), 6053 persisted=persisted or self._match_text_seq("PERSISTED"), 6054 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6055 ) 6056 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6057 elif ( 6058 kind 6059 and self._match(TokenType.ALIAS, advance=False) 6060 and ( 6061 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6062 or (self._next and self._next.token_type == TokenType.L_PAREN) 6063 ) 6064 ): 6065 self._advance() 6066 constraints.append( 6067 self.expression( 6068 exp.ColumnConstraint, 6069 kind=exp.ComputedColumnConstraint( 6070 this=self._parse_disjunction(), 6071 persisted=self._match_texts(("STORED", "VIRTUAL")) 6072 and self._prev.text.upper() == "STORED", 6073 ), 6074 ) 6075 ) 6076 6077 while True: 6078 constraint = self._parse_column_constraint() 6079 if not constraint: 6080 break 6081 constraints.append(constraint) 6082 6083 if not kind and not constraints: 6084 return this 6085 6086 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6087 6088 def _parse_auto_increment( 6089 self, 6090 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6091 start = None 6092 increment = None 6093 order = None 6094 6095 if self._match(TokenType.L_PAREN, advance=False): 6096 args = self._parse_wrapped_csv(self._parse_bitwise) 6097 start = seq_get(args, 0) 6098 increment = seq_get(args, 1) 6099 elif self._match_text_seq("START"): 6100 start = self._parse_bitwise() 6101 self._match_text_seq("INCREMENT") 6102 increment = self._parse_bitwise() 6103 if self._match_text_seq("ORDER"): 6104 order = True 6105 elif self._match_text_seq("NOORDER"): 6106 order = False 6107 6108 if start and increment: 6109 return exp.GeneratedAsIdentityColumnConstraint( 6110 start=start, increment=increment, this=False, order=order 6111 ) 6112 6113 return exp.AutoIncrementColumnConstraint() 6114 6115 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6116 if not self._match_text_seq("REFRESH"): 6117 self._retreat(self._index - 1) 6118 return None 6119 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6120 6121 def _parse_compress(self) -> exp.CompressColumnConstraint: 6122 if 
self._match(TokenType.L_PAREN, advance=False): 6123 return self.expression( 6124 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6125 ) 6126 6127 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6128 6129 def _parse_generated_as_identity( 6130 self, 6131 ) -> ( 6132 exp.GeneratedAsIdentityColumnConstraint 6133 | exp.ComputedColumnConstraint 6134 | exp.GeneratedAsRowColumnConstraint 6135 ): 6136 if self._match_text_seq("BY", "DEFAULT"): 6137 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6138 this = self.expression( 6139 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6140 ) 6141 else: 6142 self._match_text_seq("ALWAYS") 6143 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6144 6145 self._match(TokenType.ALIAS) 6146 6147 if self._match_text_seq("ROW"): 6148 start = self._match_text_seq("START") 6149 if not start: 6150 self._match(TokenType.END) 6151 hidden = self._match_text_seq("HIDDEN") 6152 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6153 6154 identity = self._match_text_seq("IDENTITY") 6155 6156 if self._match(TokenType.L_PAREN): 6157 if self._match(TokenType.START_WITH): 6158 this.set("start", self._parse_bitwise()) 6159 if self._match_text_seq("INCREMENT", "BY"): 6160 this.set("increment", self._parse_bitwise()) 6161 if self._match_text_seq("MINVALUE"): 6162 this.set("minvalue", self._parse_bitwise()) 6163 if self._match_text_seq("MAXVALUE"): 6164 this.set("maxvalue", self._parse_bitwise()) 6165 6166 if self._match_text_seq("CYCLE"): 6167 this.set("cycle", True) 6168 elif self._match_text_seq("NO", "CYCLE"): 6169 this.set("cycle", False) 6170 6171 if not identity: 6172 this.set("expression", self._parse_range()) 6173 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6174 args = self._parse_csv(self._parse_bitwise) 6175 this.set("start", seq_get(args, 0)) 6176 this.set("increment", seq_get(args, 1)) 6177 6178 self._match_r_paren() 6179 6180 return this 6181 6182 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6183 self._match_text_seq("LENGTH") 6184 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6185 6186 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6187 if self._match_text_seq("NULL"): 6188 return self.expression(exp.NotNullColumnConstraint) 6189 if self._match_text_seq("CASESPECIFIC"): 6190 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6191 if self._match_text_seq("FOR", "REPLICATION"): 6192 return self.expression(exp.NotForReplicationColumnConstraint) 6193 6194 # Unconsume the `NOT` token 6195 self._retreat(self._index - 1) 6196 return None 6197 6198 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6199 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6200 6201 procedure_option_follows = ( 6202 self._match(TokenType.WITH, advance=False) 6203 and self._next 6204 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6205 ) 6206 6207 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6208 return self.expression( 6209 exp.ColumnConstraint, 6210 this=this, 6211 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6212 ) 6213 6214 return this 6215 6216 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6217 if not self._match(TokenType.CONSTRAINT): 6218 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6219 6220 return self.expression( 6221 exp.Constraint, 6222 this=self._parse_id_var(), 6223 expressions=self._parse_unnamed_constraints(), 6224 ) 6225 6226 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6227 constraints = [] 6228 while True: 6229 constraint = self._parse_unnamed_constraint() or self._parse_function() 6230 if not constraint: 6231 break 6232 constraints.append(constraint) 6233 6234 return constraints 6235 6236 def _parse_unnamed_constraint( 6237 self, constraints: t.Optional[t.Collection[str]] = None 6238 ) -> t.Optional[exp.Expression]: 6239 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6240 constraints or self.CONSTRAINT_PARSERS 6241 ): 6242 return None 6243 6244 constraint = self._prev.text.upper() 6245 if constraint not in self.CONSTRAINT_PARSERS: 6246 self.raise_error(f"No parser found for schema constraint {constraint}.") 6247 6248 return self.CONSTRAINT_PARSERS[constraint](self) 6249 6250 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6251 return self._parse_id_var(any_token=False) 6252 6253 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6254 self._match_texts(("KEY", "INDEX")) 6255 return self.expression( 6256 exp.UniqueColumnConstraint, 6257 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6258 this=self._parse_schema(self._parse_unique_key()), 6259 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6260 on_conflict=self._parse_on_conflict(), 6261 options=self._parse_key_constraint_options(), 6262 ) 6263 6264 def _parse_key_constraint_options(self) -> t.List[str]: 6265 options = [] 6266 while True: 6267 if not self._curr: 6268 break 6269 6270 if self._match(TokenType.ON): 6271 action = None 6272 on = self._advance_any() and self._prev.text 6273 6274 if self._match_text_seq("NO", "ACTION"): 6275 action = "NO ACTION" 6276 elif self._match_text_seq("CASCADE"): 6277 action = "CASCADE" 6278 elif self._match_text_seq("RESTRICT"): 6279 action = "RESTRICT" 6280 elif self._match_pair(TokenType.SET, TokenType.NULL): 6281 action = "SET NULL" 6282 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6283 action = "SET DEFAULT" 6284 else: 6285 self.raise_error("Invalid key constraint") 6286 6287 options.append(f"ON {on} {action}") 6288 else: 6289 var = self._parse_var_from_options( 6290 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6291 ) 6292 if not var: 6293 break 6294 options.append(var.name) 6295 6296 return options 6297 6298 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6299 if match and not self._match(TokenType.REFERENCES): 6300 return None 6301 6302 expressions = None 6303 this = self._parse_table(schema=True) 6304 options = self._parse_key_constraint_options() 6305 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6306 6307 def _parse_foreign_key(self) -> exp.ForeignKey: 6308 expressions = ( 6309 self._parse_wrapped_id_vars() 6310 if not self._match(TokenType.REFERENCES, advance=False) 6311 else None 6312 ) 6313 reference = self._parse_references() 6314 on_options = {} 6315 6316 while self._match(TokenType.ON): 6317 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6318 self.raise_error("Expected DELETE or UPDATE") 6319 6320 kind = self._prev.text.lower() 6321 6322 if self._match_text_seq("NO", "ACTION"): 6323 action = "NO ACTION" 6324 elif self._match(TokenType.SET): 6325 
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime literal in ODBC format. We parse the literal into the corresponding
        expression type, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` node, exactly
        the same as we would for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
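        # For example (an illustrative sketch): with ODBC_DATETIME_LITERALS mapping the
        # prefixes "d", "t" and "ts" to exp.Date, exp.Time and exp.Timestamp respectively,
        # {ts '2024-01-01 00:00:00'} should come out as an exp.Timestamp wrapping the
        # string literal '2024-01-01 00:00:00'.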
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default
= exp.column("interval") 6491 else: 6492 self.raise_error("Expected END after CASE", self._prev) 6493 6494 return self.expression( 6495 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6496 ) 6497 6498 def _parse_if(self) -> t.Optional[exp.Expression]: 6499 if self._match(TokenType.L_PAREN): 6500 args = self._parse_csv( 6501 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6502 ) 6503 this = self.validate_expression(exp.If.from_arg_list(args), args) 6504 self._match_r_paren() 6505 else: 6506 index = self._index - 1 6507 6508 if self.NO_PAREN_IF_COMMANDS and index == 0: 6509 return self._parse_as_command(self._prev) 6510 6511 condition = self._parse_assignment() 6512 6513 if not condition: 6514 self._retreat(index) 6515 return None 6516 6517 self._match(TokenType.THEN) 6518 true = self._parse_assignment() 6519 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6520 self._match(TokenType.END) 6521 this = self.expression(exp.If, this=condition, true=true, false=false) 6522 6523 return this 6524 6525 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6526 if not self._match_text_seq("VALUE", "FOR"): 6527 self._retreat(self._index - 1) 6528 return None 6529 6530 return self.expression( 6531 exp.NextValueFor, 6532 this=self._parse_column(), 6533 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6534 ) 6535 6536 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6537 this = self._parse_function() or self._parse_var_or_string(upper=True) 6538 6539 if self._match(TokenType.FROM): 6540 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6541 6542 if not self._match(TokenType.COMMA): 6543 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6544 6545 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6546 6547 def _parse_gap_fill(self) -> exp.GapFill: 6548 self._match(TokenType.TABLE) 6549 this = self._parse_table() 6550 6551 self._match(TokenType.COMMA) 6552 args = [this, *self._parse_csv(self._parse_lambda)] 6553 6554 gap_fill = exp.GapFill.from_arg_list(args) 6555 return self.validate_expression(gap_fill, args) 6556 6557 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6558 this = self._parse_assignment() 6559 6560 if not self._match(TokenType.ALIAS): 6561 if self._match(TokenType.COMMA): 6562 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6563 6564 self.raise_error("Expected AS after CAST") 6565 6566 fmt = None 6567 to = self._parse_types() 6568 6569 default = self._match(TokenType.DEFAULT) 6570 if default: 6571 default = self._parse_bitwise() 6572 self._match_text_seq("ON", "CONVERSION", "ERROR") 6573 6574 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6575 fmt_string = self._parse_string() 6576 fmt = self._parse_at_time_zone(fmt_string) 6577 6578 if not to: 6579 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6580 if to.this in exp.DataType.TEMPORAL_TYPES: 6581 this = self.expression( 6582 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6583 this=this, 6584 format=exp.Literal.string( 6585 format_time( 6586 fmt_string.this if fmt_string else "", 6587 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6588 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6589 ) 6590 ), 6591 safe=safe, 6592 ) 6593 6594 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6595 this.set("zone", 
fmt.args["zone"]) 6596 return this 6597 elif not to: 6598 self.raise_error("Expected TYPE after CAST") 6599 elif isinstance(to, exp.Identifier): 6600 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6601 elif to.this == exp.DataType.Type.CHAR: 6602 if self._match(TokenType.CHARACTER_SET): 6603 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6604 6605 return self.build_cast( 6606 strict=strict, 6607 this=this, 6608 to=to, 6609 format=fmt, 6610 safe=safe, 6611 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6612 default=default, 6613 ) 6614 6615 def _parse_string_agg(self) -> exp.GroupConcat: 6616 if self._match(TokenType.DISTINCT): 6617 args: t.List[t.Optional[exp.Expression]] = [ 6618 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6619 ] 6620 if self._match(TokenType.COMMA): 6621 args.extend(self._parse_csv(self._parse_assignment)) 6622 else: 6623 args = self._parse_csv(self._parse_assignment) # type: ignore 6624 6625 if self._match_text_seq("ON", "OVERFLOW"): 6626 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6627 if self._match_text_seq("ERROR"): 6628 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6629 else: 6630 self._match_text_seq("TRUNCATE") 6631 on_overflow = self.expression( 6632 exp.OverflowTruncateBehavior, 6633 this=self._parse_string(), 6634 with_count=( 6635 self._match_text_seq("WITH", "COUNT") 6636 or not self._match_text_seq("WITHOUT", "COUNT") 6637 ), 6638 ) 6639 else: 6640 on_overflow = None 6641 6642 index = self._index 6643 if not self._match(TokenType.R_PAREN) and args: 6644 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6645 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6646 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6647 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6648 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6649 6650 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6651 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6652 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
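        # For example (an illustrative sketch): STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y)
        # is folded into GroupConcat(this=<x ORDER BY y>, separator=','), which is the same
        # shape that a MySQL-style GROUP_CONCAT(x ORDER BY y SEPARATOR ',') parses into.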
6653 if not self._match_text_seq("WITHIN", "GROUP"): 6654 self._retreat(index) 6655 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6656 6657 # The corresponding match_r_paren will be called in parse_function (caller) 6658 self._match_l_paren() 6659 6660 return self.expression( 6661 exp.GroupConcat, 6662 this=self._parse_order(this=seq_get(args, 0)), 6663 separator=seq_get(args, 1), 6664 on_overflow=on_overflow, 6665 ) 6666 6667 def _parse_convert( 6668 self, strict: bool, safe: t.Optional[bool] = None 6669 ) -> t.Optional[exp.Expression]: 6670 this = self._parse_bitwise() 6671 6672 if self._match(TokenType.USING): 6673 to: t.Optional[exp.Expression] = self.expression( 6674 exp.CharacterSet, this=self._parse_var() 6675 ) 6676 elif self._match(TokenType.COMMA): 6677 to = self._parse_types() 6678 else: 6679 to = None 6680 6681 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6682 6683 def _parse_xml_table(self) -> exp.XMLTable: 6684 namespaces = None 6685 passing = None 6686 columns = None 6687 6688 if self._match_text_seq("XMLNAMESPACES", "("): 6689 namespaces = self._parse_xml_namespace() 6690 self._match_text_seq(")", ",") 6691 6692 this = self._parse_string() 6693 6694 if self._match_text_seq("PASSING"): 6695 # The BY VALUE keywords are optional and are provided for semantic clarity 6696 self._match_text_seq("BY", "VALUE") 6697 passing = self._parse_csv(self._parse_column) 6698 6699 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6700 6701 if self._match_text_seq("COLUMNS"): 6702 columns = self._parse_csv(self._parse_field_def) 6703 6704 return self.expression( 6705 exp.XMLTable, 6706 this=this, 6707 namespaces=namespaces, 6708 passing=passing, 6709 columns=columns, 6710 by_ref=by_ref, 6711 ) 6712 6713 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6714 namespaces = [] 6715 6716 while True: 6717 if self._match(TokenType.DEFAULT): 6718 uri = self._parse_string() 6719 else: 6720 uri = self._parse_alias(self._parse_string()) 6721 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6722 if not self._match(TokenType.COMMA): 6723 break 6724 6725 return namespaces 6726 6727 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6728 args = self._parse_csv(self._parse_assignment) 6729 6730 if len(args) < 3: 6731 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6732 6733 return self.expression(exp.DecodeCase, expressions=args) 6734 6735 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6736 self._match_text_seq("KEY") 6737 key = self._parse_column() 6738 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6739 self._match_text_seq("VALUE") 6740 value = self._parse_bitwise() 6741 6742 if not key and not value: 6743 return None 6744 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6745 6746 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6747 if not this or not self._match_text_seq("FORMAT", "JSON"): 6748 return this 6749 6750 return self.expression(exp.FormatJson, this=this) 6751 6752 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6753 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6754 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6755 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6756 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6757 else: 6758 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6759 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6760 6761 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6762 6763 if not empty and not error and not null: 6764 return None 6765 6766 return self.expression( 6767 exp.OnCondition, 6768 empty=empty, 6769 error=error, 6770 null=null, 6771 ) 6772 6773 def _parse_on_handling( 6774 self, on: str, *values: str 6775 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6776 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6777 for value in values: 6778 if self._match_text_seq(value, "ON", on): 6779 return f"{value} ON {on}" 6780 6781 index = self._index 6782 if self._match(TokenType.DEFAULT): 6783 default_value = self._parse_bitwise() 6784 if self._match_text_seq("ON", on): 6785 return default_value 6786 6787 self._retreat(index) 6788 6789 return None 6790 6791 @t.overload 6792 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6793 6794 @t.overload 6795 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6796 6797 def _parse_json_object(self, agg=False): 6798 star = self._parse_star() 6799 expressions = ( 6800 [star] 6801 if star 6802 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6803 ) 6804 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6805 6806 unique_keys = None 6807 if self._match_text_seq("WITH", "UNIQUE"): 6808 unique_keys = True 6809 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6810 unique_keys = False 6811 6812 self._match_text_seq("KEYS") 6813 6814 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6815 self._parse_type() 6816 ) 6817 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6818 6819 return self.expression( 6820 exp.JSONObjectAgg if agg else exp.JSONObject, 6821 expressions=expressions, 6822 null_handling=null_handling, 6823 unique_keys=unique_keys, 6824 return_type=return_type, 6825 encoding=encoding, 6826 ) 6827 6828 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6829 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6830 if not self._match_text_seq("NESTED"): 6831 this = self._parse_id_var() 6832 kind = self._parse_types(allow_identifiers=False) 6833 nested = None 6834 else: 6835 this = None 6836 kind = None 6837 nested = True 6838 6839 path = self._match_text_seq("PATH") and self._parse_string() 6840 nested_schema = nested and self._parse_json_schema() 6841 6842 return self.expression( 6843 exp.JSONColumnDef, 6844 this=this, 6845 kind=kind, 6846 path=path, 6847 nested_schema=nested_schema, 6848 ) 6849 6850 def _parse_json_schema(self) -> exp.JSONSchema: 6851 self._match_text_seq("COLUMNS") 6852 return self.expression( 6853 exp.JSONSchema, 6854 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6855 ) 6856 6857 def _parse_json_table(self) -> exp.JSONTable: 6858 this = self._parse_format_json(self._parse_bitwise()) 6859 path = self._match(TokenType.COMMA) and self._parse_string() 6860 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6861 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6862 schema = 
self._parse_json_schema() 6863 6864 return exp.JSONTable( 6865 this=this, 6866 schema=schema, 6867 path=path, 6868 error_handling=error_handling, 6869 empty_handling=empty_handling, 6870 ) 6871 6872 def _parse_match_against(self) -> exp.MatchAgainst: 6873 if self._match_text_seq("TABLE"): 6874 # parse SingleStore MATCH(TABLE ...) syntax 6875 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6876 expressions = [] 6877 table = self._parse_table() 6878 if table: 6879 expressions = [table] 6880 else: 6881 expressions = self._parse_csv(self._parse_column) 6882 6883 self._match_text_seq(")", "AGAINST", "(") 6884 6885 this = self._parse_string() 6886 6887 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6888 modifier = "IN NATURAL LANGUAGE MODE" 6889 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6890 modifier = f"{modifier} WITH QUERY EXPANSION" 6891 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6892 modifier = "IN BOOLEAN MODE" 6893 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6894 modifier = "WITH QUERY EXPANSION" 6895 else: 6896 modifier = None 6897 6898 return self.expression( 6899 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6900 ) 6901 6902 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6903 def _parse_open_json(self) -> exp.OpenJSON: 6904 this = self._parse_bitwise() 6905 path = self._match(TokenType.COMMA) and self._parse_string() 6906 6907 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6908 this = self._parse_field(any_token=True) 6909 kind = self._parse_types() 6910 path = self._parse_string() 6911 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6912 6913 return self.expression( 6914 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6915 ) 6916 6917 expressions = None 6918 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6919 self._match_l_paren() 6920 expressions = self._parse_csv(_parse_open_json_column_def) 6921 6922 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6923 6924 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6925 args = self._parse_csv(self._parse_bitwise) 6926 6927 if self._match(TokenType.IN): 6928 return self.expression( 6929 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6930 ) 6931 6932 if haystack_first: 6933 haystack = seq_get(args, 0) 6934 needle = seq_get(args, 1) 6935 else: 6936 haystack = seq_get(args, 1) 6937 needle = seq_get(args, 0) 6938 6939 return self.expression( 6940 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6941 ) 6942 6943 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6944 args = self._parse_csv(self._parse_table) 6945 return exp.JoinHint(this=func_name.upper(), expressions=args) 6946 6947 def _parse_substring(self) -> exp.Substring: 6948 # Postgres supports the form: substring(string [from int] [for int]) 6949 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6950 6951 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6952 6953 if self._match(TokenType.FROM): 6954 args.append(self._parse_bitwise()) 6955 if self._match(TokenType.FOR): 6956 if len(args) == 1: 6957 args.append(exp.Literal.number(1)) 6958 args.append(self._parse_bitwise()) 6959 6960 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6961 6962 def _parse_trim(self) 
-> exp.Trim: 6963 # https://www.w3resource.com/sql/character-functions/trim.php 6964 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6965 6966 position = None 6967 collation = None 6968 expression = None 6969 6970 if self._match_texts(self.TRIM_TYPES): 6971 position = self._prev.text.upper() 6972 6973 this = self._parse_bitwise() 6974 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6975 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6976 expression = self._parse_bitwise() 6977 6978 if invert_order: 6979 this, expression = expression, this 6980 6981 if self._match(TokenType.COLLATE): 6982 collation = self._parse_bitwise() 6983 6984 return self.expression( 6985 exp.Trim, this=this, position=position, expression=expression, collation=collation 6986 ) 6987 6988 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6989 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6990 6991 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6992 return self._parse_window(self._parse_id_var(), alias=True) 6993 6994 def _parse_respect_or_ignore_nulls( 6995 self, this: t.Optional[exp.Expression] 6996 ) -> t.Optional[exp.Expression]: 6997 if self._match_text_seq("IGNORE", "NULLS"): 6998 return self.expression(exp.IgnoreNulls, this=this) 6999 if self._match_text_seq("RESPECT", "NULLS"): 7000 return self.expression(exp.RespectNulls, this=this) 7001 return this 7002 7003 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7004 if self._match(TokenType.HAVING): 7005 self._match_texts(("MAX", "MIN")) 7006 max = self._prev.text.upper() != "MIN" 7007 return self.expression( 7008 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7009 ) 7010 7011 return this 7012 7013 def _parse_window( 7014 self, this: t.Optional[exp.Expression], alias: bool = False 7015 ) -> t.Optional[exp.Expression]: 7016 func = this 7017 comments = func.comments if isinstance(func, exp.Expression) else None 7018 7019 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7020 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7021 if self._match_text_seq("WITHIN", "GROUP"): 7022 order = self._parse_wrapped(self._parse_order) 7023 this = self.expression(exp.WithinGroup, this=this, expression=order) 7024 7025 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7026 self._match(TokenType.WHERE) 7027 this = self.expression( 7028 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7029 ) 7030 self._match_r_paren() 7031 7032 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER. 7033 # Some dialects choose to implement it and some do not. 7034 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7035 7036 # There is some code above in _parse_lambda that handles 7037 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7038 7039 # The code below handles 7040 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
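        # Illustrative sketch of the second shape (an assumption for the example:
        # the permissive default dialect accepts it):
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
        # Here IGNORE NULLS follows the closing paren of the function call, so it
        # must be attached to the function before the OVER clause is parsed.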
7041 7042 # Oracle allows both formats 7043 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7044 # and Snowflake chose to do the same for familiarity 7045 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7046 if isinstance(this, exp.AggFunc): 7047 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7048 7049 if ignore_respect and ignore_respect is not this: 7050 ignore_respect.replace(ignore_respect.this) 7051 this = self.expression(ignore_respect.__class__, this=this) 7052 7053 this = self._parse_respect_or_ignore_nulls(this) 7054 7055 # bigquery select from window x AS (partition by ...) 7056 if alias: 7057 over = None 7058 self._match(TokenType.ALIAS) 7059 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7060 return this 7061 else: 7062 over = self._prev.text.upper() 7063 7064 if comments and isinstance(func, exp.Expression): 7065 func.pop_comments() 7066 7067 if not self._match(TokenType.L_PAREN): 7068 return self.expression( 7069 exp.Window, 7070 comments=comments, 7071 this=this, 7072 alias=self._parse_id_var(False), 7073 over=over, 7074 ) 7075 7076 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7077 7078 first = self._match(TokenType.FIRST) 7079 if self._match_text_seq("LAST"): 7080 first = False 7081 7082 partition, order = self._parse_partition_and_order() 7083 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7084 7085 if kind: 7086 self._match(TokenType.BETWEEN) 7087 start = self._parse_window_spec() 7088 7089 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7090 exclude = ( 7091 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7092 if self._match_text_seq("EXCLUDE") 7093 else None 7094 ) 7095 7096 spec = self.expression( 7097 exp.WindowSpec, 7098 kind=kind, 7099 start=start["value"], 7100 start_side=start["side"], 7101 end=end.get("value"), 7102 end_side=end.get("side"), 7103 exclude=exclude, 7104 ) 7105 else: 7106 spec = None 7107 7108 self._match_r_paren() 7109 7110 window = self.expression( 7111 exp.Window, 7112 comments=comments, 7113 this=this, 7114 partition_by=partition, 7115 order=order, 7116 spec=spec, 7117 alias=window_alias, 7118 over=over, 7119 first=first, 7120 ) 7121 7122 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
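        # e.g. (Oracle, illustrative):
        #   MAX(x) KEEP (DENSE_RANK LAST ORDER BY y) OVER (PARTITION BY z)
        # The KEEP (...) part is parsed as the first window, and the trailing
        # OVER (...) is folded in by the recursive call below.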
7123 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7124 return self._parse_window(window, alias=alias) 7125 7126 return window 7127 7128 def _parse_partition_and_order( 7129 self, 7130 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7131 return self._parse_partition_by(), self._parse_order() 7132 7133 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7134 self._match(TokenType.BETWEEN) 7135 7136 return { 7137 "value": ( 7138 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7139 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7140 or self._parse_type() 7141 ), 7142 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7143 } 7144 7145 def _parse_alias( 7146 self, this: t.Optional[exp.Expression], explicit: bool = False 7147 ) -> t.Optional[exp.Expression]: 7148 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7149 # so this section tries to parse the clause version and if it fails, it treats the token 7150 # as an identifier (alias) 7151 if self._can_parse_limit_or_offset(): 7152 return this 7153 7154 any_token = self._match(TokenType.ALIAS) 7155 comments = self._prev_comments or [] 7156 7157 if explicit and not any_token: 7158 return this 7159 7160 if self._match(TokenType.L_PAREN): 7161 aliases = self.expression( 7162 exp.Aliases, 7163 comments=comments, 7164 this=this, 7165 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7166 ) 7167 self._match_r_paren(aliases) 7168 return aliases 7169 7170 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7171 self.STRING_ALIASES and self._parse_string_as_identifier() 7172 ) 7173 7174 if alias: 7175 comments.extend(alias.pop_comments()) 7176 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7177 column = this.this 7178 7179 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7180 if not this.comments and column and column.comments: 7181 this.comments = column.pop_comments() 7182 7183 return this 7184 7185 def _parse_id_var( 7186 self, 7187 any_token: bool = True, 7188 tokens: t.Optional[t.Collection[TokenType]] = None, 7189 ) -> t.Optional[exp.Expression]: 7190 expression = self._parse_identifier() 7191 if not expression and ( 7192 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7193 ): 7194 quoted = self._prev.token_type == TokenType.STRING 7195 expression = self._identifier_expression(quoted=quoted) 7196 7197 return expression 7198 7199 def _parse_string(self) -> t.Optional[exp.Expression]: 7200 if self._match_set(self.STRING_PARSERS): 7201 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7202 return self._parse_placeholder() 7203 7204 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7205 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7206 if output: 7207 output.update_positions(self._prev) 7208 return output 7209 7210 def _parse_number(self) -> t.Optional[exp.Expression]: 7211 if self._match_set(self.NUMERIC_PARSERS): 7212 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7213 return self._parse_placeholder() 7214 7215 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7216 if self._match(TokenType.IDENTIFIER): 7217 return self._identifier_expression(quoted=True) 7218 return self._parse_placeholder() 7219 7220 def _parse_var( 7221 self, 7222 any_token: bool = False, 7223 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7224 upper: bool = False, 7225 ) -> t.Optional[exp.Expression]: 7226 if ( 7227 (any_token and self._advance_any()) 7228 or self._match(TokenType.VAR) 7229 or (self._match_set(tokens) if tokens else False) 7230 ): 7231 return self.expression( 7232 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7233 ) 7234 return self._parse_placeholder() 7235 7236 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7237 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7238 self._advance() 7239 return self._prev 7240 return None 7241 7242 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7243 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7244 7245 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7246 return self._parse_primary() or self._parse_var(any_token=True) 7247 7248 def _parse_null(self) -> t.Optional[exp.Expression]: 7249 if self._match_set(self.NULL_TOKENS): 7250 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7251 return self._parse_placeholder() 7252 7253 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7254 if self._match(TokenType.TRUE): 7255 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7256 if self._match(TokenType.FALSE): 7257 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7258 return self._parse_placeholder() 7259 7260 def _parse_star(self) -> t.Optional[exp.Expression]: 7261 if self._match(TokenType.STAR): 7262 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7263 return self._parse_placeholder() 7264 7265 def _parse_parameter(self) -> exp.Parameter: 7266 this = self._parse_identifier() or self._parse_primary_or_var() 7267 return self.expression(exp.Parameter, this=this) 7268 7269 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7270 if self._match_set(self.PLACEHOLDER_PARSERS): 7271 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7272 if placeholder: 7273 return placeholder 7274 self._advance(-1) 7275 return None 7276 7277 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7278 if not self._match_texts(keywords): 7279 return None 7280 if self._match(TokenType.L_PAREN, advance=False): 7281 return self._parse_wrapped_csv(self._parse_expression) 7282 7283 expression = self._parse_expression() 7284 return [expression] if expression else None 7285 7286 def _parse_csv( 7287 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7288 ) -> t.List[exp.Expression]: 7289 parse_result = parse_method() 7290 items = [parse_result] if parse_result is not None else [] 7291 7292 while self._match(sep): 7293 self._add_comments(parse_result) 7294 parse_result = parse_method() 7295 if parse_result is not None: 7296 items.append(parse_result) 7297 7298 return items 7299 7300 def _parse_tokens( 7301 self, parse_method: t.Callable, expressions: t.Dict 7302 ) -> t.Optional[exp.Expression]: 7303 this = parse_method() 7304 7305 while self._match_set(expressions): 7306 this = self.expression( 7307 expressions[self._prev.token_type], 7308 this=this, 7309 comments=self._prev_comments, 7310 expression=parse_method(), 7311 ) 7312 7313 return this 7314 7315 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7316 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7317 7318 def _parse_wrapped_csv( 7319 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7320 ) -> t.List[exp.Expression]: 7321 return self._parse_wrapped( 7322 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7323 ) 7324 7325 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7326 wrapped = self._match(TokenType.L_PAREN) 7327 if not wrapped and not optional: 7328 self.raise_error("Expecting (") 7329 parse_result = parse_method() 7330 if wrapped: 7331 self._match_r_paren() 7332 return parse_result 7333 7334 def _parse_expressions(self) -> t.List[exp.Expression]: 7335 return self._parse_csv(self._parse_expression) 7336 7337 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7338 return ( 7339 self._parse_set_operations( 7340 self._parse_alias(self._parse_assignment(), explicit=True) 7341 if alias 7342 else self._parse_assignment() 7343 ) 7344 or self._parse_select() 7345 ) 7346 7347 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7348 return self._parse_query_modifiers( 7349 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7350 ) 7351 7352 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7353 this = None 7354 if self._match_texts(self.TRANSACTION_KIND): 7355 this = self._prev.text 7356 7357 self._match_texts(("TRANSACTION", "WORK")) 7358 7359 modes = [] 7360 while True: 7361 mode = [] 7362 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7363 mode.append(self._prev.text) 7364 7365 if mode: 7366 modes.append(" ".join(mode)) 7367 if not self._match(TokenType.COMMA): 7368 break 7369 7370 return self.expression(exp.Transaction, this=this, modes=modes) 7371 7372 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7373 chain = None 7374 savepoint = None 7375 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7376 7377 self._match_texts(("TRANSACTION", "WORK")) 7378 7379 if self._match_text_seq("TO"): 7380 self._match_text_seq("SAVEPOINT") 7381 savepoint = self._parse_id_var() 7382 7383 if self._match(TokenType.AND): 7384 chain = not self._match_text_seq("NO") 7385 self._match_text_seq("CHAIN") 7386 7387 if is_rollback: 7388 return self.expression(exp.Rollback, savepoint=savepoint) 7389 7390 return self.expression(exp.Commit, chain=chain) 7391 7392 def _parse_refresh(self) -> exp.Refresh: 7393 self._match(TokenType.TABLE) 7394 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7395 7396 def _parse_column_def_with_exists(self): 7397 start = self._index 7398 self._match(TokenType.COLUMN) 7399 7400 exists_column = self._parse_exists(not_=True) 7401 expression = self._parse_field_def() 7402 7403 if not isinstance(expression, exp.ColumnDef): 7404 self._retreat(start) 7405 return None 7406 7407 expression.set("exists", exists_column) 7408 7409 return expression 7410 7411 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7412 if not self._prev.text.upper() == "ADD": 7413 return None 7414 7415 expression = self._parse_column_def_with_exists() 7416 if not expression: 7417 return None 7418 7419 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7420 if self._match_texts(("FIRST", "AFTER")): 7421 position = self._prev.text 7422 column_position = self.expression( 7423 exp.ColumnPosition, this=self._parse_column(), position=position 7424 ) 7425 expression.set("position", column_position) 7426 7427 return expression 7428 7429 def _parse_drop_column(self) -> 
t.Optional[exp.Drop | exp.Command]: 7430 drop = self._match(TokenType.DROP) and self._parse_drop() 7431 if drop and not isinstance(drop, exp.Command): 7432 drop.set("kind", drop.args.get("kind", "COLUMN")) 7433 return drop 7434 7435 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7436 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7437 return self.expression( 7438 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7439 ) 7440 7441 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7442 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7443 self._match_text_seq("ADD") 7444 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7445 return self.expression( 7446 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7447 ) 7448 7449 column_def = self._parse_add_column() 7450 if isinstance(column_def, exp.ColumnDef): 7451 return column_def 7452 7453 exists = self._parse_exists(not_=True) 7454 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7455 return self.expression( 7456 exp.AddPartition, 7457 exists=exists, 7458 this=self._parse_field(any_token=True), 7459 location=self._match_text_seq("LOCATION", advance=False) 7460 and self._parse_property(), 7461 ) 7462 7463 return None 7464 7465 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7466 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7467 or self._match_text_seq("COLUMNS") 7468 ): 7469 schema = self._parse_schema() 7470 7471 return ( 7472 ensure_list(schema) 7473 if schema 7474 else self._parse_csv(self._parse_column_def_with_exists) 7475 ) 7476 7477 return self._parse_csv(_parse_add_alteration) 7478 7479 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7480 if self._match_texts(self.ALTER_ALTER_PARSERS): 7481 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7482 7483 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7484 # keyword after ALTER we default to parsing this statement 7485 self._match(TokenType.COLUMN) 7486 column = self._parse_field(any_token=True) 7487 7488 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7489 return self.expression(exp.AlterColumn, this=column, drop=True) 7490 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7491 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7492 if self._match(TokenType.COMMENT): 7493 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7494 if self._match_text_seq("DROP", "NOT", "NULL"): 7495 return self.expression( 7496 exp.AlterColumn, 7497 this=column, 7498 drop=True, 7499 allow_null=True, 7500 ) 7501 if self._match_text_seq("SET", "NOT", "NULL"): 7502 return self.expression( 7503 exp.AlterColumn, 7504 this=column, 7505 allow_null=False, 7506 ) 7507 7508 if self._match_text_seq("SET", "VISIBLE"): 7509 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7510 if self._match_text_seq("SET", "INVISIBLE"): 7511 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7512 7513 self._match_text_seq("SET", "DATA") 7514 self._match_text_seq("TYPE") 7515 return self.expression( 7516 exp.AlterColumn, 7517 this=column, 7518 dtype=self._parse_types(), 7519 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7520 using=self._match(TokenType.USING) and self._parse_assignment(), 7521 ) 7522 7523 def 
_parse_alter_diststyle(self) -> exp.AlterDistStyle: 7524 if self._match_texts(("ALL", "EVEN", "AUTO")): 7525 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7526 7527 self._match_text_seq("KEY", "DISTKEY") 7528 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7529 7530 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7531 if compound: 7532 self._match_text_seq("SORTKEY") 7533 7534 if self._match(TokenType.L_PAREN, advance=False): 7535 return self.expression( 7536 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7537 ) 7538 7539 self._match_texts(("AUTO", "NONE")) 7540 return self.expression( 7541 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7542 ) 7543 7544 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7545 index = self._index - 1 7546 7547 partition_exists = self._parse_exists() 7548 if self._match(TokenType.PARTITION, advance=False): 7549 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7550 7551 self._retreat(index) 7552 return self._parse_csv(self._parse_drop_column) 7553 7554 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7555 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7556 exists = self._parse_exists() 7557 old_column = self._parse_column() 7558 to = self._match_text_seq("TO") 7559 new_column = self._parse_column() 7560 7561 if old_column is None or to is None or new_column is None: 7562 return None 7563 7564 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7565 7566 self._match_text_seq("TO") 7567 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7568 7569 def _parse_alter_table_set(self) -> exp.AlterSet: 7570 alter_set = self.expression(exp.AlterSet) 7571 7572 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7573 "TABLE", "PROPERTIES" 7574 ): 7575 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7576 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7577 alter_set.set("expressions", [self._parse_assignment()]) 7578 elif self._match_texts(("LOGGED", "UNLOGGED")): 7579 alter_set.set("option", exp.var(self._prev.text.upper())) 7580 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7581 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7582 elif self._match_text_seq("LOCATION"): 7583 alter_set.set("location", self._parse_field()) 7584 elif self._match_text_seq("ACCESS", "METHOD"): 7585 alter_set.set("access_method", self._parse_field()) 7586 elif self._match_text_seq("TABLESPACE"): 7587 alter_set.set("tablespace", self._parse_field()) 7588 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7589 alter_set.set("file_format", [self._parse_field()]) 7590 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7591 alter_set.set("file_format", self._parse_wrapped_options()) 7592 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7593 alter_set.set("copy_options", self._parse_wrapped_options()) 7594 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7595 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7596 else: 7597 if self._match_text_seq("SERDE"): 7598 alter_set.set("serde", self._parse_field()) 7599 7600 properties = self._parse_wrapped(self._parse_properties, optional=True) 7601 
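            # Any wrapped properties parsed above are re-attached as generic SET
            # expressions (e.g. Hive-style SERDE properties; illustrative note,
            # the exact property grammar is dialect-specific).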
alter_set.set("expressions", [properties]) 7602 7603 return alter_set 7604 7605 def _parse_alter_session(self) -> exp.AlterSession: 7606 """Parse ALTER SESSION SET/UNSET statements.""" 7607 if self._match(TokenType.SET): 7608 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7609 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7610 7611 self._match_text_seq("UNSET") 7612 expressions = self._parse_csv( 7613 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7614 ) 7615 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7616 7617 def _parse_alter(self) -> exp.Alter | exp.Command: 7618 start = self._prev 7619 7620 alter_token = self._match_set(self.ALTERABLES) and self._prev 7621 if not alter_token: 7622 return self._parse_as_command(start) 7623 7624 exists = self._parse_exists() 7625 only = self._match_text_seq("ONLY") 7626 7627 if alter_token.token_type == TokenType.SESSION: 7628 this = None 7629 check = None 7630 cluster = None 7631 else: 7632 this = self._parse_table(schema=True) 7633 check = self._match_text_seq("WITH", "CHECK") 7634 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7635 7636 if self._next: 7637 self._advance() 7638 7639 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7640 if parser: 7641 actions = ensure_list(parser(self)) 7642 not_valid = self._match_text_seq("NOT", "VALID") 7643 options = self._parse_csv(self._parse_property) 7644 7645 if not self._curr and actions: 7646 return self.expression( 7647 exp.Alter, 7648 this=this, 7649 kind=alter_token.text.upper(), 7650 exists=exists, 7651 actions=actions, 7652 only=only, 7653 options=options, 7654 cluster=cluster, 7655 not_valid=not_valid, 7656 check=check, 7657 ) 7658 7659 return self._parse_as_command(start) 7660 7661 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7662 start = self._prev 7663 # https://duckdb.org/docs/sql/statements/analyze 7664 if not self._curr: 7665 return self.expression(exp.Analyze) 7666 7667 options = [] 7668 while self._match_texts(self.ANALYZE_STYLES): 7669 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7670 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7671 else: 7672 options.append(self._prev.text.upper()) 7673 7674 this: t.Optional[exp.Expression] = None 7675 inner_expression: t.Optional[exp.Expression] = None 7676 7677 kind = self._curr and self._curr.text.upper() 7678 7679 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7680 this = self._parse_table_parts() 7681 elif self._match_text_seq("TABLES"): 7682 if self._match_set((TokenType.FROM, TokenType.IN)): 7683 kind = f"{kind} {self._prev.text.upper()}" 7684 this = self._parse_table(schema=True, is_db_reference=True) 7685 elif self._match_text_seq("DATABASE"): 7686 this = self._parse_table(schema=True, is_db_reference=True) 7687 elif self._match_text_seq("CLUSTER"): 7688 this = self._parse_table() 7689 # Try matching inner expr keywords before fallback to parse table. 
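        # e.g. an ANALYZE statement whose next keyword is one of the registered
        # inner-expression parsers (see ANALYZE_EXPRESSION_PARSERS) has no target
        # object to parse here, while a bare "ANALYZE some_table" (Presto/DuckDB
        # style) falls through to the final branch below. (Illustrative note.)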
7690 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7691 kind = None 7692 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7693 else: 7694 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7695 kind = None 7696 this = self._parse_table_parts() 7697 7698 partition = self._try_parse(self._parse_partition) 7699 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7700 return self._parse_as_command(start) 7701 7702 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7703 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7704 "WITH", "ASYNC", "MODE" 7705 ): 7706 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7707 else: 7708 mode = None 7709 7710 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7711 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7712 7713 properties = self._parse_properties() 7714 return self.expression( 7715 exp.Analyze, 7716 kind=kind, 7717 this=this, 7718 mode=mode, 7719 partition=partition, 7720 properties=properties, 7721 expression=inner_expression, 7722 options=options, 7723 ) 7724 7725 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7726 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7727 this = None 7728 kind = self._prev.text.upper() 7729 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7730 expressions = [] 7731 7732 if not self._match_text_seq("STATISTICS"): 7733 self.raise_error("Expecting token STATISTICS") 7734 7735 if self._match_text_seq("NOSCAN"): 7736 this = "NOSCAN" 7737 elif self._match(TokenType.FOR): 7738 if self._match_text_seq("ALL", "COLUMNS"): 7739 this = "FOR ALL COLUMNS" 7740 if self._match_texts("COLUMNS"): 7741 this = "FOR COLUMNS" 7742 expressions = self._parse_csv(self._parse_column_reference) 7743 elif self._match_text_seq("SAMPLE"): 7744 sample = self._parse_number() 7745 expressions = [ 7746 self.expression( 7747 exp.AnalyzeSample, 7748 sample=sample, 7749 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7750 ) 7751 ] 7752 7753 return self.expression( 7754 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7755 ) 7756 7757 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7758 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7759 kind = None 7760 this = None 7761 expression: t.Optional[exp.Expression] = None 7762 if self._match_text_seq("REF", "UPDATE"): 7763 kind = "REF" 7764 this = "UPDATE" 7765 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7766 this = "UPDATE SET DANGLING TO NULL" 7767 elif self._match_text_seq("STRUCTURE"): 7768 kind = "STRUCTURE" 7769 if self._match_text_seq("CASCADE", "FAST"): 7770 this = "CASCADE FAST" 7771 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7772 ("ONLINE", "OFFLINE") 7773 ): 7774 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7775 expression = self._parse_into() 7776 7777 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7778 7779 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7780 this = self._prev.text.upper() 7781 if self._match_text_seq("COLUMNS"): 7782 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7783 return None 7784 7785 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7786 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7787 if self._match_text_seq("STATISTICS"): 7788 return self.expression(exp.AnalyzeDelete, kind=kind) 7789 return None 7790 7791 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7792 if self._match_text_seq("CHAINED", "ROWS"): 7793 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7794 return None 7795 7796 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7797 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7798 this = self._prev.text.upper() 7799 expression: t.Optional[exp.Expression] = None 7800 expressions = [] 7801 update_options = None 7802 7803 if self._match_text_seq("HISTOGRAM", "ON"): 7804 expressions = self._parse_csv(self._parse_column_reference) 7805 with_expressions = [] 7806 while self._match(TokenType.WITH): 7807 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7808 if self._match_texts(("SYNC", "ASYNC")): 7809 if self._match_text_seq("MODE", advance=False): 7810 with_expressions.append(f"{self._prev.text.upper()} MODE") 7811 self._advance() 7812 else: 7813 buckets = self._parse_number() 7814 if self._match_text_seq("BUCKETS"): 7815 with_expressions.append(f"{buckets} BUCKETS") 7816 if with_expressions: 7817 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7818 7819 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7820 TokenType.UPDATE, advance=False 7821 ): 7822 update_options = self._prev.text.upper() 7823 self._advance() 7824 elif self._match_text_seq("USING", "DATA"): 7825 expression = self.expression(exp.UsingData, this=self._parse_string()) 7826 7827 return self.expression( 7828 exp.AnalyzeHistogram, 7829 this=this, 7830 expressions=expressions, 7831 expression=expression, 7832 update_options=update_options, 7833 ) 7834 7835 def _parse_merge(self) -> exp.Merge: 7836 self._match(TokenType.INTO) 7837 target = self._parse_table() 7838 7839 if target and self._match(TokenType.ALIAS, advance=False): 7840 target.set("alias", self._parse_table_alias()) 7841 7842 self._match(TokenType.USING) 7843 using = self._parse_table() 7844 7845 self._match(TokenType.ON) 7846 on = self._parse_assignment() 7847 7848 return self.expression( 7849 exp.Merge, 7850 this=target, 7851 using=using, 7852 on=on, 7853 whens=self._parse_when_matched(), 7854 returning=self._parse_returning(), 7855 ) 7856 7857 def _parse_when_matched(self) -> exp.Whens: 7858 whens = [] 7859 7860 while self._match(TokenType.WHEN): 7861 matched = not self._match(TokenType.NOT) 7862 self._match_text_seq("MATCHED") 7863 source = ( 7864 False 7865 if self._match_text_seq("BY", "TARGET") 7866 else self._match_text_seq("BY", "SOURCE") 7867 ) 7868 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7869 7870 self._match(TokenType.THEN) 7871 7872 if self._match(TokenType.INSERT): 7873 this = self._parse_star() 7874 if this: 7875 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7876 else: 7877 then = self.expression( 7878 exp.Insert, 7879 this=exp.var("ROW") 7880 if self._match_text_seq("ROW") 7881 else self._parse_value(values=False), 7882 expression=self._match_text_seq("VALUES") and self._parse_value(), 7883 ) 7884 elif self._match(TokenType.UPDATE): 7885 expressions = self._parse_star() 7886 if expressions: 7887 then = self.expression(exp.Update, expressions=expressions) 7888 else: 7889 then = self.expression( 7890 exp.Update, 7891 
expressions=self._match(TokenType.SET) 7892 and self._parse_csv(self._parse_equality), 7893 ) 7894 elif self._match(TokenType.DELETE): 7895 then = self.expression(exp.Var, this=self._prev.text) 7896 else: 7897 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7898 7899 whens.append( 7900 self.expression( 7901 exp.When, 7902 matched=matched, 7903 source=source, 7904 condition=condition, 7905 then=then, 7906 ) 7907 ) 7908 return self.expression(exp.Whens, expressions=whens) 7909 7910 def _parse_show(self) -> t.Optional[exp.Expression]: 7911 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7912 if parser: 7913 return parser(self) 7914 return self._parse_as_command(self._prev) 7915 7916 def _parse_set_item_assignment( 7917 self, kind: t.Optional[str] = None 7918 ) -> t.Optional[exp.Expression]: 7919 index = self._index 7920 7921 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7922 return self._parse_set_transaction(global_=kind == "GLOBAL") 7923 7924 left = self._parse_primary() or self._parse_column() 7925 assignment_delimiter = self._match_texts(("=", "TO")) 7926 7927 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7928 self._retreat(index) 7929 return None 7930 7931 right = self._parse_statement() or self._parse_id_var() 7932 if isinstance(right, (exp.Column, exp.Identifier)): 7933 right = exp.var(right.name) 7934 7935 this = self.expression(exp.EQ, this=left, expression=right) 7936 return self.expression(exp.SetItem, this=this, kind=kind) 7937 7938 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7939 self._match_text_seq("TRANSACTION") 7940 characteristics = self._parse_csv( 7941 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7942 ) 7943 return self.expression( 7944 exp.SetItem, 7945 expressions=characteristics, 7946 kind="TRANSACTION", 7947 **{"global": global_}, # type: ignore 7948 ) 7949 7950 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7951 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7952 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7953 7954 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7955 index = self._index 7956 set_ = self.expression( 7957 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7958 ) 7959 7960 if self._curr: 7961 self._retreat(index) 7962 return self._parse_as_command(self._prev) 7963 7964 return set_ 7965 7966 def _parse_var_from_options( 7967 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7968 ) -> t.Optional[exp.Var]: 7969 start = self._curr 7970 if not start: 7971 return None 7972 7973 option = start.text.upper() 7974 continuations = options.get(option) 7975 7976 index = self._index 7977 self._advance() 7978 for keywords in continuations or []: 7979 if isinstance(keywords, str): 7980 keywords = (keywords,) 7981 7982 if self._match_text_seq(*keywords): 7983 option = f"{option} {' '.join(keywords)}" 7984 break 7985 else: 7986 if continuations or continuations is None: 7987 if raise_unmatched: 7988 self.raise_error(f"Unknown option {option}") 7989 7990 self._retreat(index) 7991 return None 7992 7993 return exp.var(option) 7994 7995 def _parse_as_command(self, start: Token) -> exp.Command: 7996 while self._curr: 7997 self._advance() 7998 text = self._find_sql(start, self._prev) 7999 size = len(start.text) 8000 self._warn_unsupported() 8001 return exp.Command(this=text[:size], 
expression=text[size:]) 8002 8003 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8004 settings = [] 8005 8006 self._match_l_paren() 8007 kind = self._parse_id_var() 8008 8009 if self._match(TokenType.L_PAREN): 8010 while True: 8011 key = self._parse_id_var() 8012 value = self._parse_primary() 8013 if not key and value is None: 8014 break 8015 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8016 self._match(TokenType.R_PAREN) 8017 8018 self._match_r_paren() 8019 8020 return self.expression( 8021 exp.DictProperty, 8022 this=this, 8023 kind=kind.this if kind else None, 8024 settings=settings, 8025 ) 8026 8027 def _parse_dict_range(self, this: str) -> exp.DictRange: 8028 self._match_l_paren() 8029 has_min = self._match_text_seq("MIN") 8030 if has_min: 8031 min = self._parse_var() or self._parse_primary() 8032 self._match_text_seq("MAX") 8033 max = self._parse_var() or self._parse_primary() 8034 else: 8035 max = self._parse_var() or self._parse_primary() 8036 min = exp.Literal.number(0) 8037 self._match_r_paren() 8038 return self.expression(exp.DictRange, this=this, min=min, max=max) 8039 8040 def _parse_comprehension( 8041 self, this: t.Optional[exp.Expression] 8042 ) -> t.Optional[exp.Comprehension]: 8043 index = self._index 8044 expression = self._parse_column() 8045 if not self._match(TokenType.IN): 8046 self._retreat(index - 1) 8047 return None 8048 iterator = self._parse_column() 8049 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8050 return self.expression( 8051 exp.Comprehension, 8052 this=this, 8053 expression=expression, 8054 iterator=iterator, 8055 condition=condition, 8056 ) 8057 8058 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8059 if self._match(TokenType.HEREDOC_STRING): 8060 return self.expression(exp.Heredoc, this=self._prev.text) 8061 8062 if not self._match_text_seq("$"): 8063 return None 8064 8065 tags = ["$"] 8066 tag_text = None 8067 8068 if self._is_connected(): 8069 self._advance() 8070 tags.append(self._prev.text.upper()) 8071 else: 8072 self.raise_error("No closing $ found") 8073 8074 if tags[-1] != "$": 8075 if self._is_connected() and self._match_text_seq("$"): 8076 tag_text = tags[-1] 8077 tags.append("$") 8078 else: 8079 self.raise_error("No closing $ found") 8080 8081 heredoc_start = self._curr 8082 8083 while self._curr: 8084 if self._match_text_seq(*tags, advance=False): 8085 this = self._find_sql(heredoc_start, self._prev) 8086 self._advance(len(tags)) 8087 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8088 8089 self._advance() 8090 8091 self.raise_error(f"No closing {''.join(tags)} found") 8092 return None 8093 8094 def _find_parser( 8095 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8096 ) -> t.Optional[t.Callable]: 8097 if not self._curr: 8098 return None 8099 8100 index = self._index 8101 this = [] 8102 while True: 8103 # The current token might be multiple words 8104 curr = self._curr.text.upper() 8105 key = curr.split(" ") 8106 this.append(curr) 8107 8108 self._advance() 8109 result, trie = in_trie(trie, key) 8110 if result == TrieResult.FAILED: 8111 break 8112 8113 if result == TrieResult.EXISTS: 8114 subparser = parsers[" ".join(this)] 8115 return subparser 8116 8117 self._retreat(index) 8118 return None 8119 8120 def _match(self, token_type, advance=True, expression=None): 8121 if not self._curr: 8122 return None 8123 8124 if self._curr.token_type == token_type: 8125 if advance: 8126 self._advance() 8127 self._add_comments(expression) 8128 return 
True 8129 8130 return None 8131 8132 def _match_set(self, types, advance=True): 8133 if not self._curr: 8134 return None 8135 8136 if self._curr.token_type in types: 8137 if advance: 8138 self._advance() 8139 return True 8140 8141 return None 8142 8143 def _match_pair(self, token_type_a, token_type_b, advance=True): 8144 if not self._curr or not self._next: 8145 return None 8146 8147 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8148 if advance: 8149 self._advance(2) 8150 return True 8151 8152 return None 8153 8154 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8155 if not self._match(TokenType.L_PAREN, expression=expression): 8156 self.raise_error("Expecting (") 8157 8158 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8159 if not self._match(TokenType.R_PAREN, expression=expression): 8160 self.raise_error("Expecting )") 8161 8162 def _match_texts(self, texts, advance=True): 8163 if ( 8164 self._curr 8165 and self._curr.token_type != TokenType.STRING 8166 and self._curr.text.upper() in texts 8167 ): 8168 if advance: 8169 self._advance() 8170 return True 8171 return None 8172 8173 def _match_text_seq(self, *texts, advance=True): 8174 index = self._index 8175 for text in texts: 8176 if ( 8177 self._curr 8178 and self._curr.token_type != TokenType.STRING 8179 and self._curr.text.upper() == text 8180 ): 8181 self._advance() 8182 else: 8183 self._retreat(index) 8184 return None 8185 8186 if not advance: 8187 self._retreat(index) 8188 8189 return True 8190 8191 def _replace_lambda( 8192 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8193 ) -> t.Optional[exp.Expression]: 8194 if not node: 8195 return node 8196 8197 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8198 8199 for column in node.find_all(exp.Column): 8200 typ = lambda_types.get(column.parts[0].name) 8201 if typ is not None: 8202 dot_or_id = column.to_dot() if column.table else column.this 8203 8204 if typ: 8205 dot_or_id = self.expression( 8206 exp.Cast, 8207 this=dot_or_id, 8208 to=typ, 8209 ) 8210 8211 parent = column.parent 8212 8213 while isinstance(parent, exp.Dot): 8214 if not isinstance(parent.parent, exp.Dot): 8215 parent.replace(dot_or_id) 8216 break 8217 parent = parent.parent 8218 else: 8219 if column is node: 8220 node = dot_or_id 8221 else: 8222 column.replace(dot_or_id) 8223 return node 8224 8225 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8226 start = self._prev 8227 8228 # Not to be confused with TRUNCATE(number, decimals) function call 8229 if self._match(TokenType.L_PAREN): 8230 self._retreat(self._index - 2) 8231 return self._parse_function() 8232 8233 # Clickhouse supports TRUNCATE DATABASE as well 8234 is_database = self._match(TokenType.DATABASE) 8235 8236 self._match(TokenType.TABLE) 8237 8238 exists = self._parse_exists(not_=False) 8239 8240 expressions = self._parse_csv( 8241 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8242 ) 8243 8244 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8245 8246 if self._match_text_seq("RESTART", "IDENTITY"): 8247 identity = "RESTART" 8248 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8249 identity = "CONTINUE" 8250 else: 8251 identity = None 8252 8253 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8254 option = self._prev.text 8255 else: 8256 option = None 8257 8258 partition = self._parse_partition() 
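        # Minimal usage sketch (illustrative; assumes the default dialect):
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
        # should yield an exp.TruncateTable with two table expressions,
        # identity="RESTART" and option="CASCADE".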
8259 8260 # Fallback case 8261 if self._curr: 8262 return self._parse_as_command(start) 8263 8264 return self.expression( 8265 exp.TruncateTable, 8266 expressions=expressions, 8267 is_database=is_database, 8268 exists=exists, 8269 cluster=cluster, 8270 identity=identity, 8271 option=option, 8272 partition=partition, 8273 ) 8274 8275 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8276 this = self._parse_ordered(self._parse_opclass) 8277 8278 if not self._match(TokenType.WITH): 8279 return this 8280 8281 op = self._parse_var(any_token=True) 8282 8283 return self.expression(exp.WithOperator, this=this, op=op) 8284 8285 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8286 self._match(TokenType.EQ) 8287 self._match(TokenType.L_PAREN) 8288 8289 opts: t.List[t.Optional[exp.Expression]] = [] 8290 option: exp.Expression | None 8291 while self._curr and not self._match(TokenType.R_PAREN): 8292 if self._match_text_seq("FORMAT_NAME", "="): 8293 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8294 option = self._parse_format_name() 8295 else: 8296 option = self._parse_property() 8297 8298 if option is None: 8299 self.raise_error("Unable to parse option") 8300 break 8301 8302 opts.append(option) 8303 8304 return opts 8305 8306 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8307 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8308 8309 options = [] 8310 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8311 option = self._parse_var(any_token=True) 8312 prev = self._prev.text.upper() 8313 8314 # Different dialects might separate options and values by white space, "=" and "AS" 8315 self._match(TokenType.EQ) 8316 self._match(TokenType.ALIAS) 8317 8318 param = self.expression(exp.CopyParameter, this=option) 8319 8320 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8321 TokenType.L_PAREN, advance=False 8322 ): 8323 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8324 param.set("expressions", self._parse_wrapped_options()) 8325 elif prev == "FILE_FORMAT": 8326 # T-SQL's external file format case 8327 param.set("expression", self._parse_field()) 8328 else: 8329 param.set("expression", self._parse_unquoted_field()) 8330 8331 options.append(param) 8332 self._match(sep) 8333 8334 return options 8335 8336 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8337 expr = self.expression(exp.Credentials) 8338 8339 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8340 expr.set("storage", self._parse_field()) 8341 if self._match_text_seq("CREDENTIALS"): 8342 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8343 creds = ( 8344 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8345 ) 8346 expr.set("credentials", creds) 8347 if self._match_text_seq("ENCRYPTION"): 8348 expr.set("encryption", self._parse_wrapped_options()) 8349 if self._match_text_seq("IAM_ROLE"): 8350 expr.set("iam_role", self._parse_field()) 8351 if self._match_text_seq("REGION"): 8352 expr.set("region", self._parse_field()) 8353 8354 return expr 8355 8356 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8357 return self._parse_field() 8358 8359 def _parse_copy(self) -> exp.Copy | exp.Command: 8360 start = self._prev 8361 8362 self._match(TokenType.INTO) 8363 8364 this = ( 8365 self._parse_select(nested=True, parse_subquery_alias=False) 8366 if self._match(TokenType.L_PAREN, advance=False) 8367 else self._parse_table(schema=True) 
8368 ) 8369 8370 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8371 8372 files = self._parse_csv(self._parse_file_location) 8373 credentials = self._parse_credentials() 8374 8375 self._match_text_seq("WITH") 8376 8377 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8378 8379 # Fallback case 8380 if self._curr: 8381 return self._parse_as_command(start) 8382 8383 return self.expression( 8384 exp.Copy, 8385 this=this, 8386 kind=kind, 8387 credentials=credentials, 8388 files=files, 8389 params=params, 8390 ) 8391 8392 def _parse_normalize(self) -> exp.Normalize: 8393 return self.expression( 8394 exp.Normalize, 8395 this=self._parse_bitwise(), 8396 form=self._match(TokenType.COMMA) and self._parse_var(), 8397 ) 8398 8399 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8400 args = self._parse_csv(lambda: self._parse_lambda()) 8401 8402 this = seq_get(args, 0) 8403 decimals = seq_get(args, 1) 8404 8405 return expr_type( 8406 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8407 ) 8408 8409 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8410 star_token = self._prev 8411 8412 if self._match_text_seq("COLUMNS", "(", advance=False): 8413 this = self._parse_function() 8414 if isinstance(this, exp.Columns): 8415 this.set("unpack", True) 8416 return this 8417 8418 return self.expression( 8419 exp.Star, 8420 **{ # type: ignore 8421 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8422 "replace": self._parse_star_op("REPLACE"), 8423 "rename": self._parse_star_op("RENAME"), 8424 }, 8425 ).update_positions(star_token) 8426 8427 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8428 privilege_parts = [] 8429 8430 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8431 # (end of privilege list) or L_PAREN (start of column list) are met 8432 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8433 privilege_parts.append(self._curr.text.upper()) 8434 self._advance() 8435 8436 this = exp.var(" ".join(privilege_parts)) 8437 expressions = ( 8438 self._parse_wrapped_csv(self._parse_column) 8439 if self._match(TokenType.L_PAREN, advance=False) 8440 else None 8441 ) 8442 8443 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8444 8445 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8446 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8447 principal = self._parse_id_var() 8448 8449 if not principal: 8450 return None 8451 8452 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8453 8454 def _parse_grant_revoke_common( 8455 self, 8456 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8457 privileges = self._parse_csv(self._parse_grant_privilege) 8458 8459 self._match(TokenType.ON) 8460 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8461 8462 # Attempt to parse the securable e.g. 
MySQL allows names 8463 # such as "foo.*", "*.*" which are not easily parseable yet 8464 securable = self._try_parse(self._parse_table_parts) 8465 8466 return privileges, kind, securable 8467 8468 def _parse_grant(self) -> exp.Grant | exp.Command: 8469 start = self._prev 8470 8471 privileges, kind, securable = self._parse_grant_revoke_common() 8472 8473 if not securable or not self._match_text_seq("TO"): 8474 return self._parse_as_command(start) 8475 8476 principals = self._parse_csv(self._parse_grant_principal) 8477 8478 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8479 8480 if self._curr: 8481 return self._parse_as_command(start) 8482 8483 return self.expression( 8484 exp.Grant, 8485 privileges=privileges, 8486 kind=kind, 8487 securable=securable, 8488 principals=principals, 8489 grant_option=grant_option, 8490 ) 8491 8492 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8493 start = self._prev 8494 8495 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8496 8497 privileges, kind, securable = self._parse_grant_revoke_common() 8498 8499 if not securable or not self._match_text_seq("FROM"): 8500 return self._parse_as_command(start) 8501 8502 principals = self._parse_csv(self._parse_grant_principal) 8503 8504 cascade = None 8505 if self._match_texts(("CASCADE", "RESTRICT")): 8506 cascade = self._prev.text.upper() 8507 8508 if self._curr: 8509 return self._parse_as_command(start) 8510 8511 return self.expression( 8512 exp.Revoke, 8513 privileges=privileges, 8514 kind=kind, 8515 securable=securable, 8516 principals=principals, 8517 grant_option=grant_option, 8518 cascade=cascade, 8519 ) 8520 8521 def _parse_overlay(self) -> exp.Overlay: 8522 return self.expression( 8523 exp.Overlay, 8524 **{ # type: ignore 8525 "this": self._parse_bitwise(), 8526 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8527 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8528 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8529 }, 8530 ) 8531 8532 def _parse_format_name(self) -> exp.Property: 8533 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8534 # for FILE_FORMAT = <format_name> 8535 return self.expression( 8536 exp.Property, 8537 this=exp.var("FORMAT_NAME"), 8538 value=self._parse_string() or self._parse_table_parts(), 8539 ) 8540 8541 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8542 args: t.List[exp.Expression] = [] 8543 8544 if self._match(TokenType.DISTINCT): 8545 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8546 self._match(TokenType.COMMA) 8547 8548 args.extend(self._parse_csv(self._parse_assignment)) 8549 8550 return self.expression( 8551 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8552 ) 8553 8554 def _identifier_expression( 8555 self, token: t.Optional[Token] = None, **kwargs: t.Any 8556 ) -> exp.Identifier: 8557 token = token or self._prev 8558 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8559 expression.update_positions(token) 8560 return expression 8561 8562 def _build_pipe_cte( 8563 self, 8564 query: exp.Query, 8565 expressions: t.List[exp.Expression], 8566 alias_cte: t.Optional[exp.TableAlias] = None, 8567 ) -> exp.Select: 8568 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8569 if alias_cte: 8570 new_cte = alias_cte 8571 else: 8572 self._pipe_cte_counter += 1 8573 new_cte = f"__tmp{self._pipe_cte_counter}" 8574 8575 with_ = 
query.args.get("with") 8576 ctes = with_.pop() if with_ else None 8577 8578 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8579 if ctes: 8580 new_select.set("with", ctes) 8581 8582 return new_select.with_(new_cte, as_=query, copy=False) 8583 8584 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8585 select = self._parse_select(consume_pipe=False) 8586 if not select: 8587 return query 8588 8589 return self._build_pipe_cte( 8590 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8591 ) 8592 8593 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8594 limit = self._parse_limit() 8595 offset = self._parse_offset() 8596 if limit: 8597 curr_limit = query.args.get("limit", limit) 8598 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8599 query.limit(limit, copy=False) 8600 if offset: 8601 curr_offset = query.args.get("offset") 8602 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8603 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8604 8605 return query 8606 8607 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8608 this = self._parse_assignment() 8609 if self._match_text_seq("GROUP", "AND", advance=False): 8610 return this 8611 8612 this = self._parse_alias(this) 8613 8614 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8615 return self._parse_ordered(lambda: this) 8616 8617 return this 8618 8619 def _parse_pipe_syntax_aggregate_group_order_by( 8620 self, query: exp.Select, group_by_exists: bool = True 8621 ) -> exp.Select: 8622 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8623 aggregates_or_groups, orders = [], [] 8624 for element in expr: 8625 if isinstance(element, exp.Ordered): 8626 this = element.this 8627 if isinstance(this, exp.Alias): 8628 element.set("this", this.args["alias"]) 8629 orders.append(element) 8630 else: 8631 this = element 8632 aggregates_or_groups.append(this) 8633 8634 if group_by_exists: 8635 query.select(*aggregates_or_groups, copy=False).group_by( 8636 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8637 copy=False, 8638 ) 8639 else: 8640 query.select(*aggregates_or_groups, append=False, copy=False) 8641 8642 if orders: 8643 return query.order_by(*orders, append=False, copy=False) 8644 8645 return query 8646 8647 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8648 self._match_text_seq("AGGREGATE") 8649 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8650 8651 if self._match(TokenType.GROUP_BY) or ( 8652 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8653 ): 8654 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8655 8656 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8657 8658 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8659 first_setop = self.parse_set_operation(this=query) 8660 if not first_setop: 8661 return None 8662 8663 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8664 expr = self._parse_paren() 8665 return expr.assert_is(exp.Subquery).unnest() if expr else None 8666 8667 first_setop.this.pop() 8668 8669 setops = [ 8670 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8671 *self._parse_csv(_parse_and_unwrap_query), 8672 ] 8673 8674 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8675 
with_ = query.args.get("with") 8676 ctes = with_.pop() if with_ else None 8677 8678 if isinstance(first_setop, exp.Union): 8679 query = query.union(*setops, copy=False, **first_setop.args) 8680 elif isinstance(first_setop, exp.Except): 8681 query = query.except_(*setops, copy=False, **first_setop.args) 8682 else: 8683 query = query.intersect(*setops, copy=False, **first_setop.args) 8684 8685 query.set("with", ctes) 8686 8687 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8688 8689 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8690 join = self._parse_join() 8691 if not join: 8692 return None 8693 8694 if isinstance(query, exp.Select): 8695 return query.join(join, copy=False) 8696 8697 return query 8698 8699 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8700 pivots = self._parse_pivots() 8701 if not pivots: 8702 return query 8703 8704 from_ = query.args.get("from") 8705 if from_: 8706 from_.this.set("pivots", pivots) 8707 8708 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8709 8710 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8711 self._match_text_seq("EXTEND") 8712 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8713 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8714 8715 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8716 sample = self._parse_table_sample() 8717 8718 with_ = query.args.get("with") 8719 if with_: 8720 with_.expressions[-1].this.set("sample", sample) 8721 else: 8722 query.set("sample", sample) 8723 8724 return query 8725 8726 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8727 if isinstance(query, exp.Subquery): 8728 query = exp.select("*").from_(query, copy=False) 8729 8730 if not query.args.get("from"): 8731 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8732 8733 while self._match(TokenType.PIPE_GT): 8734 start = self._curr 8735 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8736 if not parser: 8737 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8738 # keywords, making it tricky to disambiguate them without lookahead. The approach 8739 # here is to try and parse a set operation and if that fails, then try to parse a 8740 # join operator. If that fails as well, then the operator is not supported. 
8741 parsed_query = self._parse_pipe_syntax_set_operator(query) 8742 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8743 if not parsed_query: 8744 self._retreat(start) 8745 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8746 break 8747 query = parsed_query 8748 else: 8749 query = parser(self, query) 8750 8751 return query 8752 8753 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8754 vars = self._parse_csv(self._parse_id_var) 8755 if not vars: 8756 return None 8757 8758 return self.expression( 8759 exp.DeclareItem, 8760 this=vars, 8761 kind=self._parse_types(), 8762 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8763 ) 8764 8765 def _parse_declare(self) -> exp.Declare | exp.Command: 8766 start = self._prev 8767 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8768 8769 if not expressions or self._curr: 8770 return self._parse_as_command(start) 8771 8772 return self.expression(exp.Declare, expressions=expressions) 8773 8774 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8775 exp_class = exp.Cast if strict else exp.TryCast 8776 8777 if exp_class == exp.TryCast: 8778 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8779 8780 return self.expression(exp_class, **kwargs) 8781 8782 def _parse_json_value(self) -> exp.JSONValue: 8783 this = self._parse_bitwise() 8784 self._match(TokenType.COMMA) 8785 path = self._parse_bitwise() 8786 8787 returning = self._match(TokenType.RETURNING) and self._parse_type() 8788 8789 return self.expression( 8790 exp.JSONValue, 8791 this=this, 8792 path=self.dialect.to_json_path(path), 8793 returning=returning, 8794 on_condition=self._parse_on_condition(), 8795 ) 8796 8797 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8798 def concat_exprs( 8799 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8800 ) -> exp.Expression: 8801 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8802 concat_exprs = [ 8803 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8804 ] 8805 node.set("expressions", concat_exprs) 8806 return node 8807 if len(exprs) == 1: 8808 return exprs[0] 8809 return self.expression(exp.Concat, expressions=exprs, safe=True) 8810 8811 args = self._parse_csv(self._parse_lambda) 8812 8813 if args: 8814 order = args[-1] if isinstance(args[-1], exp.Order) else None 8815 8816 if order: 8817 # ORDER BY is the last (or only) expression in the list and has consumed the 'expr' before it, so 8818 # remove 'expr' from exp.Order and add it back to args 8819 args[-1] = order.this 8820 order.set("this", concat_exprs(order.this, args)) 8821 8822 this = order or concat_exprs(args[0], args) 8823 else: 8824 this = None 8825 8826 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8827 8828 return self.expression(exp.GroupConcat, this=this, separator=separator)
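# Editor's note: a minimal usage sketch, not part of the module source, showing
# how _parse_group_concat above surfaces through sqlglot's public API. It
# assumes the MySQL dialect routes GROUP_CONCAT to this method (true in current
# sqlglot, but dialect wiring can change between versions).
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR '|') FROM t",
    read="mysql",
)
group_concat = ast.find(exp.GroupConcat)
# DISTINCT and ORDER BY are folded into `this`; the SEPARATOR literal lands
# in the `separator` arg.
assert group_concat is not None
assert group_concat.args["separator"].this == "|"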
94def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 95 def _builder(args: t.List, dialect: Dialect) -> E: 96 expression = expr_type( 97 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 98 ) 99 if len(args) > 2 and expr_type is exp.JSONExtract: 100 expression.set("expressions", args[2:]) 101 102 return expression 103 104 return _builder
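# Editor's note: an illustrative sketch, not part of the module source. The
# builder above runs the path argument through dialect.to_json_path, so the
# tree carries a structured JSON path rather than a raw string. The FUNCTIONS
# table later in this module wires JSON_EXTRACT to this builder; the exact node
# shapes may vary across sqlglot versions.
import sqlglot
from sqlglot import exp

extract = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.a[0].b')").find(exp.JSONExtract)
# The second argument has been parsed into an exp.JSONPath node.
assert isinstance(extract.expression, exp.JSONPath)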
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
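# Editor's note: a quick sketch, not part of the module source, of why
# build_mod wraps binary operands. When MOD(x, y) is rendered as infix `%`,
# the inserted parentheses preserve the original grouping; output shown is for
# the default dialect and may differ slightly by version.
import sqlglot

# MOD(a + 1, 7) round-trips as (a + 1) % 7, not a + 1 % 7.
print(sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql())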
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
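# Editor's note: sketch only, not part of the module source. For dialects that
# treat ARRAY[...] and bare [...] as distinct constructors (the
# HAS_DISTINCT_ARRAY_CONSTRUCTORS flag; which dialects set it is
# version-dependent, so this is an assumption), recording bracket_notation lets
# a round-trip preserve the notation the user wrote.
import sqlglot

# DuckDB's bracket constructor is preserved on a round-trip.
print(sqlglot.transpile("SELECT [1, 2]", read="duckdb", write="duckdb")[0])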
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
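# Editor's note: sketch, not part of the module source. With two arguments the
# call is read as (target_tz, timestamp) and source_tz stays empty unless the
# calling dialect supplies default_source_tz. The base FUNCTIONS table below
# wires CONVERT_TIMEZONE to this builder, so the default dialect suffices here.
import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("SELECT CONVERT_TIMEZONE('America/New_York', ts)").find(
    exp.ConvertTimezone
)
# Two-argument form: no source zone was given.
assert node.args.get("source_tz") is None
assert node.args["target_tz"].this == "America/New_York"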
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.MERGE, 536 TokenType.NATURAL, 537 TokenType.NEXT, 538 TokenType.OFFSET, 539 TokenType.OPERATOR, 540 TokenType.ORDINALITY, 541 TokenType.OVERLAPS, 542 TokenType.OVERWRITE, 543 TokenType.PARTITION, 544 TokenType.PERCENT, 545 TokenType.PIVOT, 546 TokenType.PRAGMA, 547 TokenType.PUT, 548 TokenType.RANGE, 549 TokenType.RECURSIVE, 550 TokenType.REFERENCES, 551 TokenType.REFRESH, 552 TokenType.RENAME, 553 TokenType.REPLACE, 554 TokenType.RIGHT, 555 TokenType.ROLLUP, 556 TokenType.ROW, 557 TokenType.ROWS, 558 TokenType.SEMI, 559 TokenType.SET, 560 TokenType.SETTINGS, 561 TokenType.SHOW, 562 TokenType.TEMPORARY, 563 TokenType.TOP, 564 TokenType.TRUE, 565 
TokenType.TRUNCATE, 566 TokenType.UNIQUE, 567 TokenType.UNNEST, 568 TokenType.UNPIVOT, 569 TokenType.UPDATE, 570 TokenType.USE, 571 TokenType.VOLATILE, 572 TokenType.WINDOW, 573 *ALTERABLES, 574 *CREATABLES, 575 *SUBQUERY_PREDICATES, 576 *TYPE_TOKENS, 577 *NO_PAREN_FUNCTIONS, 578 } 579 ID_VAR_TOKENS.remove(TokenType.UNION) 580 581 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 582 TokenType.ANTI, 583 TokenType.ASOF, 584 TokenType.FULL, 585 TokenType.LEFT, 586 TokenType.LOCK, 587 TokenType.NATURAL, 588 TokenType.RIGHT, 589 TokenType.SEMI, 590 TokenType.WINDOW, 591 } 592 593 ALIAS_TOKENS = ID_VAR_TOKENS 594 595 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 596 597 ARRAY_CONSTRUCTORS = { 598 "ARRAY": exp.Array, 599 "LIST": exp.List, 600 } 601 602 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 603 604 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 605 606 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 607 608 FUNC_TOKENS = { 609 TokenType.COLLATE, 610 TokenType.COMMAND, 611 TokenType.CURRENT_DATE, 612 TokenType.CURRENT_DATETIME, 613 TokenType.CURRENT_SCHEMA, 614 TokenType.CURRENT_TIMESTAMP, 615 TokenType.CURRENT_TIME, 616 TokenType.CURRENT_USER, 617 TokenType.FILTER, 618 TokenType.FIRST, 619 TokenType.FORMAT, 620 TokenType.GET, 621 TokenType.GLOB, 622 TokenType.IDENTIFIER, 623 TokenType.INDEX, 624 TokenType.ISNULL, 625 TokenType.ILIKE, 626 TokenType.INSERT, 627 TokenType.LIKE, 628 TokenType.MERGE, 629 TokenType.NEXT, 630 TokenType.OFFSET, 631 TokenType.PRIMARY_KEY, 632 TokenType.RANGE, 633 TokenType.REPLACE, 634 TokenType.RLIKE, 635 TokenType.ROW, 636 TokenType.UNNEST, 637 TokenType.VAR, 638 TokenType.LEFT, 639 TokenType.RIGHT, 640 TokenType.SEQUENCE, 641 TokenType.DATE, 642 TokenType.DATETIME, 643 TokenType.TABLE, 644 TokenType.TIMESTAMP, 645 TokenType.TIMESTAMPTZ, 646 TokenType.TRUNCATE, 647 TokenType.UTC_DATE, 648 TokenType.UTC_TIME, 649 TokenType.UTC_TIMESTAMP, 650 TokenType.WINDOW, 651 TokenType.XOR, 652 *TYPE_TOKENS, 653 *SUBQUERY_PREDICATES, 654 } 655 656 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 657 TokenType.AND: exp.And, 658 } 659 660 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 661 TokenType.COLON_EQ: exp.PropertyEQ, 662 } 663 664 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 665 TokenType.OR: exp.Or, 666 } 667 668 EQUALITY = { 669 TokenType.EQ: exp.EQ, 670 TokenType.NEQ: exp.NEQ, 671 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 672 } 673 674 COMPARISON = { 675 TokenType.GT: exp.GT, 676 TokenType.GTE: exp.GTE, 677 TokenType.LT: exp.LT, 678 TokenType.LTE: exp.LTE, 679 } 680 681 BITWISE = { 682 TokenType.AMP: exp.BitwiseAnd, 683 TokenType.CARET: exp.BitwiseXor, 684 TokenType.PIPE: exp.BitwiseOr, 685 } 686 687 TERM = { 688 TokenType.DASH: exp.Sub, 689 TokenType.PLUS: exp.Add, 690 TokenType.MOD: exp.Mod, 691 TokenType.COLLATE: exp.Collate, 692 } 693 694 FACTOR = { 695 TokenType.DIV: exp.IntDiv, 696 TokenType.LR_ARROW: exp.Distance, 697 TokenType.SLASH: exp.Div, 698 TokenType.STAR: exp.Mul, 699 } 700 701 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 702 703 TIMES = { 704 TokenType.TIME, 705 TokenType.TIMETZ, 706 } 707 708 TIMESTAMPS = { 709 TokenType.TIMESTAMP, 710 TokenType.TIMESTAMPNTZ, 711 TokenType.TIMESTAMPTZ, 712 TokenType.TIMESTAMPLTZ, 713 *TIMES, 714 } 715 716 SET_OPERATIONS = { 717 TokenType.UNION, 718 TokenType.INTERSECT, 719 TokenType.EXCEPT, 720 } 721 722 JOIN_METHODS = { 723 TokenType.ASOF, 724 TokenType.NATURAL, 725 TokenType.POSITIONAL, 726 } 727 728 JOIN_SIDES = { 729 TokenType.LEFT, 730 TokenType.RIGHT, 731 
TokenType.FULL, 732 } 733 734 JOIN_KINDS = { 735 TokenType.ANTI, 736 TokenType.CROSS, 737 TokenType.INNER, 738 TokenType.OUTER, 739 TokenType.SEMI, 740 TokenType.STRAIGHT_JOIN, 741 } 742 743 JOIN_HINTS: t.Set[str] = set() 744 745 LAMBDAS = { 746 TokenType.ARROW: lambda self, expressions: self.expression( 747 exp.Lambda, 748 this=self._replace_lambda( 749 self._parse_assignment(), 750 expressions, 751 ), 752 expressions=expressions, 753 ), 754 TokenType.FARROW: lambda self, expressions: self.expression( 755 exp.Kwarg, 756 this=exp.var(expressions[0].name), 757 expression=self._parse_assignment(), 758 ), 759 } 760 761 COLUMN_OPERATORS = { 762 TokenType.DOT: None, 763 TokenType.DOTCOLON: lambda self, this, to: self.expression( 764 exp.JSONCast, 765 this=this, 766 to=to, 767 ), 768 TokenType.DCOLON: lambda self, this, to: self.build_cast( 769 strict=self.STRICT_CAST, this=this, to=to 770 ), 771 TokenType.ARROW: lambda self, this, path: self.expression( 772 exp.JSONExtract, 773 this=this, 774 expression=self.dialect.to_json_path(path), 775 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 776 ), 777 TokenType.DARROW: lambda self, this, path: self.expression( 778 exp.JSONExtractScalar, 779 this=this, 780 expression=self.dialect.to_json_path(path), 781 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 782 ), 783 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtract, 785 this=this, 786 expression=path, 787 ), 788 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 789 exp.JSONBExtractScalar, 790 this=this, 791 expression=path, 792 ), 793 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 794 exp.JSONBContains, 795 this=this, 796 expression=key, 797 ), 798 } 799 800 CAST_COLUMN_OPERATORS = { 801 TokenType.DOTCOLON, 802 TokenType.DCOLON, 803 } 804 805 EXPRESSION_PARSERS = { 806 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 807 exp.Column: lambda self: self._parse_column(), 808 exp.Condition: lambda self: self._parse_assignment(), 809 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 810 exp.Expression: lambda self: self._parse_expression(), 811 exp.From: lambda self: self._parse_from(joins=True), 812 exp.Group: lambda self: self._parse_group(), 813 exp.Having: lambda self: self._parse_having(), 814 exp.Hint: lambda self: self._parse_hint_body(), 815 exp.Identifier: lambda self: self._parse_id_var(), 816 exp.Join: lambda self: self._parse_join(), 817 exp.Lambda: lambda self: self._parse_lambda(), 818 exp.Lateral: lambda self: self._parse_lateral(), 819 exp.Limit: lambda self: self._parse_limit(), 820 exp.Offset: lambda self: self._parse_offset(), 821 exp.Order: lambda self: self._parse_order(), 822 exp.Ordered: lambda self: self._parse_ordered(), 823 exp.Properties: lambda self: self._parse_properties(), 824 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 825 exp.Qualify: lambda self: self._parse_qualify(), 826 exp.Returning: lambda self: self._parse_returning(), 827 exp.Select: lambda self: self._parse_select(), 828 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 829 exp.Table: lambda self: self._parse_table_parts(), 830 exp.TableAlias: lambda self: self._parse_table_alias(), 831 exp.Tuple: lambda self: self._parse_value(values=False), 832 exp.Whens: lambda self: self._parse_when_matched(), 833 exp.Where: lambda self: self._parse_where(), 834 exp.Window: lambda self: self._parse_named_window(), 835 exp.With: lambda self: 
self._parse_with(), 836 "JOIN_TYPE": lambda self: self._parse_join_parts(), 837 } 838 839 STATEMENT_PARSERS = { 840 TokenType.ALTER: lambda self: self._parse_alter(), 841 TokenType.ANALYZE: lambda self: self._parse_analyze(), 842 TokenType.BEGIN: lambda self: self._parse_transaction(), 843 TokenType.CACHE: lambda self: self._parse_cache(), 844 TokenType.COMMENT: lambda self: self._parse_comment(), 845 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 846 TokenType.COPY: lambda self: self._parse_copy(), 847 TokenType.CREATE: lambda self: self._parse_create(), 848 TokenType.DELETE: lambda self: self._parse_delete(), 849 TokenType.DESC: lambda self: self._parse_describe(), 850 TokenType.DESCRIBE: lambda self: self._parse_describe(), 851 TokenType.DROP: lambda self: self._parse_drop(), 852 TokenType.GRANT: lambda self: self._parse_grant(), 853 TokenType.REVOKE: lambda self: self._parse_revoke(), 854 TokenType.INSERT: lambda self: self._parse_insert(), 855 TokenType.KILL: lambda self: self._parse_kill(), 856 TokenType.LOAD: lambda self: self._parse_load(), 857 TokenType.MERGE: lambda self: self._parse_merge(), 858 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 859 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 860 TokenType.REFRESH: lambda self: self._parse_refresh(), 861 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 862 TokenType.SET: lambda self: self._parse_set(), 863 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 864 TokenType.UNCACHE: lambda self: self._parse_uncache(), 865 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 866 TokenType.UPDATE: lambda self: self._parse_update(), 867 TokenType.USE: lambda self: self._parse_use(), 868 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 869 } 870 871 UNARY_PARSERS = { 872 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 873 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 874 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 875 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 876 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 877 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 878 } 879 880 STRING_PARSERS = { 881 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 882 exp.RawString, this=token.text 883 ), 884 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 885 exp.National, this=token.text 886 ), 887 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 888 TokenType.STRING: lambda self, token: self.expression( 889 exp.Literal, this=token.text, is_string=True 890 ), 891 TokenType.UNICODE_STRING: lambda self, token: self.expression( 892 exp.UnicodeString, 893 this=token.text, 894 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 895 ), 896 } 897 898 NUMERIC_PARSERS = { 899 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 900 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 901 TokenType.HEX_STRING: lambda self, token: self.expression( 902 exp.HexString, 903 this=token.text, 904 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 905 ), 906 TokenType.NUMBER: lambda self, token: self.expression( 907 exp.Literal, 
this=token.text, is_string=False 908 ), 909 } 910 911 PRIMARY_PARSERS = { 912 **STRING_PARSERS, 913 **NUMERIC_PARSERS, 914 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 915 TokenType.NULL: lambda self, _: self.expression(exp.Null), 916 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 917 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 918 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 919 TokenType.STAR: lambda self, _: self._parse_star_ops(), 920 } 921 922 PLACEHOLDER_PARSERS = { 923 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 924 TokenType.PARAMETER: lambda self: self._parse_parameter(), 925 TokenType.COLON: lambda self: ( 926 self.expression(exp.Placeholder, this=self._prev.text) 927 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 928 else None 929 ), 930 } 931 932 RANGE_PARSERS = { 933 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 934 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 935 TokenType.GLOB: binary_range_parser(exp.Glob), 936 TokenType.ILIKE: binary_range_parser(exp.ILike), 937 TokenType.IN: lambda self, this: self._parse_in(this), 938 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 939 TokenType.IS: lambda self, this: self._parse_is(this), 940 TokenType.LIKE: binary_range_parser(exp.Like), 941 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 942 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 943 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 944 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 945 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 946 } 947 948 PIPE_SYNTAX_TRANSFORM_PARSERS = { 949 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 950 "AS": lambda self, query: self._build_pipe_cte( 951 query, [exp.Star()], self._parse_table_alias() 952 ), 953 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 954 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 955 "ORDER BY": lambda self, query: query.order_by( 956 self._parse_order(), append=False, copy=False 957 ), 958 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 959 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 960 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 961 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 962 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 963 } 964 965 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 966 "ALLOWED_VALUES": lambda self: self.expression( 967 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 968 ), 969 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 970 "AUTO": lambda self: self._parse_auto_property(), 971 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 972 "BACKUP": lambda self: self.expression( 973 exp.BackupProperty, this=self._parse_var(any_token=True) 974 ), 975 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 976 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 977 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 978 "CHECKSUM": lambda self: self._parse_checksum(), 979 "CLUSTER BY": lambda self: self._parse_cluster(), 980 "CLUSTERED": lambda self: self._parse_clustered_by(), 981 "COLLATE": 
lambda self, **kwargs: self._parse_property_assignment( 982 exp.CollateProperty, **kwargs 983 ), 984 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 985 "CONTAINS": lambda self: self._parse_contains_property(), 986 "COPY": lambda self: self._parse_copy_property(), 987 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 988 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 989 "DEFINER": lambda self: self._parse_definer(), 990 "DETERMINISTIC": lambda self: self.expression( 991 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 992 ), 993 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 994 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 995 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 996 "DISTKEY": lambda self: self._parse_distkey(), 997 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 998 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 999 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1000 "ENVIRONMENT": lambda self: self.expression( 1001 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1002 ), 1003 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1004 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1005 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1006 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1007 "FREESPACE": lambda self: self._parse_freespace(), 1008 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1009 "HEAP": lambda self: self.expression(exp.HeapProperty), 1010 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1011 "IMMUTABLE": lambda self: self.expression( 1012 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1013 ), 1014 "INHERITS": lambda self: self.expression( 1015 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1016 ), 1017 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1018 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1019 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1020 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1021 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1022 "LIKE": lambda self: self._parse_create_like(), 1023 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1024 "LOCK": lambda self: self._parse_locking(), 1025 "LOCKING": lambda self: self._parse_locking(), 1026 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1027 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1028 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1029 "MODIFIES": lambda self: self._parse_modifies_property(), 1030 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1031 "NO": lambda self: self._parse_no_property(), 1032 "ON": lambda self: self._parse_on_property(), 1033 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1034 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1035 "PARTITION": lambda self: self._parse_partitioned_of(), 1036 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1037 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 
1038 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1039 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1040 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1041 "READS": lambda self: self._parse_reads_property(), 1042 "REMOTE": lambda self: self._parse_remote_with_connection(), 1043 "RETURNS": lambda self: self._parse_returns(), 1044 "STRICT": lambda self: self.expression(exp.StrictProperty), 1045 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1046 "ROW": lambda self: self._parse_row(), 1047 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1048 "SAMPLE": lambda self: self.expression( 1049 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1050 ), 1051 "SECURE": lambda self: self.expression(exp.SecureProperty), 1052 "SECURITY": lambda self: self._parse_security(), 1053 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1054 "SETTINGS": lambda self: self._parse_settings_property(), 1055 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1056 "SORTKEY": lambda self: self._parse_sortkey(), 1057 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1058 "STABLE": lambda self: self.expression( 1059 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1060 ), 1061 "STORED": lambda self: self._parse_stored(), 1062 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1063 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1064 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1065 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1066 "TO": lambda self: self._parse_to_table(), 1067 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1068 "TRANSFORM": lambda self: self.expression( 1069 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1070 ), 1071 "TTL": lambda self: self._parse_ttl(), 1072 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1073 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1074 "VOLATILE": lambda self: self._parse_volatile_property(), 1075 "WITH": lambda self: self._parse_with_property(), 1076 } 1077 1078 CONSTRAINT_PARSERS = { 1079 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1080 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1081 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1082 "CHARACTER SET": lambda self: self.expression( 1083 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1084 ), 1085 "CHECK": lambda self: self.expression( 1086 exp.CheckColumnConstraint, 1087 this=self._parse_wrapped(self._parse_assignment), 1088 enforced=self._match_text_seq("ENFORCED"), 1089 ), 1090 "COLLATE": lambda self: self.expression( 1091 exp.CollateColumnConstraint, 1092 this=self._parse_identifier() or self._parse_column(), 1093 ), 1094 "COMMENT": lambda self: self.expression( 1095 exp.CommentColumnConstraint, this=self._parse_string() 1096 ), 1097 "COMPRESS": lambda self: self._parse_compress(), 1098 "CLUSTERED": lambda self: self.expression( 1099 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1100 ), 1101 "NONCLUSTERED": lambda self: self.expression( 1102 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1103 ), 1104 "DEFAULT": lambda self: self.expression( 1105 exp.DefaultColumnConstraint, 
this=self._parse_bitwise() 1106 ), 1107 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1108 "EPHEMERAL": lambda self: self.expression( 1109 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1110 ), 1111 "EXCLUDE": lambda self: self.expression( 1112 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1113 ), 1114 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1115 "FORMAT": lambda self: self.expression( 1116 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1117 ), 1118 "GENERATED": lambda self: self._parse_generated_as_identity(), 1119 "IDENTITY": lambda self: self._parse_auto_increment(), 1120 "INLINE": lambda self: self._parse_inline(), 1121 "LIKE": lambda self: self._parse_create_like(), 1122 "NOT": lambda self: self._parse_not_constraint(), 1123 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1124 "ON": lambda self: ( 1125 self._match(TokenType.UPDATE) 1126 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1127 ) 1128 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1129 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1130 "PERIOD": lambda self: self._parse_period_for_system_time(), 1131 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1132 "REFERENCES": lambda self: self._parse_references(match=False), 1133 "TITLE": lambda self: self.expression( 1134 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1135 ), 1136 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1137 "UNIQUE": lambda self: self._parse_unique(), 1138 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1139 "WATERMARK": lambda self: self.expression( 1140 exp.WatermarkColumnConstraint, 1141 this=self._match(TokenType.FOR) and self._parse_column(), 1142 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1143 ), 1144 "WITH": lambda self: self.expression( 1145 exp.Properties, expressions=self._parse_wrapped_properties() 1146 ), 1147 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1148 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1149 } 1150 1151 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1152 if not self._match(TokenType.L_PAREN, advance=False): 1153 # Partitioning by bucket or truncate follows the syntax: 1154 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1155 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1156 self._retreat(self._index - 1) 1157 return None 1158 1159 klass = ( 1160 exp.PartitionedByBucket 1161 if self._prev.text.upper() == "BUCKET" 1162 else exp.PartitionByTruncate 1163 ) 1164 1165 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1166 this, expression = seq_get(args, 0), seq_get(args, 1) 1167 1168 if isinstance(this, exp.Literal): 1169 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1170 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1171 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1172 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1173 # 1174 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1175 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1176 this, expression = expression, this 1177 1178 return self.expression(klass, this=this, expression=expression) 1179 1180 ALTER_PARSERS = { 1181 "ADD": lambda self: self._parse_alter_table_add(), 1182 "AS": lambda self: self._parse_select(), 1183 "ALTER": lambda self: self._parse_alter_table_alter(), 1184 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1185 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1186 "DROP": lambda self: self._parse_alter_table_drop(), 1187 "RENAME": lambda self: self._parse_alter_table_rename(), 1188 "SET": lambda self: self._parse_alter_table_set(), 1189 "SWAP": lambda self: self.expression( 1190 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1191 ), 1192 } 1193 1194 ALTER_ALTER_PARSERS = { 1195 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1196 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1197 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1198 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1199 } 1200 1201 SCHEMA_UNNAMED_CONSTRAINTS = { 1202 "CHECK", 1203 "EXCLUDE", 1204 "FOREIGN KEY", 1205 "LIKE", 1206 "PERIOD", 1207 "PRIMARY KEY", 1208 "UNIQUE", 1209 "WATERMARK", 1210 "BUCKET", 1211 "TRUNCATE", 1212 } 1213 1214 NO_PAREN_FUNCTION_PARSERS = { 1215 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1216 "CASE": lambda self: self._parse_case(), 1217 "CONNECT_BY_ROOT": lambda self: self.expression( 1218 exp.ConnectByRoot, this=self._parse_column() 1219 ), 1220 "IF": lambda self: self._parse_if(), 1221 } 1222 1223 INVALID_FUNC_NAME_TOKENS = { 1224 TokenType.IDENTIFIER, 1225 TokenType.STRING, 1226 } 1227 1228 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1229 1230 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1231 1232 FUNCTION_PARSERS = { 1233 **{ 1234 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1235 }, 1236 **{ 1237 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1238 }, 1239 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1240 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1241 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1242 "DECODE": lambda self: self._parse_decode(), 1243 "EXTRACT": lambda self: self._parse_extract(), 1244 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1245 "GAP_FILL": lambda self: self._parse_gap_fill(), 1246 "JSON_OBJECT": lambda self: self._parse_json_object(), 1247 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1248 "JSON_TABLE": lambda self: self._parse_json_table(), 1249 "MATCH": lambda self: self._parse_match_against(), 1250 "NORMALIZE": lambda self: self._parse_normalize(), 1251 "OPENJSON": lambda self: self._parse_open_json(), 1252 "OVERLAY": lambda self: self._parse_overlay(), 1253 "POSITION": lambda self: self._parse_position(), 1254 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1255 "STRING_AGG": lambda self: self._parse_string_agg(), 1256 "SUBSTRING": lambda self: self._parse_substring(), 1257 "TRIM": lambda self: self._parse_trim(), 1258 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1259 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1260 "XMLELEMENT": lambda self: self.expression( 1261 exp.XMLElement, 1262 this=self._match_text_seq("NAME") and self._parse_id_var(), 1263 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1264 ), 1265 "XMLTABLE": lambda self: self._parse_xml_table(), 1266 } 1267 1268 QUERY_MODIFIER_PARSERS = { 1269 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1270 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1271 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1272 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1273 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1274 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1275 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1276 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1277 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1278 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1279 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1280 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1281 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1282 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1283 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1284 TokenType.CLUSTER_BY: lambda self: ( 1285 "cluster", 1286 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1287 ), 1288 TokenType.DISTRIBUTE_BY: lambda self: ( 1289 "distribute", 1290 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1291 ), 1292 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1293 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1294 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1295 } 1296 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1297 1298 SET_PARSERS = { 1299 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1300 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1301 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1302 "TRANSACTION": lambda self: self._parse_set_transaction(), 1303 } 1304 1305 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1306 1307 TYPE_LITERAL_PARSERS = { 1308 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1309 } 1310 1311 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1312 1313 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1314 1315 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1316 1317 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1318 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1319 "ISOLATION": ( 1320 ("LEVEL", "REPEATABLE", "READ"), 1321 ("LEVEL", "READ", "COMMITTED"), 1322 ("LEVEL", "READ", "UNCOMMITTED"), 1323 ("LEVEL", "SERIALIZABLE"), 1324 ), 1325 "READ": ("WRITE", "ONLY"), 1326 } 1327 1328 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1329 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1330 ) 1331 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1332 1333 CREATE_SEQUENCE: OPTIONS_TYPE = { 1334 "SCALE": ("EXTEND", "NOEXTEND"), 1335 "SHARD": ("EXTEND", "NOEXTEND"), 1336 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1337 **dict.fromkeys( 1338 ( 1339 "SESSION", 1340 "GLOBAL", 1341 "KEEP", 1342 "NOKEEP", 1343 "ORDER", 1344 "NOORDER", 1345 "NOCACHE", 1346 "CYCLE", 1347 "NOCYCLE", 1348 "NOMINVALUE", 1349 "NOMAXVALUE", 1350 "NOSCALE", 1351 "NOSHARD", 1352 ), 1353 tuple(), 1354 ), 1355 } 1356 1357 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1358 1359 USABLES: OPTIONS_TYPE = dict.fromkeys( 1360 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1361 ) 1362 1363 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1364 1365 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1366 "TYPE": ("EVOLUTION",), 1367 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1368 } 1369 1370 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1371 1372 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1373 1374 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1375 "NOT": ("ENFORCED",), 1376 "MATCH": ( 1377 "FULL", 1378 "PARTIAL", 1379 "SIMPLE", 1380 ), 1381 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1382 "USING": ( 1383 "BTREE", 1384 "HASH", 1385 ), 1386 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1387 } 1388 1389 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1390 "NO": ("OTHERS",), 1391 "CURRENT": ("ROW",), 1392 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1393 } 1394 1395 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1396 1397 CLONE_KEYWORDS = {"CLONE", "COPY"} 1398 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1399 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1400 1401 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1402 1403 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1404 1405 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1406 1407 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1408 1409 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1410 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1411 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1412 1413 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1414 1415 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1416 1417 ADD_CONSTRAINT_TOKENS = { 1418 TokenType.CONSTRAINT, 1419 TokenType.FOREIGN_KEY, 1420 TokenType.INDEX, 1421 TokenType.KEY, 1422 TokenType.PRIMARY_KEY, 1423 TokenType.UNIQUE, 1424 } 1425 1426 DISTINCT_TOKENS = {TokenType.DISTINCT} 1427 1428 NULL_TOKENS = {TokenType.NULL} 1429 1430 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1431 1432 

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False
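
    # The boolean class attributes above are per-dialect parsing switches:
    # dialect-specific Parser subclasses override them at class level instead
    # of mutating instances.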

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
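
    # These tries appear to be built from the keys of SHOW_PARSERS and
    # SET_PARSERS by the Dialect machinery, enabling multi-word keyword
    # matching (see sqlglot.trie).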

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
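
    # Note on error levels: with ErrorLevel.WARN the collected errors are only
    # logged, with ErrorLevel.RAISE they are raised in bulk by check_errors(),
    # and with ErrorLevel.IMMEDIATE (the default) raise_error() below raises on
    # the first failure.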

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
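
    # Example (illustrative): self.expression(exp.Not, this=condition) builds an
    # exp.Not node, attaches any pending comments, and validates that all
    # mandatory arguments are set before returning it.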

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
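
    # Example (illustrative): these entry points are normally reached through
    # the top-level API, e.g.
    #
    #     import sqlglot
    #     tree = sqlglot.parse_one("SELECT a FROM t WHERE b > 1", read="duckdb")
    #
    # which tokenizes the SQL and dispatches here via Parser.parse().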

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
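
    # Example (illustrative): for "DROP TABLE IF EXISTS t", _parse_exists()
    # above consumes the IF EXISTS tokens and the flag is recorded on the
    # resulting exp.Drop node through its "exists" argument.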

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
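
    # Note: both _parse_drop and _parse_create fall back to _parse_as_command
    # when a statement cannot be fully parsed, preserving the raw SQL as an
    # exp.Command instead of failing outright.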
self._match_texts(("MIN", "MINIMUM")), 2218 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2219 } 2220 2221 if self._match_texts(self.PROPERTY_PARSERS): 2222 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2223 try: 2224 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2225 except TypeError: 2226 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2227 2228 return None 2229 2230 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2231 return self._parse_wrapped_csv(self._parse_property) 2232 2233 def _parse_property(self) -> t.Optional[exp.Expression]: 2234 if self._match_texts(self.PROPERTY_PARSERS): 2235 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2236 2237 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2238 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2239 2240 if self._match_text_seq("COMPOUND", "SORTKEY"): 2241 return self._parse_sortkey(compound=True) 2242 2243 if self._match_text_seq("SQL", "SECURITY"): 2244 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2245 2246 index = self._index 2247 2248 seq_props = self._parse_sequence_properties() 2249 if seq_props: 2250 return seq_props 2251 2252 self._retreat(index) 2253 key = self._parse_column() 2254 2255 if not self._match(TokenType.EQ): 2256 self._retreat(index) 2257 return None 2258 2259 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2260 if isinstance(key, exp.Column): 2261 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2262 2263 value = self._parse_bitwise() or self._parse_var(any_token=True) 2264 2265 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2266 if isinstance(value, exp.Column): 2267 value = exp.var(value.name) 2268 2269 return self.expression(exp.Property, this=key, value=value) 2270 2271 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2272 if self._match_text_seq("BY"): 2273 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2274 2275 self._match(TokenType.ALIAS) 2276 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2277 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2278 2279 return self.expression( 2280 exp.FileFormatProperty, 2281 this=( 2282 self.expression( 2283 exp.InputOutputFormat, 2284 input_format=input_format, 2285 output_format=output_format, 2286 ) 2287 if input_format or output_format 2288 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2289 ), 2290 hive_format=True, 2291 ) 2292 2293 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2294 field = self._parse_field() 2295 if isinstance(field, exp.Identifier) and not field.quoted: 2296 field = exp.var(field) 2297 2298 return field 2299 2300 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2301 self._match(TokenType.EQ) 2302 self._match(TokenType.ALIAS) 2303 2304 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2305 2306 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2307 properties = [] 2308 while True: 2309 if before: 2310 prop = self._parse_property_before() 2311 else: 2312 prop = self._parse_property() 2313 if not prop: 2314 break 2315 for p in ensure_list(prop): 2316 properties.append(p) 

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
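
    # The WITH (...) option parsers above (system versioning, data deletion)
    # share the same loop shape: consume KEY = value pairs until the closing
    # parenthesis, tolerating commas between entries.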

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
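
    # Most property helpers above follow a common shape: optionally match
    # leading tokens (e.g. "=" or a keyword), consume the property-specific
    # payload, and wrap the result in the matching exp.* node via
    # self.expression().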

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
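
    # For reference, _parse_partition_bound_spec above covers the Postgres
    # bound forms: IN ('a', 'b'), FROM (MINVALUE) TO (10), and
    # WITH (MODULUS 4, REMAINDER 0).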

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
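
    # For reference, _parse_multitable_inserts handles constructs in the style
    # of Oracle's INSERT ALL / INSERT FIRST ... WHEN cond THEN INTO t ... SELECT.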

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
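
    # Example (illustrative): "INSERT ... ON CONFLICT (id) DO UPDATE SET x = 1"
    # yields an exp.OnConflict whose conflict_keys hold the id column and whose
    # expressions hold the SET assignments.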

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
self._parse_table(schema=True) 3152 3153 options = [] 3154 if self._match_text_seq("OPTIONS"): 3155 self._match_l_paren() 3156 k = self._parse_string() 3157 self._match(TokenType.EQ) 3158 v = self._parse_string() 3159 options = [k, v] 3160 self._match_r_paren() 3161 3162 self._match(TokenType.ALIAS) 3163 return self.expression( 3164 exp.Cache, 3165 this=table, 3166 lazy=lazy, 3167 options=options, 3168 expression=self._parse_select(nested=True), 3169 ) 3170 3171 def _parse_partition(self) -> t.Optional[exp.Partition]: 3172 if not self._match_texts(self.PARTITION_KEYWORDS): 3173 return None 3174 3175 return self.expression( 3176 exp.Partition, 3177 subpartition=self._prev.text.upper() == "SUBPARTITION", 3178 expressions=self._parse_wrapped_csv(self._parse_assignment), 3179 ) 3180 3181 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3182 def _parse_value_expression() -> t.Optional[exp.Expression]: 3183 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3184 return exp.var(self._prev.text.upper()) 3185 return self._parse_expression() 3186 3187 if self._match(TokenType.L_PAREN): 3188 expressions = self._parse_csv(_parse_value_expression) 3189 self._match_r_paren() 3190 return self.expression(exp.Tuple, expressions=expressions) 3191 3192 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3193 expression = self._parse_expression() 3194 if expression: 3195 return self.expression(exp.Tuple, expressions=[expression]) 3196 return None 3197 3198 def _parse_projections(self) -> t.List[exp.Expression]: 3199 return self._parse_expressions() 3200 3201 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3202 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3203 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3204 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3205 ) 3206 elif self._match(TokenType.FROM): 3207 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3208 # Support parentheses for duckdb FROM-first syntax 3209 select = self._parse_select() 3210 if select: 3211 select.set("from", from_) 3212 this = select 3213 else: 3214 this = exp.select("*").from_(t.cast(exp.From, from_)) 3215 else: 3216 this = ( 3217 self._parse_table(consume_pipe=True) 3218 if table 3219 else self._parse_select(nested=True, parse_set_operation=False) 3220 ) 3221 3222 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3223 # in case a modifier (e.g.
join) is following 3224 if table and isinstance(this, exp.Values) and this.alias: 3225 alias = this.args["alias"].pop() 3226 this = exp.Table(this=this, alias=alias) 3227 3228 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3229 3230 return this 3231 3232 def _parse_select( 3233 self, 3234 nested: bool = False, 3235 table: bool = False, 3236 parse_subquery_alias: bool = True, 3237 parse_set_operation: bool = True, 3238 consume_pipe: bool = True, 3239 ) -> t.Optional[exp.Expression]: 3240 query = self._parse_select_query( 3241 nested=nested, 3242 table=table, 3243 parse_subquery_alias=parse_subquery_alias, 3244 parse_set_operation=parse_set_operation, 3245 ) 3246 3247 if ( 3248 consume_pipe 3249 and self._match(TokenType.PIPE_GT, advance=False) 3250 and isinstance(query, exp.Query) 3251 ): 3252 query = self._parse_pipe_syntax_query(query) 3253 query = query.subquery(copy=False) if query and table else query 3254 3255 return query 3256 3257 def _parse_select_query( 3258 self, 3259 nested: bool = False, 3260 table: bool = False, 3261 parse_subquery_alias: bool = True, 3262 parse_set_operation: bool = True, 3263 ) -> t.Optional[exp.Expression]: 3264 cte = self._parse_with() 3265 3266 if cte: 3267 this = self._parse_statement() 3268 3269 if not this: 3270 self.raise_error("Failed to parse any statement following CTE") 3271 return cte 3272 3273 if "with" in this.arg_types: 3274 this.set("with", cte) 3275 else: 3276 self.raise_error(f"{this.key} does not support CTE") 3277 this = cte 3278 3279 return this 3280 3281 # duckdb supports leading with FROM x 3282 from_ = ( 3283 self._parse_from(consume_pipe=True) 3284 if self._match(TokenType.FROM, advance=False) 3285 else None 3286 ) 3287 3288 if self._match(TokenType.SELECT): 3289 comments = self._prev_comments 3290 3291 hint = self._parse_hint() 3292 3293 if self._next and not self._next.token_type == TokenType.DOT: 3294 all_ = self._match(TokenType.ALL) 3295 distinct = self._match_set(self.DISTINCT_TOKENS) 3296 else: 3297 all_, distinct = None, None 3298 3299 kind = ( 3300 self._match(TokenType.ALIAS) 3301 and self._match_texts(("STRUCT", "VALUE")) 3302 and self._prev.text.upper() 3303 ) 3304 3305 if distinct: 3306 distinct = self.expression( 3307 exp.Distinct, 3308 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3309 ) 3310 3311 if all_ and distinct: 3312 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3313 3314 operation_modifiers = [] 3315 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3316 operation_modifiers.append(exp.var(self._prev.text.upper())) 3317 3318 limit = self._parse_limit(top=True) 3319 projections = self._parse_projections() 3320 3321 this = self.expression( 3322 exp.Select, 3323 kind=kind, 3324 hint=hint, 3325 distinct=distinct, 3326 expressions=projections, 3327 limit=limit, 3328 operation_modifiers=operation_modifiers or None, 3329 ) 3330 this.comments = comments 3331 3332 into = self._parse_into() 3333 if into: 3334 this.set("into", into) 3335 3336 if not from_: 3337 from_ = self._parse_from() 3338 3339 if from_: 3340 this.set("from", from_) 3341 3342 this = self._parse_query_modifiers(this) 3343 elif (table or nested) and self._match(TokenType.L_PAREN): 3344 this = self._parse_wrapped_select(table=table) 3345 3346 # We return early here so that the UNION isn't attached to the subquery by the 3347 # following call to _parse_set_operations, but instead becomes the parent node 3348 self._match_r_paren() 3349 return self._parse_subquery(this, 
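# e.g. for "(SELECT 1) UNION SELECT 2" the UNION must end up as the parent of the parenthesized subquery rather than being folded into it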
parse_alias=parse_subquery_alias) 3350 elif self._match(TokenType.VALUES, advance=False): 3351 this = self._parse_derived_table_values() 3352 elif from_: 3353 this = exp.select("*").from_(from_.this, copy=False) 3354 elif self._match(TokenType.SUMMARIZE): 3355 table = self._match(TokenType.TABLE) 3356 this = self._parse_select() or self._parse_string() or self._parse_table() 3357 return self.expression(exp.Summarize, this=this, table=table) 3358 elif self._match(TokenType.DESCRIBE): 3359 this = self._parse_describe() 3360 elif self._match_text_seq("STREAM"): 3361 this = self._parse_function() 3362 if this: 3363 this = self.expression(exp.Stream, this=this) 3364 else: 3365 self._retreat(self._index - 1) 3366 else: 3367 this = None 3368 3369 return self._parse_set_operations(this) if parse_set_operation else this 3370 3371 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3372 self._match_text_seq("SEARCH") 3373 3374 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3375 3376 if not kind: 3377 return None 3378 3379 self._match_text_seq("FIRST", "BY") 3380 3381 return self.expression( 3382 exp.RecursiveWithSearch, 3383 kind=kind, 3384 this=self._parse_id_var(), 3385 expression=self._match_text_seq("SET") and self._parse_id_var(), 3386 using=self._match_text_seq("USING") and self._parse_id_var(), 3387 ) 3388 3389 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3390 if not skip_with_token and not self._match(TokenType.WITH): 3391 return None 3392 3393 comments = self._prev_comments 3394 recursive = self._match(TokenType.RECURSIVE) 3395 3396 last_comments = None 3397 expressions = [] 3398 while True: 3399 cte = self._parse_cte() 3400 if isinstance(cte, exp.CTE): 3401 expressions.append(cte) 3402 if last_comments: 3403 cte.add_comments(last_comments) 3404 3405 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3406 break 3407 else: 3408 self._match(TokenType.WITH) 3409 3410 last_comments = self._prev_comments 3411 3412 return self.expression( 3413 exp.With, 3414 comments=comments, 3415 expressions=expressions, 3416 recursive=recursive, 3417 search=self._parse_recursive_with_search(), 3418 ) 3419 3420 def _parse_cte(self) -> t.Optional[exp.CTE]: 3421 index = self._index 3422 3423 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3424 if not alias or not alias.this: 3425 self.raise_error("Expected CTE to have alias") 3426 3427 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3428 self._retreat(index) 3429 return None 3430 3431 comments = self._prev_comments 3432 3433 if self._match_text_seq("NOT", "MATERIALIZED"): 3434 materialized = False 3435 elif self._match_text_seq("MATERIALIZED"): 3436 materialized = True 3437 else: 3438 materialized = None 3439 3440 cte = self.expression( 3441 exp.CTE, 3442 this=self._parse_wrapped(self._parse_statement), 3443 alias=alias, 3444 materialized=materialized, 3445 comments=comments, 3446 ) 3447 3448 values = cte.this 3449 if isinstance(values, exp.Values): 3450 if values.alias: 3451 cte.set("this", exp.select("*").from_(values)) 3452 else: 3453 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3454 3455 return cte 3456 3457 def _parse_table_alias( 3458 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3459 ) -> t.Optional[exp.TableAlias]: 3460 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3461 # so this section tries to parse the clause 
version and if it fails, it treats the token 3462 # as an identifier (alias) 3463 if self._can_parse_limit_or_offset(): 3464 return None 3465 3466 any_token = self._match(TokenType.ALIAS) 3467 alias = ( 3468 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3469 or self._parse_string_as_identifier() 3470 ) 3471 3472 index = self._index 3473 if self._match(TokenType.L_PAREN): 3474 columns = self._parse_csv(self._parse_function_parameter) 3475 self._match_r_paren() if columns else self._retreat(index) 3476 else: 3477 columns = None 3478 3479 if not alias and not columns: 3480 return None 3481 3482 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3483 3484 # We bubble up comments from the Identifier to the TableAlias 3485 if isinstance(alias, exp.Identifier): 3486 table_alias.add_comments(alias.pop_comments()) 3487 3488 return table_alias 3489 3490 def _parse_subquery( 3491 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3492 ) -> t.Optional[exp.Subquery]: 3493 if not this: 3494 return None 3495 3496 return self.expression( 3497 exp.Subquery, 3498 this=this, 3499 pivots=self._parse_pivots(), 3500 alias=self._parse_table_alias() if parse_alias else None, 3501 sample=self._parse_table_sample(), 3502 ) 3503 3504 def _implicit_unnests_to_explicit(self, this: E) -> E: 3505 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3506 3507 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3508 for i, join in enumerate(this.args.get("joins") or []): 3509 table = join.this 3510 normalized_table = table.copy() 3511 normalized_table.meta["maybe_column"] = True 3512 normalized_table = _norm(normalized_table, dialect=self.dialect) 3513 3514 if isinstance(table, exp.Table) and not join.args.get("on"): 3515 if normalized_table.parts[0].name in refs: 3516 table_as_column = table.to_column() 3517 unnest = exp.Unnest(expressions=[table_as_column]) 3518 3519 # Table.to_column creates a parent Alias node that we want to convert to 3520 # a TableAlias and attach to the Unnest, so it matches the parser's output 3521 if isinstance(table.args.get("alias"), exp.TableAlias): 3522 table_as_column.replace(table_as_column.this) 3523 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3524 3525 table.replace(unnest) 3526 3527 refs.add(normalized_table.alias_or_name) 3528 3529 return this 3530 3531 def _parse_query_modifiers( 3532 self, this: t.Optional[exp.Expression] 3533 ) -> t.Optional[exp.Expression]: 3534 if isinstance(this, self.MODIFIABLES): 3535 for join in self._parse_joins(): 3536 this.append("joins", join) 3537 for lateral in iter(self._parse_lateral, None): 3538 this.append("laterals", lateral) 3539 3540 while True: 3541 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3542 modifier_token = self._curr 3543 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3544 key, expression = parser(self) 3545 3546 if expression: 3547 if this.args.get(key): 3548 self.raise_error( 3549 f"Found multiple '{modifier_token.text.upper()}' clauses", 3550 token=modifier_token, 3551 ) 3552 3553 this.set(key, expression) 3554 if key == "limit": 3555 offset = expression.args.pop("offset", None) 3556 3557 if offset: 3558 offset = exp.Offset(expression=offset) 3559 this.set("offset", offset) 3560 3561 limit_by_expressions = expression.expressions 3562 expression.set("expressions", None) 3563 offset.set("expressions", limit_by_expressions) 3564 continue 
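# no further query modifier was parsed at the current position, so stop scanning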
3565 break 3566 3567 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3568 this = self._implicit_unnests_to_explicit(this) 3569 3570 return this 3571 3572 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3573 start = self._curr 3574 while self._curr: 3575 self._advance() 3576 3577 end = self._tokens[self._index - 1] 3578 return exp.Hint(expressions=[self._find_sql(start, end)]) 3579 3580 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3581 return self._parse_function_call() 3582 3583 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3584 start_index = self._index 3585 should_fallback_to_string = False 3586 3587 hints = [] 3588 try: 3589 for hint in iter( 3590 lambda: self._parse_csv( 3591 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3592 ), 3593 [], 3594 ): 3595 hints.extend(hint) 3596 except ParseError: 3597 should_fallback_to_string = True 3598 3599 if should_fallback_to_string or self._curr: 3600 self._retreat(start_index) 3601 return self._parse_hint_fallback_to_string() 3602 3603 return self.expression(exp.Hint, expressions=hints) 3604 3605 def _parse_hint(self) -> t.Optional[exp.Hint]: 3606 if self._match(TokenType.HINT) and self._prev_comments: 3607 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3608 3609 return None 3610 3611 def _parse_into(self) -> t.Optional[exp.Into]: 3612 if not self._match(TokenType.INTO): 3613 return None 3614 3615 temp = self._match(TokenType.TEMPORARY) 3616 unlogged = self._match_text_seq("UNLOGGED") 3617 self._match(TokenType.TABLE) 3618 3619 return self.expression( 3620 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3621 ) 3622 3623 def _parse_from( 3624 self, 3625 joins: bool = False, 3626 skip_from_token: bool = False, 3627 consume_pipe: bool = False, 3628 ) -> t.Optional[exp.From]: 3629 if not skip_from_token and not self._match(TokenType.FROM): 3630 return None 3631 3632 return self.expression( 3633 exp.From, 3634 comments=self._prev_comments, 3635 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3636 ) 3637 3638 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3639 return self.expression( 3640 exp.MatchRecognizeMeasure, 3641 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3642 this=self._parse_expression(), 3643 ) 3644 3645 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3646 if not self._match(TokenType.MATCH_RECOGNIZE): 3647 return None 3648 3649 self._match_l_paren() 3650 3651 partition = self._parse_partition_by() 3652 order = self._parse_order() 3653 3654 measures = ( 3655 self._parse_csv(self._parse_match_recognize_measure) 3656 if self._match_text_seq("MEASURES") 3657 else None 3658 ) 3659 3660 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3661 rows = exp.var("ONE ROW PER MATCH") 3662 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3663 text = "ALL ROWS PER MATCH" 3664 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3665 text += " SHOW EMPTY MATCHES" 3666 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3667 text += " OMIT EMPTY MATCHES" 3668 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3669 text += " WITH UNMATCHED ROWS" 3670 rows = exp.var(text) 3671 else: 3672 rows = None 3673 3674 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3675 text = "AFTER MATCH SKIP" 3676 if self._match_text_seq("PAST", "LAST", "ROW"): 3677 text += " PAST LAST ROW" 3678 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 3679 text += " TO NEXT ROW" 3680 elif self._match_text_seq("TO", "FIRST"): 3681 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3682 elif self._match_text_seq("TO", "LAST"): 3683 text += f" TO LAST {self._advance_any().text}" # type: ignore 3684 after = exp.var(text) 3685 else: 3686 after = None 3687 3688 if self._match_text_seq("PATTERN"): 3689 self._match_l_paren() 3690 3691 if not self._curr: 3692 self.raise_error("Expecting )", self._curr) 3693 3694 paren = 1 3695 start = self._curr 3696 3697 while self._curr and paren > 0: 3698 if self._curr.token_type == TokenType.L_PAREN: 3699 paren += 1 3700 if self._curr.token_type == TokenType.R_PAREN: 3701 paren -= 1 3702 3703 end = self._prev 3704 self._advance() 3705 3706 if paren > 0: 3707 self.raise_error("Expecting )", self._curr) 3708 3709 pattern = exp.var(self._find_sql(start, end)) 3710 else: 3711 pattern = None 3712 3713 define = ( 3714 self._parse_csv(self._parse_name_as_expression) 3715 if self._match_text_seq("DEFINE") 3716 else None 3717 ) 3718 3719 self._match_r_paren() 3720 3721 return self.expression( 3722 exp.MatchRecognize, 3723 partition_by=partition, 3724 order=order, 3725 measures=measures, 3726 rows=rows, 3727 after=after, 3728 pattern=pattern, 3729 define=define, 3730 alias=self._parse_table_alias(), 3731 ) 3732 3733 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3734 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3735 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3736 cross_apply = False 3737 3738 if cross_apply is not None: 3739 this = self._parse_select(table=True) 3740 view = None 3741 outer = None 3742 elif self._match(TokenType.LATERAL): 3743 this = self._parse_select(table=True) 3744 view = self._match(TokenType.VIEW) 3745 outer = self._match(TokenType.OUTER) 3746 else: 3747 return None 3748 3749 if not this: 3750 this = ( 3751 self._parse_unnest() 3752 or self._parse_function() 3753 or self._parse_id_var(any_token=False) 3754 ) 3755 3756 while self._match(TokenType.DOT): 3757 this = exp.Dot( 3758 this=this, 3759 expression=self._parse_function() or self._parse_id_var(any_token=False), 3760 ) 3761 3762 ordinality: t.Optional[bool] = None 3763 3764 if view: 3765 table = self._parse_id_var(any_token=False) 3766 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3767 table_alias: t.Optional[exp.TableAlias] = self.expression( 3768 exp.TableAlias, this=table, columns=columns 3769 ) 3770 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3771 # We move the alias from the lateral's child node to the lateral itself 3772 table_alias = this.args["alias"].pop() 3773 else: 3774 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3775 table_alias = self._parse_table_alias() 3776 3777 return self.expression( 3778 exp.Lateral, 3779 this=this, 3780 view=view, 3781 outer=outer, 3782 alias=table_alias, 3783 cross_apply=cross_apply, 3784 ordinality=ordinality, 3785 ) 3786 3787 def _parse_join_parts( 3788 self, 3789 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3790 return ( 3791 self._match_set(self.JOIN_METHODS) and self._prev, 3792 self._match_set(self.JOIN_SIDES) and self._prev, 3793 self._match_set(self.JOIN_KINDS) and self._prev, 3794 ) 3795 3796 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3797 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3798 this = self._parse_column() 3799 if 
isinstance(this, exp.Column): 3800 return this.this 3801 return this 3802 3803 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3804 3805 def _parse_join( 3806 self, skip_join_token: bool = False, parse_bracket: bool = False 3807 ) -> t.Optional[exp.Join]: 3808 if self._match(TokenType.COMMA): 3809 table = self._try_parse(self._parse_table) 3810 cross_join = self.expression(exp.Join, this=table) if table else None 3811 3812 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3813 cross_join.set("kind", "CROSS") 3814 3815 return cross_join 3816 3817 index = self._index 3818 method, side, kind = self._parse_join_parts() 3819 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3820 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3821 join_comments = self._prev_comments 3822 3823 if not skip_join_token and not join: 3824 self._retreat(index) 3825 kind = None 3826 method = None 3827 side = None 3828 3829 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3830 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3831 3832 if not skip_join_token and not join and not outer_apply and not cross_apply: 3833 return None 3834 3835 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3836 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3837 kwargs["expressions"] = self._parse_csv( 3838 lambda: self._parse_table(parse_bracket=parse_bracket) 3839 ) 3840 3841 if method: 3842 kwargs["method"] = method.text 3843 if side: 3844 kwargs["side"] = side.text 3845 if kind: 3846 kwargs["kind"] = kind.text 3847 if hint: 3848 kwargs["hint"] = hint 3849 3850 if self._match(TokenType.MATCH_CONDITION): 3851 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3852 3853 if self._match(TokenType.ON): 3854 kwargs["on"] = self._parse_assignment() 3855 elif self._match(TokenType.USING): 3856 kwargs["using"] = self._parse_using_identifiers() 3857 elif ( 3858 not method 3859 and not (outer_apply or cross_apply) 3860 and not isinstance(kwargs["this"], exp.Unnest) 3861 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3862 ): 3863 index = self._index 3864 joins: t.Optional[list] = list(self._parse_joins()) 3865 3866 if joins and self._match(TokenType.ON): 3867 kwargs["on"] = self._parse_assignment() 3868 elif joins and self._match(TokenType.USING): 3869 kwargs["using"] = self._parse_using_identifiers() 3870 else: 3871 joins = None 3872 self._retreat(index) 3873 3874 kwargs["this"].set("joins", joins if joins else None) 3875 3876 kwargs["pivots"] = self._parse_pivots() 3877 3878 comments = [c for token in (method, side, kind) if token for c in token.comments] 3879 comments = (join_comments or []) + comments 3880 return self.expression(exp.Join, comments=comments, **kwargs) 3881 3882 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3883 this = self._parse_assignment() 3884 3885 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3886 return this 3887 3888 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3889 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3890 3891 return this 3892 3893 def _parse_index_params(self) -> exp.IndexParameters: 3894 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3895 3896 if self._match(TokenType.L_PAREN, advance=False): 3897 columns = 
self._parse_wrapped_csv(self._parse_with_operator) 3898 else: 3899 columns = None 3900 3901 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3902 partition_by = self._parse_partition_by() 3903 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3904 tablespace = ( 3905 self._parse_var(any_token=True) 3906 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3907 else None 3908 ) 3909 where = self._parse_where() 3910 3911 on = self._parse_field() if self._match(TokenType.ON) else None 3912 3913 return self.expression( 3914 exp.IndexParameters, 3915 using=using, 3916 columns=columns, 3917 include=include, 3918 partition_by=partition_by, 3919 where=where, 3920 with_storage=with_storage, 3921 tablespace=tablespace, 3922 on=on, 3923 ) 3924 3925 def _parse_index( 3926 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3927 ) -> t.Optional[exp.Index]: 3928 if index or anonymous: 3929 unique = None 3930 primary = None 3931 amp = None 3932 3933 self._match(TokenType.ON) 3934 self._match(TokenType.TABLE) # hive 3935 table = self._parse_table_parts(schema=True) 3936 else: 3937 unique = self._match(TokenType.UNIQUE) 3938 primary = self._match_text_seq("PRIMARY") 3939 amp = self._match_text_seq("AMP") 3940 3941 if not self._match(TokenType.INDEX): 3942 return None 3943 3944 index = self._parse_id_var() 3945 table = None 3946 3947 params = self._parse_index_params() 3948 3949 return self.expression( 3950 exp.Index, 3951 this=index, 3952 table=table, 3953 unique=unique, 3954 primary=primary, 3955 amp=amp, 3956 params=params, 3957 ) 3958 3959 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3960 hints: t.List[exp.Expression] = [] 3961 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3962 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3963 hints.append( 3964 self.expression( 3965 exp.WithTableHint, 3966 expressions=self._parse_csv( 3967 lambda: self._parse_function() or self._parse_var(any_token=True) 3968 ), 3969 ) 3970 ) 3971 self._match_r_paren() 3972 else: 3973 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3974 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3975 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3976 3977 self._match_set((TokenType.INDEX, TokenType.KEY)) 3978 if self._match(TokenType.FOR): 3979 hint.set("target", self._advance_any() and self._prev.text.upper()) 3980 3981 hint.set("expressions", self._parse_wrapped_id_vars()) 3982 hints.append(hint) 3983 3984 return hints or None 3985 3986 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3987 return ( 3988 (not schema and self._parse_function(optional_parens=False)) 3989 or self._parse_id_var(any_token=False) 3990 or self._parse_string_as_identifier() 3991 or self._parse_placeholder() 3992 ) 3993 3994 def _parse_table_parts( 3995 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3996 ) -> exp.Table: 3997 catalog = None 3998 db = None 3999 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4000 4001 while self._match(TokenType.DOT): 4002 if catalog: 4003 # This allows nesting the table in arbitrarily many dot expressions if needed 4004 table = self.expression( 4005 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4006 ) 4007 else: 4008 catalog = db 4009 db = table 4010 # "" used for tsql FROM a..b case 4011 table = 
self._parse_table_part(schema=schema) or "" 4012 4013 if ( 4014 wildcard 4015 and self._is_connected() 4016 and (isinstance(table, exp.Identifier) or not table) 4017 and self._match(TokenType.STAR) 4018 ): 4019 if isinstance(table, exp.Identifier): 4020 table.args["this"] += "*" 4021 else: 4022 table = exp.Identifier(this="*") 4023 4024 # We bubble up comments from the Identifier to the Table 4025 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4026 4027 if is_db_reference: 4028 catalog = db 4029 db = table 4030 table = None 4031 4032 if not table and not is_db_reference: 4033 self.raise_error(f"Expected table name but got {self._curr}") 4034 if not db and is_db_reference: 4035 self.raise_error(f"Expected database name but got {self._curr}") 4036 4037 table = self.expression( 4038 exp.Table, 4039 comments=comments, 4040 this=table, 4041 db=db, 4042 catalog=catalog, 4043 ) 4044 4045 changes = self._parse_changes() 4046 if changes: 4047 table.set("changes", changes) 4048 4049 at_before = self._parse_historical_data() 4050 if at_before: 4051 table.set("when", at_before) 4052 4053 pivots = self._parse_pivots() 4054 if pivots: 4055 table.set("pivots", pivots) 4056 4057 return table 4058 4059 def _parse_table( 4060 self, 4061 schema: bool = False, 4062 joins: bool = False, 4063 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4064 parse_bracket: bool = False, 4065 is_db_reference: bool = False, 4066 parse_partition: bool = False, 4067 consume_pipe: bool = False, 4068 ) -> t.Optional[exp.Expression]: 4069 lateral = self._parse_lateral() 4070 if lateral: 4071 return lateral 4072 4073 unnest = self._parse_unnest() 4074 if unnest: 4075 return unnest 4076 4077 values = self._parse_derived_table_values() 4078 if values: 4079 return values 4080 4081 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4082 if subquery: 4083 if not subquery.args.get("pivots"): 4084 subquery.set("pivots", self._parse_pivots()) 4085 return subquery 4086 4087 bracket = parse_bracket and self._parse_bracket(None) 4088 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4089 4090 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4091 self._parse_table 4092 ) 4093 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4094 4095 only = self._match(TokenType.ONLY) 4096 4097 this = t.cast( 4098 exp.Expression, 4099 bracket 4100 or rows_from 4101 or self._parse_bracket( 4102 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4103 ), 4104 ) 4105 4106 if only: 4107 this.set("only", only) 4108 4109 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4110 self._match_text_seq("*") 4111 4112 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4113 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4114 this.set("partition", self._parse_partition()) 4115 4116 if schema: 4117 return self._parse_schema(this=this) 4118 4119 version = self._parse_version() 4120 4121 if version: 4122 this.set("version", version) 4123 4124 if self.dialect.ALIAS_POST_TABLESAMPLE: 4125 this.set("sample", self._parse_table_sample()) 4126 4127 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4128 if alias: 4129 this.set("alias", alias) 4130 4131 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4132 return self.expression( 4133 exp.AtIndex, this=this.to_column(copy=False), 
expression=self._parse_id_var() 4134 ) 4135 4136 this.set("hints", self._parse_table_hints()) 4137 4138 if not this.args.get("pivots"): 4139 this.set("pivots", self._parse_pivots()) 4140 4141 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4142 this.set("sample", self._parse_table_sample()) 4143 4144 if joins: 4145 for join in self._parse_joins(): 4146 this.append("joins", join) 4147 4148 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4149 this.set("ordinality", True) 4150 this.set("alias", self._parse_table_alias()) 4151 4152 return this 4153 4154 def _parse_version(self) -> t.Optional[exp.Version]: 4155 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4156 this = "TIMESTAMP" 4157 elif self._match(TokenType.VERSION_SNAPSHOT): 4158 this = "VERSION" 4159 else: 4160 return None 4161 4162 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4163 kind = self._prev.text.upper() 4164 start = self._parse_bitwise() 4165 self._match_texts(("TO", "AND")) 4166 end = self._parse_bitwise() 4167 expression: t.Optional[exp.Expression] = self.expression( 4168 exp.Tuple, expressions=[start, end] 4169 ) 4170 elif self._match_text_seq("CONTAINED", "IN"): 4171 kind = "CONTAINED IN" 4172 expression = self.expression( 4173 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4174 ) 4175 elif self._match(TokenType.ALL): 4176 kind = "ALL" 4177 expression = None 4178 else: 4179 self._match_text_seq("AS", "OF") 4180 kind = "AS OF" 4181 expression = self._parse_type() 4182 4183 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4184 4185 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4186 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4187 index = self._index 4188 historical_data = None 4189 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4190 this = self._prev.text.upper() 4191 kind = ( 4192 self._match(TokenType.L_PAREN) 4193 and self._match_texts(self.HISTORICAL_DATA_KIND) 4194 and self._prev.text.upper() 4195 ) 4196 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4197 4198 if expression: 4199 self._match_r_paren() 4200 historical_data = self.expression( 4201 exp.HistoricalData, this=this, kind=kind, expression=expression 4202 ) 4203 else: 4204 self._retreat(index) 4205 4206 return historical_data 4207 4208 def _parse_changes(self) -> t.Optional[exp.Changes]: 4209 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4210 return None 4211 4212 information = self._parse_var(any_token=True) 4213 self._match_r_paren() 4214 4215 return self.expression( 4216 exp.Changes, 4217 information=information, 4218 at_before=self._parse_historical_data(), 4219 end=self._parse_historical_data(), 4220 ) 4221 4222 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4223 if not self._match(TokenType.UNNEST): 4224 return None 4225 4226 expressions = self._parse_wrapped_csv(self._parse_equality) 4227 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4228 4229 alias = self._parse_table_alias() if with_alias else None 4230 4231 if alias: 4232 if self.dialect.UNNEST_COLUMN_ONLY: 4233 if alias.args.get("columns"): 4234 self.raise_error("Unexpected extra column alias in unnest.") 4235 4236 alias.set("columns", [alias.this]) 4237 alias.set("this", None) 4238 4239 columns = alias.args.get("columns") or [] 4240 if offset and len(expressions) < len(columns): 4241 offset = columns.pop() 4242 4243 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4244 
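# e.g. BigQuery's UNNEST(arr) WITH OFFSET [AS] pos; both AS and the alias are optional, and the alias falls back to "offset"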
self._match(TokenType.ALIAS) 4245 offset = self._parse_id_var( 4246 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4247 ) or exp.to_identifier("offset") 4248 4249 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4250 4251 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4252 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4253 if not is_derived and not ( 4254 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4255 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4256 ): 4257 return None 4258 4259 expressions = self._parse_csv(self._parse_value) 4260 alias = self._parse_table_alias() 4261 4262 if is_derived: 4263 self._match_r_paren() 4264 4265 return self.expression( 4266 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4267 ) 4268 4269 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4270 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4271 as_modifier and self._match_text_seq("USING", "SAMPLE") 4272 ): 4273 return None 4274 4275 bucket_numerator = None 4276 bucket_denominator = None 4277 bucket_field = None 4278 percent = None 4279 size = None 4280 seed = None 4281 4282 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4283 matched_l_paren = self._match(TokenType.L_PAREN) 4284 4285 if self.TABLESAMPLE_CSV: 4286 num = None 4287 expressions = self._parse_csv(self._parse_primary) 4288 else: 4289 expressions = None 4290 num = ( 4291 self._parse_factor() 4292 if self._match(TokenType.NUMBER, advance=False) 4293 else self._parse_primary() or self._parse_placeholder() 4294 ) 4295 4296 if self._match_text_seq("BUCKET"): 4297 bucket_numerator = self._parse_number() 4298 self._match_text_seq("OUT", "OF") 4299 bucket_denominator = self._parse_number() 4300 self._match(TokenType.ON) 4301 bucket_field = self._parse_field() 4302 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4303 percent = num 4304 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4305 size = num 4306 else: 4307 percent = num 4308 4309 if matched_l_paren: 4310 self._match_r_paren() 4311 4312 if self._match(TokenType.L_PAREN): 4313 method = self._parse_var(upper=True) 4314 seed = self._match(TokenType.COMMA) and self._parse_number() 4315 self._match_r_paren() 4316 elif self._match_texts(("SEED", "REPEATABLE")): 4317 seed = self._parse_wrapped(self._parse_number) 4318 4319 if not method and self.DEFAULT_SAMPLING_METHOD: 4320 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4321 4322 return self.expression( 4323 exp.TableSample, 4324 expressions=expressions, 4325 method=method, 4326 bucket_numerator=bucket_numerator, 4327 bucket_denominator=bucket_denominator, 4328 bucket_field=bucket_field, 4329 percent=percent, 4330 size=size, 4331 seed=seed, 4332 ) 4333 4334 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4335 return list(iter(self._parse_pivot, None)) or None 4336 4337 def _parse_joins(self) -> t.Iterator[exp.Join]: 4338 return iter(self._parse_join, None) 4339 4340 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4341 if not self._match(TokenType.INTO): 4342 return None 4343 4344 return self.expression( 4345 exp.UnpivotColumns, 4346 this=self._match_text_seq("NAME") and self._parse_column(), 4347 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4348 ) 4349 4350 # https://duckdb.org/docs/sql/statements/pivot 4351
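# Illustrative inputs handled here (per the DuckDB docs linked above):
#   PIVOT cities ON year USING sum(population) GROUP BY country
#   UNPIVOT monthly_sales ON jan, feb, mar INTO NAME month VALUE sales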
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4352 def _parse_on() -> t.Optional[exp.Expression]: 4353 this = self._parse_bitwise() 4354 4355 if self._match(TokenType.IN): 4356 # PIVOT ... ON col IN (row_val1, row_val2) 4357 return self._parse_in(this) 4358 if self._match(TokenType.ALIAS, advance=False): 4359 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4360 return self._parse_alias(this) 4361 4362 return this 4363 4364 this = self._parse_table() 4365 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4366 into = self._parse_unpivot_columns() 4367 using = self._match(TokenType.USING) and self._parse_csv( 4368 lambda: self._parse_alias(self._parse_function()) 4369 ) 4370 group = self._parse_group() 4371 4372 return self.expression( 4373 exp.Pivot, 4374 this=this, 4375 expressions=expressions, 4376 using=using, 4377 group=group, 4378 unpivot=is_unpivot, 4379 into=into, 4380 ) 4381 4382 def _parse_pivot_in(self) -> exp.In: 4383 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4384 this = self._parse_select_or_expression() 4385 4386 self._match(TokenType.ALIAS) 4387 alias = self._parse_bitwise() 4388 if alias: 4389 if isinstance(alias, exp.Column) and not alias.db: 4390 alias = alias.this 4391 return self.expression(exp.PivotAlias, this=this, alias=alias) 4392 4393 return this 4394 4395 value = self._parse_column() 4396 4397 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4398 self.raise_error("Expecting IN (") 4399 4400 if self._match(TokenType.ANY): 4401 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4402 else: 4403 exprs = self._parse_csv(_parse_aliased_expression) 4404 4405 self._match_r_paren() 4406 return self.expression(exp.In, this=value, expressions=exprs) 4407 4408 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4409 func = self._parse_function() 4410 if not func: 4411 self.raise_error("Expecting an aggregation function in PIVOT") 4412 4413 return self._parse_alias(func) 4414 4415 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4416 index = self._index 4417 include_nulls = None 4418 4419 if self._match(TokenType.PIVOT): 4420 unpivot = False 4421 elif self._match(TokenType.UNPIVOT): 4422 unpivot = True 4423 4424 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4425 if self._match_text_seq("INCLUDE", "NULLS"): 4426 include_nulls = True 4427 elif self._match_text_seq("EXCLUDE", "NULLS"): 4428 include_nulls = False 4429 else: 4430 return None 4431 4432 expressions = [] 4433 4434 if not self._match(TokenType.L_PAREN): 4435 self._retreat(index) 4436 return None 4437 4438 if unpivot: 4439 expressions = self._parse_csv(self._parse_column) 4440 else: 4441 expressions = self._parse_csv(self._parse_pivot_aggregation) 4442 4443 if not expressions: 4444 self.raise_error("Failed to parse PIVOT's aggregation list") 4445 4446 if not self._match(TokenType.FOR): 4447 self.raise_error("Expecting FOR") 4448 4449 fields = [] 4450 while True: 4451 field = self._try_parse(self._parse_pivot_in) 4452 if not field: 4453 break 4454 fields.append(field) 4455 4456 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4457 self._parse_bitwise 4458 ) 4459 4460 group = self._parse_group() 4461 4462 self._match_r_paren() 4463 4464 pivot = self.expression( 4465 exp.Pivot, 4466 expressions=expressions, 4467 fields=fields, 4468 unpivot=unpivot, 4469 include_nulls=include_nulls, 4470 
default_on_null=default_on_null, 4471 group=group, 4472 ) 4473 4474 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4475 pivot.set("alias", self._parse_table_alias()) 4476 4477 if not unpivot: 4478 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4479 4480 columns: t.List[exp.Expression] = [] 4481 all_fields = [] 4482 for pivot_field in pivot.fields: 4483 pivot_field_expressions = pivot_field.expressions 4484 4485 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4486 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4487 continue 4488 4489 all_fields.append( 4490 [ 4491 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4492 for fld in pivot_field_expressions 4493 ] 4494 ) 4495 4496 if all_fields: 4497 if names: 4498 all_fields.append(names) 4499 4500 # Generate all possible combinations of the pivot columns 4501 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4502 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4503 for fld_parts_tuple in itertools.product(*all_fields): 4504 fld_parts = list(fld_parts_tuple) 4505 4506 if names and self.PREFIXED_PIVOT_COLUMNS: 4507 # Move the "name" to the front of the list 4508 fld_parts.insert(0, fld_parts.pop(-1)) 4509 4510 columns.append(exp.to_identifier("_".join(fld_parts))) 4511 4512 pivot.set("columns", columns) 4513 4514 return pivot 4515 4516 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4517 return [agg.alias for agg in aggregations if agg.alias] 4518 4519 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4520 if not skip_where_token and not self._match(TokenType.PREWHERE): 4521 return None 4522 4523 return self.expression( 4524 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4525 ) 4526 4527 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4528 if not skip_where_token and not self._match(TokenType.WHERE): 4529 return None 4530 4531 return self.expression( 4532 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4533 ) 4534 4535 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4536 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4537 return None 4538 comments = self._prev_comments 4539 4540 elements: t.Dict[str, t.Any] = defaultdict(list) 4541 4542 if self._match(TokenType.ALL): 4543 elements["all"] = True 4544 elif self._match(TokenType.DISTINCT): 4545 elements["all"] = False 4546 4547 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4548 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4549 4550 while True: 4551 index = self._index 4552 4553 elements["expressions"].extend( 4554 self._parse_csv( 4555 lambda: None 4556 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4557 else self._parse_assignment() 4558 ) 4559 ) 4560 4561 before_with_index = self._index 4562 with_prefix = self._match(TokenType.WITH) 4563 4564 if self._match(TokenType.ROLLUP): 4565 elements["rollup"].append( 4566 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4567 ) 4568 elif self._match(TokenType.CUBE): 4569 elements["cube"].append( 4570 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4571 ) 4572 elif self._match(TokenType.GROUPING_SETS): 4573 elements["grouping_sets"].append( 4574 
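# e.g. GROUP BY GROUPING SETS ((a, b), (a), ())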
self.expression( 4575 exp.GroupingSets, 4576 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4577 ) 4578 ) 4579 elif self._match_text_seq("TOTALS"): 4580 elements["totals"] = True # type: ignore 4581 4582 if before_with_index <= self._index <= before_with_index + 1: 4583 self._retreat(before_with_index) 4584 break 4585 4586 if index == self._index: 4587 break 4588 4589 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4590 4591 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4592 return self.expression( 4593 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4594 ) 4595 4596 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4597 if self._match(TokenType.L_PAREN): 4598 grouping_set = self._parse_csv(self._parse_column) 4599 self._match_r_paren() 4600 return self.expression(exp.Tuple, expressions=grouping_set) 4601 4602 return self._parse_column() 4603 4604 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4605 if not skip_having_token and not self._match(TokenType.HAVING): 4606 return None 4607 return self.expression( 4608 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4609 ) 4610 4611 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4612 if not self._match(TokenType.QUALIFY): 4613 return None 4614 return self.expression(exp.Qualify, this=self._parse_assignment()) 4615 4616 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4617 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4618 exp.Prior, this=self._parse_bitwise() 4619 ) 4620 connect = self._parse_assignment() 4621 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4622 return connect 4623 4624 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4625 if skip_start_token: 4626 start = None 4627 elif self._match(TokenType.START_WITH): 4628 start = self._parse_assignment() 4629 else: 4630 return None 4631 4632 self._match(TokenType.CONNECT_BY) 4633 nocycle = self._match_text_seq("NOCYCLE") 4634 connect = self._parse_connect_with_prior() 4635 4636 if not start and self._match(TokenType.START_WITH): 4637 start = self._parse_assignment() 4638 4639 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4640 4641 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4642 this = self._parse_id_var(any_token=True) 4643 if self._match(TokenType.ALIAS): 4644 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4645 return this 4646 4647 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4648 if self._match_text_seq("INTERPOLATE"): 4649 return self._parse_wrapped_csv(self._parse_name_as_expression) 4650 return None 4651 4652 def _parse_order( 4653 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4654 ) -> t.Optional[exp.Expression]: 4655 siblings = None 4656 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4657 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4658 return this 4659 4660 siblings = True 4661 4662 return self.expression( 4663 exp.Order, 4664 comments=self._prev_comments, 4665 this=this, 4666 expressions=self._parse_csv(self._parse_ordered), 4667 siblings=siblings, 4668 ) 4669 4670 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4671 if not self._match(token): 4672 return None 4673 return self.expression(exp_class, 
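# shared by e.g. the Hive-style SORT BY, DISTRIBUTE BY and CLUSTER BY modifiers, which differ only in the token and expression class passed in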
expressions=self._parse_csv(self._parse_ordered)) 4674 4675 def _parse_ordered( 4676 self, parse_method: t.Optional[t.Callable] = None 4677 ) -> t.Optional[exp.Ordered]: 4678 this = parse_method() if parse_method else self._parse_assignment() 4679 if not this: 4680 return None 4681 4682 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4683 this = exp.var("ALL") 4684 4685 asc = self._match(TokenType.ASC) 4686 desc = self._match(TokenType.DESC) or (asc and False) 4687 4688 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4689 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4690 4691 nulls_first = is_nulls_first or False 4692 explicitly_null_ordered = is_nulls_first or is_nulls_last 4693 4694 if ( 4695 not explicitly_null_ordered 4696 and ( 4697 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4698 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4699 ) 4700 and self.dialect.NULL_ORDERING != "nulls_are_last" 4701 ): 4702 nulls_first = True 4703 4704 if self._match_text_seq("WITH", "FILL"): 4705 with_fill = self.expression( 4706 exp.WithFill, 4707 **{ # type: ignore 4708 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4709 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4710 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4711 "interpolate": self._parse_interpolate(), 4712 }, 4713 ) 4714 else: 4715 with_fill = None 4716 4717 return self.expression( 4718 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4719 ) 4720 4721 def _parse_limit_options(self) -> exp.LimitOptions: 4722 percent = self._match(TokenType.PERCENT) 4723 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4724 self._match_text_seq("ONLY") 4725 with_ties = self._match_text_seq("WITH", "TIES") 4726 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4727 4728 def _parse_limit( 4729 self, 4730 this: t.Optional[exp.Expression] = None, 4731 top: bool = False, 4732 skip_limit_token: bool = False, 4733 ) -> t.Optional[exp.Expression]: 4734 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4735 comments = self._prev_comments 4736 if top: 4737 limit_paren = self._match(TokenType.L_PAREN) 4738 expression = self._parse_term() if limit_paren else self._parse_number() 4739 4740 if limit_paren: 4741 self._match_r_paren() 4742 4743 limit_options = self._parse_limit_options() 4744 else: 4745 limit_options = None 4746 expression = self._parse_term() 4747 4748 if self._match(TokenType.COMMA): 4749 offset = expression 4750 expression = self._parse_term() 4751 else: 4752 offset = None 4753 4754 limit_exp = self.expression( 4755 exp.Limit, 4756 this=this, 4757 expression=expression, 4758 offset=offset, 4759 comments=comments, 4760 limit_options=limit_options, 4761 expressions=self._parse_limit_by(), 4762 ) 4763 4764 return limit_exp 4765 4766 if self._match(TokenType.FETCH): 4767 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4768 direction = self._prev.text.upper() if direction else "FIRST" 4769 4770 count = self._parse_field(tokens=self.FETCH_TOKENS) 4771 4772 return self.expression( 4773 exp.Fetch, 4774 direction=direction, 4775 count=count, 4776 limit_options=self._parse_limit_options(), 4777 ) 4778 4779 return this 4780 4781 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4782 if not self._match(TokenType.OFFSET): 4783 return this 4784 4785 count = self._parse_term() 4786 
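# consume the optional ROW/ROWS noise word, as in the ANSI form "OFFSET 10 ROWS"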
self._match_set((TokenType.ROW, TokenType.ROWS)) 4787 4788 return self.expression( 4789 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4790 ) 4791 4792 def _can_parse_limit_or_offset(self) -> bool: 4793 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4794 return False 4795 4796 index = self._index 4797 result = bool( 4798 self._try_parse(self._parse_limit, retreat=True) 4799 or self._try_parse(self._parse_offset, retreat=True) 4800 ) 4801 self._retreat(index) 4802 return result 4803 4804 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4805 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4806 4807 def _parse_locks(self) -> t.List[exp.Lock]: 4808 locks = [] 4809 while True: 4810 update, key = None, None 4811 if self._match_text_seq("FOR", "UPDATE"): 4812 update = True 4813 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4814 "LOCK", "IN", "SHARE", "MODE" 4815 ): 4816 update = False 4817 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4818 update, key = False, True 4819 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4820 update, key = True, True 4821 else: 4822 break 4823 4824 expressions = None 4825 if self._match_text_seq("OF"): 4826 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4827 4828 wait: t.Optional[bool | exp.Expression] = None 4829 if self._match_text_seq("NOWAIT"): 4830 wait = True 4831 elif self._match_text_seq("WAIT"): 4832 wait = self._parse_primary() 4833 elif self._match_text_seq("SKIP", "LOCKED"): 4834 wait = False 4835 4836 locks.append( 4837 self.expression( 4838 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4839 ) 4840 ) 4841 4842 return locks 4843 4844 def parse_set_operation( 4845 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4846 ) -> t.Optional[exp.Expression]: 4847 start = self._index 4848 _, side_token, kind_token = self._parse_join_parts() 4849 4850 side = side_token.text if side_token else None 4851 kind = kind_token.text if kind_token else None 4852 4853 if not self._match_set(self.SET_OPERATIONS): 4854 self._retreat(start) 4855 return None 4856 4857 token_type = self._prev.token_type 4858 4859 if token_type == TokenType.UNION: 4860 operation: t.Type[exp.SetOperation] = exp.Union 4861 elif token_type == TokenType.EXCEPT: 4862 operation = exp.Except 4863 else: 4864 operation = exp.Intersect 4865 4866 comments = self._prev.comments 4867 4868 if self._match(TokenType.DISTINCT): 4869 distinct: t.Optional[bool] = True 4870 elif self._match(TokenType.ALL): 4871 distinct = False 4872 else: 4873 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4874 if distinct is None: 4875 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4876 4877 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4878 "STRICT", "CORRESPONDING" 4879 ) 4880 if self._match_text_seq("CORRESPONDING"): 4881 by_name = True 4882 if not side and not kind: 4883 kind = "INNER" 4884 4885 on_column_list = None 4886 if by_name and self._match_texts(("ON", "BY")): 4887 on_column_list = self._parse_wrapped_csv(self._parse_column) 4888 4889 expression = self._parse_select( 4890 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4891 ) 4892 4893 return self.expression( 4894 operation, 4895 comments=comments, 4896 this=this, 4897 distinct=distinct, 4898 by_name=by_name, 4899 expression=expression, 4900 side=side, 4901 kind=kind, 4902 on=on_column_list, 4903 ) 4904 
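# Chained set operations are folded left-deep by the loop below; roughly (a sketch
# using the public API): sqlglot.parse_one("SELECT 1 UNION SELECT 2 UNION SELECT 3")
# yields Union(this=Union(this=<SELECT 1>, expression=<SELECT 2>), expression=<SELECT 3>)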
4905 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4906 while this: 4907 setop = self.parse_set_operation(this) 4908 if not setop: 4909 break 4910 this = setop 4911 4912 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4913 expression = this.expression 4914 4915 if expression: 4916 for arg in self.SET_OP_MODIFIERS: 4917 expr = expression.args.get(arg) 4918 if expr: 4919 this.set(arg, expr.pop()) 4920 4921 return this 4922 4923 def _parse_expression(self) -> t.Optional[exp.Expression]: 4924 return self._parse_alias(self._parse_assignment()) 4925 4926 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4927 this = self._parse_disjunction() 4928 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4929 # This allows us to parse <non-identifier token> := <expr> 4930 this = exp.column( 4931 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4932 ) 4933 4934 while self._match_set(self.ASSIGNMENT): 4935 if isinstance(this, exp.Column) and len(this.parts) == 1: 4936 this = this.this 4937 4938 this = self.expression( 4939 self.ASSIGNMENT[self._prev.token_type], 4940 this=this, 4941 comments=self._prev_comments, 4942 expression=self._parse_assignment(), 4943 ) 4944 4945 return this 4946 4947 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4948 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4949 4950 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4951 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4952 4953 def _parse_equality(self) -> t.Optional[exp.Expression]: 4954 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4955 4956 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4957 return self._parse_tokens(self._parse_range, self.COMPARISON) 4958 4959 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4960 this = this or self._parse_bitwise() 4961 negate = self._match(TokenType.NOT) 4962 4963 if self._match_set(self.RANGE_PARSERS): 4964 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4965 if not expression: 4966 return this 4967 4968 this = expression 4969 elif self._match(TokenType.ISNULL): 4970 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4971 4972 # Postgres supports ISNULL and NOTNULL for conditions. 
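# e.g. "x ISNULL" is parsed as Is(this=x, expression=Null()) and "x NOTNULL" as Not(Is(x, Null())).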
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
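            # Editorial example (not upstream code): for input like "DECIMAL(38, 0)" the
            # parser consumed the "(38, 0)" tokens itself, so index2 - index > 1 and the
            # parsed DataType is kept below. If instead a TYPE_CONVERTERS callable filled
            # in the expressions (e.g. Snowflake canonicalizing bare DECIMAL), only the
            # type keyword was consumed, and we fall through to retreat and re-parse.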
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.
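
        Illustrative example (editorial addition, not from the upstream docstring):
        `{d '2024-01-01'}` is parsed like `DATE('2024-01-01')`.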

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
6654 if not self._match_text_seq("WITHIN", "GROUP"): 6655 self._retreat(index) 6656 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6657 6658 # The corresponding match_r_paren will be called in parse_function (caller) 6659 self._match_l_paren() 6660 6661 return self.expression( 6662 exp.GroupConcat, 6663 this=self._parse_order(this=seq_get(args, 0)), 6664 separator=seq_get(args, 1), 6665 on_overflow=on_overflow, 6666 ) 6667 6668 def _parse_convert( 6669 self, strict: bool, safe: t.Optional[bool] = None 6670 ) -> t.Optional[exp.Expression]: 6671 this = self._parse_bitwise() 6672 6673 if self._match(TokenType.USING): 6674 to: t.Optional[exp.Expression] = self.expression( 6675 exp.CharacterSet, this=self._parse_var() 6676 ) 6677 elif self._match(TokenType.COMMA): 6678 to = self._parse_types() 6679 else: 6680 to = None 6681 6682 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6683 6684 def _parse_xml_table(self) -> exp.XMLTable: 6685 namespaces = None 6686 passing = None 6687 columns = None 6688 6689 if self._match_text_seq("XMLNAMESPACES", "("): 6690 namespaces = self._parse_xml_namespace() 6691 self._match_text_seq(")", ",") 6692 6693 this = self._parse_string() 6694 6695 if self._match_text_seq("PASSING"): 6696 # The BY VALUE keywords are optional and are provided for semantic clarity 6697 self._match_text_seq("BY", "VALUE") 6698 passing = self._parse_csv(self._parse_column) 6699 6700 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6701 6702 if self._match_text_seq("COLUMNS"): 6703 columns = self._parse_csv(self._parse_field_def) 6704 6705 return self.expression( 6706 exp.XMLTable, 6707 this=this, 6708 namespaces=namespaces, 6709 passing=passing, 6710 columns=columns, 6711 by_ref=by_ref, 6712 ) 6713 6714 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6715 namespaces = [] 6716 6717 while True: 6718 if self._match(TokenType.DEFAULT): 6719 uri = self._parse_string() 6720 else: 6721 uri = self._parse_alias(self._parse_string()) 6722 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6723 if not self._match(TokenType.COMMA): 6724 break 6725 6726 return namespaces 6727 6728 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6729 args = self._parse_csv(self._parse_assignment) 6730 6731 if len(args) < 3: 6732 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6733 6734 return self.expression(exp.DecodeCase, expressions=args) 6735 6736 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6737 self._match_text_seq("KEY") 6738 key = self._parse_column() 6739 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6740 self._match_text_seq("VALUE") 6741 value = self._parse_bitwise() 6742 6743 if not key and not value: 6744 return None 6745 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6746 6747 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6748 if not this or not self._match_text_seq("FORMAT", "JSON"): 6749 return this 6750 6751 return self.expression(exp.FormatJson, this=this) 6752 6753 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6754 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6755 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6756 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6757 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6758 else: 6759 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6760 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6761 6762 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6763 6764 if not empty and not error and not null: 6765 return None 6766 6767 return self.expression( 6768 exp.OnCondition, 6769 empty=empty, 6770 error=error, 6771 null=null, 6772 ) 6773 6774 def _parse_on_handling( 6775 self, on: str, *values: str 6776 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6777 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6778 for value in values: 6779 if self._match_text_seq(value, "ON", on): 6780 return f"{value} ON {on}" 6781 6782 index = self._index 6783 if self._match(TokenType.DEFAULT): 6784 default_value = self._parse_bitwise() 6785 if self._match_text_seq("ON", on): 6786 return default_value 6787 6788 self._retreat(index) 6789 6790 return None 6791 6792 @t.overload 6793 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6794 6795 @t.overload 6796 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6797 6798 def _parse_json_object(self, agg=False): 6799 star = self._parse_star() 6800 expressions = ( 6801 [star] 6802 if star 6803 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6804 ) 6805 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6806 6807 unique_keys = None 6808 if self._match_text_seq("WITH", "UNIQUE"): 6809 unique_keys = True 6810 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6811 unique_keys = False 6812 6813 self._match_text_seq("KEYS") 6814 6815 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6816 self._parse_type() 6817 ) 6818 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6819 6820 return self.expression( 6821 exp.JSONObjectAgg if agg else exp.JSONObject, 6822 expressions=expressions, 6823 null_handling=null_handling, 6824 unique_keys=unique_keys, 6825 return_type=return_type, 6826 encoding=encoding, 6827 ) 6828 6829 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6830 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6831 if not self._match_text_seq("NESTED"): 6832 this = self._parse_id_var() 6833 kind = self._parse_types(allow_identifiers=False) 6834 nested = None 6835 else: 6836 this = None 6837 kind = None 6838 nested = True 6839 6840 path = self._match_text_seq("PATH") and self._parse_string() 6841 nested_schema = nested and self._parse_json_schema() 6842 6843 return self.expression( 6844 exp.JSONColumnDef, 6845 this=this, 6846 kind=kind, 6847 path=path, 6848 nested_schema=nested_schema, 6849 ) 6850 6851 def _parse_json_schema(self) -> exp.JSONSchema: 6852 self._match_text_seq("COLUMNS") 6853 return self.expression( 6854 exp.JSONSchema, 6855 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6856 ) 6857 6858 def _parse_json_table(self) -> exp.JSONTable: 6859 this = self._parse_format_json(self._parse_bitwise()) 6860 path = self._match(TokenType.COMMA) and self._parse_string() 6861 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6862 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6863 schema =
self._parse_json_schema() 6864 6865 return exp.JSONTable( 6866 this=this, 6867 schema=schema, 6868 path=path, 6869 error_handling=error_handling, 6870 empty_handling=empty_handling, 6871 ) 6872 6873 def _parse_match_against(self) -> exp.MatchAgainst: 6874 if self._match_text_seq("TABLE"): 6875 # parse SingleStore MATCH(TABLE ...) syntax 6876 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6877 expressions = [] 6878 table = self._parse_table() 6879 if table: 6880 expressions = [table] 6881 else: 6882 expressions = self._parse_csv(self._parse_column) 6883 6884 self._match_text_seq(")", "AGAINST", "(") 6885 6886 this = self._parse_string() 6887 6888 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6889 modifier = "IN NATURAL LANGUAGE MODE" 6890 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6891 modifier = f"{modifier} WITH QUERY EXPANSION" 6892 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6893 modifier = "IN BOOLEAN MODE" 6894 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6895 modifier = "WITH QUERY EXPANSION" 6896 else: 6897 modifier = None 6898 6899 return self.expression( 6900 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6901 ) 6902 6903 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6904 def _parse_open_json(self) -> exp.OpenJSON: 6905 this = self._parse_bitwise() 6906 path = self._match(TokenType.COMMA) and self._parse_string() 6907 6908 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6909 this = self._parse_field(any_token=True) 6910 kind = self._parse_types() 6911 path = self._parse_string() 6912 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6913 6914 return self.expression( 6915 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6916 ) 6917 6918 expressions = None 6919 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6920 self._match_l_paren() 6921 expressions = self._parse_csv(_parse_open_json_column_def) 6922 6923 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6924 6925 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6926 args = self._parse_csv(self._parse_bitwise) 6927 6928 if self._match(TokenType.IN): 6929 return self.expression( 6930 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6931 ) 6932 6933 if haystack_first: 6934 haystack = seq_get(args, 0) 6935 needle = seq_get(args, 1) 6936 else: 6937 haystack = seq_get(args, 1) 6938 needle = seq_get(args, 0) 6939 6940 return self.expression( 6941 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6942 ) 6943 6944 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6945 args = self._parse_csv(self._parse_table) 6946 return exp.JoinHint(this=func_name.upper(), expressions=args) 6947 6948 def _parse_substring(self) -> exp.Substring: 6949 # Postgres supports the form: substring(string [from int] [for int]) 6950 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6951 6952 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6953 6954 if self._match(TokenType.FROM): 6955 args.append(self._parse_bitwise()) 6956 if self._match(TokenType.FOR): 6957 if len(args) == 1: 6958 args.append(exp.Literal.number(1)) 6959 args.append(self._parse_bitwise()) 6960 6961 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6962 6963 def _parse_trim(self) 
-> exp.Trim: 6964 # https://www.w3resource.com/sql/character-functions/trim.php 6965 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6966 6967 position = None 6968 collation = None 6969 expression = None 6970 6971 if self._match_texts(self.TRIM_TYPES): 6972 position = self._prev.text.upper() 6973 6974 this = self._parse_bitwise() 6975 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6976 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6977 expression = self._parse_bitwise() 6978 6979 if invert_order: 6980 this, expression = expression, this 6981 6982 if self._match(TokenType.COLLATE): 6983 collation = self._parse_bitwise() 6984 6985 return self.expression( 6986 exp.Trim, this=this, position=position, expression=expression, collation=collation 6987 ) 6988 6989 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6990 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6991 6992 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6993 return self._parse_window(self._parse_id_var(), alias=True) 6994 6995 def _parse_respect_or_ignore_nulls( 6996 self, this: t.Optional[exp.Expression] 6997 ) -> t.Optional[exp.Expression]: 6998 if self._match_text_seq("IGNORE", "NULLS"): 6999 return self.expression(exp.IgnoreNulls, this=this) 7000 if self._match_text_seq("RESPECT", "NULLS"): 7001 return self.expression(exp.RespectNulls, this=this) 7002 return this 7003 7004 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7005 if self._match(TokenType.HAVING): 7006 self._match_texts(("MAX", "MIN")) 7007 max = self._prev.text.upper() != "MIN" 7008 return self.expression( 7009 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7010 ) 7011 7012 return this 7013 7014 def _parse_window( 7015 self, this: t.Optional[exp.Expression], alias: bool = False 7016 ) -> t.Optional[exp.Expression]: 7017 func = this 7018 comments = func.comments if isinstance(func, exp.Expression) else None 7019 7020 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7021 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7022 if self._match_text_seq("WITHIN", "GROUP"): 7023 order = self._parse_wrapped(self._parse_order) 7024 this = self.expression(exp.WithinGroup, this=this, expression=order) 7025 7026 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7027 self._match(TokenType.WHERE) 7028 this = self.expression( 7029 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7030 ) 7031 self._match_r_paren() 7032 7033 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7034 # Some dialects choose to implement and some do not. 7035 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7036 7037 # There is some code above in _parse_lambda that handles 7038 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7039 7040 # The below changes handle 7041 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
7042 7043 # Oracle allows both formats 7044 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7045 # and Snowflake chose to do the same for familiarity 7046 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7047 if isinstance(this, exp.AggFunc): 7048 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7049 7050 if ignore_respect and ignore_respect is not this: 7051 ignore_respect.replace(ignore_respect.this) 7052 this = self.expression(ignore_respect.__class__, this=this) 7053 7054 this = self._parse_respect_or_ignore_nulls(this) 7055 7056 # bigquery select from window x AS (partition by ...) 7057 if alias: 7058 over = None 7059 self._match(TokenType.ALIAS) 7060 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7061 return this 7062 else: 7063 over = self._prev.text.upper() 7064 7065 if comments and isinstance(func, exp.Expression): 7066 func.pop_comments() 7067 7068 if not self._match(TokenType.L_PAREN): 7069 return self.expression( 7070 exp.Window, 7071 comments=comments, 7072 this=this, 7073 alias=self._parse_id_var(False), 7074 over=over, 7075 ) 7076 7077 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7078 7079 first = self._match(TokenType.FIRST) 7080 if self._match_text_seq("LAST"): 7081 first = False 7082 7083 partition, order = self._parse_partition_and_order() 7084 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7085 7086 if kind: 7087 self._match(TokenType.BETWEEN) 7088 start = self._parse_window_spec() 7089 7090 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7091 exclude = ( 7092 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7093 if self._match_text_seq("EXCLUDE") 7094 else None 7095 ) 7096 7097 spec = self.expression( 7098 exp.WindowSpec, 7099 kind=kind, 7100 start=start["value"], 7101 start_side=start["side"], 7102 end=end.get("value"), 7103 end_side=end.get("side"), 7104 exclude=exclude, 7105 ) 7106 else: 7107 spec = None 7108 7109 self._match_r_paren() 7110 7111 window = self.expression( 7112 exp.Window, 7113 comments=comments, 7114 this=this, 7115 partition_by=partition, 7116 order=order, 7117 spec=spec, 7118 alias=window_alias, 7119 over=over, 7120 first=first, 7121 ) 7122 7123 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
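# Added note, not part of the upstream source: the recursive _parse_window call
# below nests the two windows, so in dialects whose WINDOW_BEFORE_PAREN_TOKENS
# include KEEP (e.g. Oracle), a call such as
#
#   MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z)
#
# first builds an exp.Window with over="KEEP", which then becomes `this` of the
# outer OVER window on the recursive pass.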
7124 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7125 return self._parse_window(window, alias=alias) 7126 7127 return window 7128 7129 def _parse_partition_and_order( 7130 self, 7131 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7132 return self._parse_partition_by(), self._parse_order() 7133 7134 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7135 self._match(TokenType.BETWEEN) 7136 7137 return { 7138 "value": ( 7139 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7140 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7141 or self._parse_type() 7142 ), 7143 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7144 } 7145 7146 def _parse_alias( 7147 self, this: t.Optional[exp.Expression], explicit: bool = False 7148 ) -> t.Optional[exp.Expression]: 7149 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7150 # so this section tries to parse the clause version and if it fails, it treats the token 7151 # as an identifier (alias) 7152 if self._can_parse_limit_or_offset(): 7153 return this 7154 7155 any_token = self._match(TokenType.ALIAS) 7156 comments = self._prev_comments or [] 7157 7158 if explicit and not any_token: 7159 return this 7160 7161 if self._match(TokenType.L_PAREN): 7162 aliases = self.expression( 7163 exp.Aliases, 7164 comments=comments, 7165 this=this, 7166 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7167 ) 7168 self._match_r_paren(aliases) 7169 return aliases 7170 7171 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7172 self.STRING_ALIASES and self._parse_string_as_identifier() 7173 ) 7174 7175 if alias: 7176 comments.extend(alias.pop_comments()) 7177 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7178 column = this.this 7179 7180 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7181 if not this.comments and column and column.comments: 7182 this.comments = column.pop_comments() 7183 7184 return this 7185 7186 def _parse_id_var( 7187 self, 7188 any_token: bool = True, 7189 tokens: t.Optional[t.Collection[TokenType]] = None, 7190 ) -> t.Optional[exp.Expression]: 7191 expression = self._parse_identifier() 7192 if not expression and ( 7193 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7194 ): 7195 quoted = self._prev.token_type == TokenType.STRING 7196 expression = self._identifier_expression(quoted=quoted) 7197 7198 return expression 7199 7200 def _parse_string(self) -> t.Optional[exp.Expression]: 7201 if self._match_set(self.STRING_PARSERS): 7202 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7203 return self._parse_placeholder() 7204 7205 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7206 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7207 if output: 7208 output.update_positions(self._prev) 7209 return output 7210 7211 def _parse_number(self) -> t.Optional[exp.Expression]: 7212 if self._match_set(self.NUMERIC_PARSERS): 7213 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7214 return self._parse_placeholder() 7215 7216 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7217 if self._match(TokenType.IDENTIFIER): 7218 return self._identifier_expression(quoted=True) 7219 return self._parse_placeholder() 7220 7221 def _parse_var( 7222 self, 7223 any_token: bool = False, 7224 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7225 upper: bool = False, 7226 ) -> t.Optional[exp.Expression]: 7227 if ( 7228 (any_token and self._advance_any()) 7229 or self._match(TokenType.VAR) 7230 or (self._match_set(tokens) if tokens else False) 7231 ): 7232 return self.expression( 7233 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7234 ) 7235 return self._parse_placeholder() 7236 7237 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7238 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7239 self._advance() 7240 return self._prev 7241 return None 7242 7243 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7244 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7245 7246 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7247 return self._parse_primary() or self._parse_var(any_token=True) 7248 7249 def _parse_null(self) -> t.Optional[exp.Expression]: 7250 if self._match_set(self.NULL_TOKENS): 7251 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7252 return self._parse_placeholder() 7253 7254 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7255 if self._match(TokenType.TRUE): 7256 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7257 if self._match(TokenType.FALSE): 7258 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7259 return self._parse_placeholder() 7260 7261 def _parse_star(self) -> t.Optional[exp.Expression]: 7262 if self._match(TokenType.STAR): 7263 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7264 return self._parse_placeholder() 7265 7266 def _parse_parameter(self) -> exp.Parameter: 7267 this = self._parse_identifier() or self._parse_primary_or_var() 7268 return self.expression(exp.Parameter, this=this) 7269 7270 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7271 if self._match_set(self.PLACEHOLDER_PARSERS): 7272 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7273 if placeholder: 7274 return placeholder 7275 self._advance(-1) 7276 return None 7277 7278 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7279 if not self._match_texts(keywords): 7280 return None 7281 if self._match(TokenType.L_PAREN, advance=False): 7282 return self._parse_wrapped_csv(self._parse_expression) 7283 7284 expression = self._parse_expression() 7285 return [expression] if expression else None 7286 7287 def _parse_csv( 7288 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7289 ) -> t.List[exp.Expression]: 7290 parse_result = parse_method() 7291 items = [parse_result] if parse_result is not None else [] 7292 7293 while self._match(sep): 7294 self._add_comments(parse_result) 7295 parse_result = parse_method() 7296 if parse_result is not None: 7297 items.append(parse_result) 7298 7299 return items 7300 7301 def _parse_tokens( 7302 self, parse_method: t.Callable, expressions: t.Dict 7303 ) -> t.Optional[exp.Expression]: 7304 this = parse_method() 7305 7306 while self._match_set(expressions): 7307 this = self.expression( 7308 expressions[self._prev.token_type], 7309 this=this, 7310 comments=self._prev_comments, 7311 expression=parse_method(), 7312 ) 7313 7314 return this 7315 7316 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7317 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7318 7319 def _parse_wrapped_csv( 7320 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7321 ) -> t.List[exp.Expression]: 7322 return self._parse_wrapped( 7323 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7324 ) 7325 7326 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7327 wrapped = self._match(TokenType.L_PAREN) 7328 if not wrapped and not optional: 7329 self.raise_error("Expecting (") 7330 parse_result = parse_method() 7331 if wrapped: 7332 self._match_r_paren() 7333 return parse_result 7334 7335 def _parse_expressions(self) -> t.List[exp.Expression]: 7336 return self._parse_csv(self._parse_expression) 7337 7338 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7339 return ( 7340 self._parse_set_operations( 7341 self._parse_alias(self._parse_assignment(), explicit=True) 7342 if alias 7343 else self._parse_assignment() 7344 ) 7345 or self._parse_select() 7346 ) 7347 7348 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7349 return self._parse_query_modifiers( 7350 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7351 ) 7352 7353 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7354 this = None 7355 if self._match_texts(self.TRANSACTION_KIND): 7356 this = self._prev.text 7357 7358 self._match_texts(("TRANSACTION", "WORK")) 7359 7360 modes = [] 7361 while True: 7362 mode = [] 7363 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7364 mode.append(self._prev.text) 7365 7366 if mode: 7367 modes.append(" ".join(mode)) 7368 if not self._match(TokenType.COMMA): 7369 break 7370 7371 return self.expression(exp.Transaction, this=this, modes=modes) 7372 7373 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7374 chain = None 7375 savepoint = None 7376 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7377 7378 self._match_texts(("TRANSACTION", "WORK")) 7379 7380 if self._match_text_seq("TO"): 7381 self._match_text_seq("SAVEPOINT") 7382 savepoint = self._parse_id_var() 7383 7384 if self._match(TokenType.AND): 7385 chain = not self._match_text_seq("NO") 7386 self._match_text_seq("CHAIN") 7387 7388 if is_rollback: 7389 return self.expression(exp.Rollback, savepoint=savepoint) 7390 7391 return self.expression(exp.Commit, chain=chain) 7392 7393 def _parse_refresh(self) -> exp.Refresh: 7394 self._match(TokenType.TABLE) 7395 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7396 7397 def _parse_column_def_with_exists(self): 7398 start = self._index 7399 self._match(TokenType.COLUMN) 7400 7401 exists_column = self._parse_exists(not_=True) 7402 expression = self._parse_field_def() 7403 7404 if not isinstance(expression, exp.ColumnDef): 7405 self._retreat(start) 7406 return None 7407 7408 expression.set("exists", exists_column) 7409 7410 return expression 7411 7412 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7413 if not self._prev.text.upper() == "ADD": 7414 return None 7415 7416 expression = self._parse_column_def_with_exists() 7417 if not expression: 7418 return None 7419 7420 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7421 if self._match_texts(("FIRST", "AFTER")): 7422 position = self._prev.text 7423 column_position = self.expression( 7424 exp.ColumnPosition, this=self._parse_column(), position=position 7425 ) 7426 expression.set("position", column_position) 7427 7428 return expression 7429 7430 def _parse_drop_column(self) -> 
t.Optional[exp.Drop | exp.Command]: 7431 drop = self._match(TokenType.DROP) and self._parse_drop() 7432 if drop and not isinstance(drop, exp.Command): 7433 drop.set("kind", drop.args.get("kind", "COLUMN")) 7434 return drop 7435 7436 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7437 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7438 return self.expression( 7439 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7440 ) 7441 7442 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7443 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7444 self._match_text_seq("ADD") 7445 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7446 return self.expression( 7447 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7448 ) 7449 7450 column_def = self._parse_add_column() 7451 if isinstance(column_def, exp.ColumnDef): 7452 return column_def 7453 7454 exists = self._parse_exists(not_=True) 7455 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7456 return self.expression( 7457 exp.AddPartition, 7458 exists=exists, 7459 this=self._parse_field(any_token=True), 7460 location=self._match_text_seq("LOCATION", advance=False) 7461 and self._parse_property(), 7462 ) 7463 7464 return None 7465 7466 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7467 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7468 or self._match_text_seq("COLUMNS") 7469 ): 7470 schema = self._parse_schema() 7471 7472 return ( 7473 ensure_list(schema) 7474 if schema 7475 else self._parse_csv(self._parse_column_def_with_exists) 7476 ) 7477 7478 return self._parse_csv(_parse_add_alteration) 7479 7480 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7481 if self._match_texts(self.ALTER_ALTER_PARSERS): 7482 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7483 7484 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7485 # keyword after ALTER we default to parsing this statement 7486 self._match(TokenType.COLUMN) 7487 column = self._parse_field(any_token=True) 7488 7489 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7490 return self.expression(exp.AlterColumn, this=column, drop=True) 7491 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7492 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7493 if self._match(TokenType.COMMENT): 7494 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7495 if self._match_text_seq("DROP", "NOT", "NULL"): 7496 return self.expression( 7497 exp.AlterColumn, 7498 this=column, 7499 drop=True, 7500 allow_null=True, 7501 ) 7502 if self._match_text_seq("SET", "NOT", "NULL"): 7503 return self.expression( 7504 exp.AlterColumn, 7505 this=column, 7506 allow_null=False, 7507 ) 7508 7509 if self._match_text_seq("SET", "VISIBLE"): 7510 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7511 if self._match_text_seq("SET", "INVISIBLE"): 7512 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7513 7514 self._match_text_seq("SET", "DATA") 7515 self._match_text_seq("TYPE") 7516 return self.expression( 7517 exp.AlterColumn, 7518 this=column, 7519 dtype=self._parse_types(), 7520 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7521 using=self._match(TokenType.USING) and self._parse_assignment(), 7522 ) 7523 7524 def 
_parse_alter_diststyle(self) -> exp.AlterDistStyle: 7525 if self._match_texts(("ALL", "EVEN", "AUTO")): 7526 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7527 7528 self._match_text_seq("KEY", "DISTKEY") 7529 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7530 7531 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7532 if compound: 7533 self._match_text_seq("SORTKEY") 7534 7535 if self._match(TokenType.L_PAREN, advance=False): 7536 return self.expression( 7537 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7538 ) 7539 7540 self._match_texts(("AUTO", "NONE")) 7541 return self.expression( 7542 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7543 ) 7544 7545 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7546 index = self._index - 1 7547 7548 partition_exists = self._parse_exists() 7549 if self._match(TokenType.PARTITION, advance=False): 7550 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7551 7552 self._retreat(index) 7553 return self._parse_csv(self._parse_drop_column) 7554 7555 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7556 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7557 exists = self._parse_exists() 7558 old_column = self._parse_column() 7559 to = self._match_text_seq("TO") 7560 new_column = self._parse_column() 7561 7562 if old_column is None or to is None or new_column is None: 7563 return None 7564 7565 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7566 7567 self._match_text_seq("TO") 7568 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7569 7570 def _parse_alter_table_set(self) -> exp.AlterSet: 7571 alter_set = self.expression(exp.AlterSet) 7572 7573 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7574 "TABLE", "PROPERTIES" 7575 ): 7576 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7577 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7578 alter_set.set("expressions", [self._parse_assignment()]) 7579 elif self._match_texts(("LOGGED", "UNLOGGED")): 7580 alter_set.set("option", exp.var(self._prev.text.upper())) 7581 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7582 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7583 elif self._match_text_seq("LOCATION"): 7584 alter_set.set("location", self._parse_field()) 7585 elif self._match_text_seq("ACCESS", "METHOD"): 7586 alter_set.set("access_method", self._parse_field()) 7587 elif self._match_text_seq("TABLESPACE"): 7588 alter_set.set("tablespace", self._parse_field()) 7589 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7590 alter_set.set("file_format", [self._parse_field()]) 7591 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7592 alter_set.set("file_format", self._parse_wrapped_options()) 7593 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7594 alter_set.set("copy_options", self._parse_wrapped_options()) 7595 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7596 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7597 else: 7598 if self._match_text_seq("SERDE"): 7599 alter_set.set("serde", self._parse_field()) 7600 7601 properties = self._parse_wrapped(self._parse_properties, optional=True) 7602 
alter_set.set("expressions", [properties]) 7603 7604 return alter_set 7605 7606 def _parse_alter_session(self) -> exp.AlterSession: 7607 """Parse ALTER SESSION SET/UNSET statements.""" 7608 if self._match(TokenType.SET): 7609 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7610 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7611 7612 self._match_text_seq("UNSET") 7613 expressions = self._parse_csv( 7614 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7615 ) 7616 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7617 7618 def _parse_alter(self) -> exp.Alter | exp.Command: 7619 start = self._prev 7620 7621 alter_token = self._match_set(self.ALTERABLES) and self._prev 7622 if not alter_token: 7623 return self._parse_as_command(start) 7624 7625 exists = self._parse_exists() 7626 only = self._match_text_seq("ONLY") 7627 7628 if alter_token.token_type == TokenType.SESSION: 7629 this = None 7630 check = None 7631 cluster = None 7632 else: 7633 this = self._parse_table(schema=True) 7634 check = self._match_text_seq("WITH", "CHECK") 7635 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7636 7637 if self._next: 7638 self._advance() 7639 7640 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7641 if parser: 7642 actions = ensure_list(parser(self)) 7643 not_valid = self._match_text_seq("NOT", "VALID") 7644 options = self._parse_csv(self._parse_property) 7645 7646 if not self._curr and actions: 7647 return self.expression( 7648 exp.Alter, 7649 this=this, 7650 kind=alter_token.text.upper(), 7651 exists=exists, 7652 actions=actions, 7653 only=only, 7654 options=options, 7655 cluster=cluster, 7656 not_valid=not_valid, 7657 check=check, 7658 ) 7659 7660 return self._parse_as_command(start) 7661 7662 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7663 start = self._prev 7664 # https://duckdb.org/docs/sql/statements/analyze 7665 if not self._curr: 7666 return self.expression(exp.Analyze) 7667 7668 options = [] 7669 while self._match_texts(self.ANALYZE_STYLES): 7670 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7671 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7672 else: 7673 options.append(self._prev.text.upper()) 7674 7675 this: t.Optional[exp.Expression] = None 7676 inner_expression: t.Optional[exp.Expression] = None 7677 7678 kind = self._curr and self._curr.text.upper() 7679 7680 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7681 this = self._parse_table_parts() 7682 elif self._match_text_seq("TABLES"): 7683 if self._match_set((TokenType.FROM, TokenType.IN)): 7684 kind = f"{kind} {self._prev.text.upper()}" 7685 this = self._parse_table(schema=True, is_db_reference=True) 7686 elif self._match_text_seq("DATABASE"): 7687 this = self._parse_table(schema=True, is_db_reference=True) 7688 elif self._match_text_seq("CLUSTER"): 7689 this = self._parse_table() 7690 # Try matching inner expr keywords before fallback to parse table. 
7691 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7692 kind = None 7693 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7694 else: 7695 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7696 kind = None 7697 this = self._parse_table_parts() 7698 7699 partition = self._try_parse(self._parse_partition) 7700 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7701 return self._parse_as_command(start) 7702 7703 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7704 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7705 "WITH", "ASYNC", "MODE" 7706 ): 7707 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7708 else: 7709 mode = None 7710 7711 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7712 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7713 7714 properties = self._parse_properties() 7715 return self.expression( 7716 exp.Analyze, 7717 kind=kind, 7718 this=this, 7719 mode=mode, 7720 partition=partition, 7721 properties=properties, 7722 expression=inner_expression, 7723 options=options, 7724 ) 7725 7726 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7727 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7728 this = None 7729 kind = self._prev.text.upper() 7730 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7731 expressions = [] 7732 7733 if not self._match_text_seq("STATISTICS"): 7734 self.raise_error("Expecting token STATISTICS") 7735 7736 if self._match_text_seq("NOSCAN"): 7737 this = "NOSCAN" 7738 elif self._match(TokenType.FOR): 7739 if self._match_text_seq("ALL", "COLUMNS"): 7740 this = "FOR ALL COLUMNS" 7741 if self._match_texts("COLUMNS"): 7742 this = "FOR COLUMNS" 7743 expressions = self._parse_csv(self._parse_column_reference) 7744 elif self._match_text_seq("SAMPLE"): 7745 sample = self._parse_number() 7746 expressions = [ 7747 self.expression( 7748 exp.AnalyzeSample, 7749 sample=sample, 7750 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7751 ) 7752 ] 7753 7754 return self.expression( 7755 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7756 ) 7757 7758 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7759 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7760 kind = None 7761 this = None 7762 expression: t.Optional[exp.Expression] = None 7763 if self._match_text_seq("REF", "UPDATE"): 7764 kind = "REF" 7765 this = "UPDATE" 7766 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7767 this = "UPDATE SET DANGLING TO NULL" 7768 elif self._match_text_seq("STRUCTURE"): 7769 kind = "STRUCTURE" 7770 if self._match_text_seq("CASCADE", "FAST"): 7771 this = "CASCADE FAST" 7772 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7773 ("ONLINE", "OFFLINE") 7774 ): 7775 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7776 expression = self._parse_into() 7777 7778 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7779 7780 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7781 this = self._prev.text.upper() 7782 if self._match_text_seq("COLUMNS"): 7783 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7784 return None 7785 7786 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7787 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7788 if self._match_text_seq("STATISTICS"): 7789 return self.expression(exp.AnalyzeDelete, kind=kind) 7790 return None 7791 7792 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7793 if self._match_text_seq("CHAINED", "ROWS"): 7794 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7795 return None 7796 7797 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7798 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7799 this = self._prev.text.upper() 7800 expression: t.Optional[exp.Expression] = None 7801 expressions = [] 7802 update_options = None 7803 7804 if self._match_text_seq("HISTOGRAM", "ON"): 7805 expressions = self._parse_csv(self._parse_column_reference) 7806 with_expressions = [] 7807 while self._match(TokenType.WITH): 7808 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7809 if self._match_texts(("SYNC", "ASYNC")): 7810 if self._match_text_seq("MODE", advance=False): 7811 with_expressions.append(f"{self._prev.text.upper()} MODE") 7812 self._advance() 7813 else: 7814 buckets = self._parse_number() 7815 if self._match_text_seq("BUCKETS"): 7816 with_expressions.append(f"{buckets} BUCKETS") 7817 if with_expressions: 7818 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7819 7820 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7821 TokenType.UPDATE, advance=False 7822 ): 7823 update_options = self._prev.text.upper() 7824 self._advance() 7825 elif self._match_text_seq("USING", "DATA"): 7826 expression = self.expression(exp.UsingData, this=self._parse_string()) 7827 7828 return self.expression( 7829 exp.AnalyzeHistogram, 7830 this=this, 7831 expressions=expressions, 7832 expression=expression, 7833 update_options=update_options, 7834 ) 7835 7836 def _parse_merge(self) -> exp.Merge: 7837 self._match(TokenType.INTO) 7838 target = self._parse_table() 7839 7840 if target and self._match(TokenType.ALIAS, advance=False): 7841 target.set("alias", self._parse_table_alias()) 7842 7843 self._match(TokenType.USING) 7844 using = self._parse_table() 7845 7846 self._match(TokenType.ON) 7847 on = self._parse_assignment() 7848 7849 return self.expression( 7850 exp.Merge, 7851 this=target, 7852 using=using, 7853 on=on, 7854 whens=self._parse_when_matched(), 7855 returning=self._parse_returning(), 7856 ) 7857 7858 def _parse_when_matched(self) -> exp.Whens: 7859 whens = [] 7860 7861 while self._match(TokenType.WHEN): 7862 matched = not self._match(TokenType.NOT) 7863 self._match_text_seq("MATCHED") 7864 source = ( 7865 False 7866 if self._match_text_seq("BY", "TARGET") 7867 else self._match_text_seq("BY", "SOURCE") 7868 ) 7869 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7870 7871 self._match(TokenType.THEN) 7872 7873 if self._match(TokenType.INSERT): 7874 this = self._parse_star() 7875 if this: 7876 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7877 else: 7878 then = self.expression( 7879 exp.Insert, 7880 this=exp.var("ROW") 7881 if self._match_text_seq("ROW") 7882 else self._parse_value(values=False), 7883 expression=self._match_text_seq("VALUES") and self._parse_value(), 7884 ) 7885 elif self._match(TokenType.UPDATE): 7886 expressions = self._parse_star() 7887 if expressions: 7888 then = self.expression(exp.Update, expressions=expressions) 7889 else: 7890 then = self.expression( 7891 exp.Update, 7892 
expressions=self._match(TokenType.SET) 7893 and self._parse_csv(self._parse_equality), 7894 ) 7895 elif self._match(TokenType.DELETE): 7896 then = self.expression(exp.Var, this=self._prev.text) 7897 else: 7898 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7899 7900 whens.append( 7901 self.expression( 7902 exp.When, 7903 matched=matched, 7904 source=source, 7905 condition=condition, 7906 then=then, 7907 ) 7908 ) 7909 return self.expression(exp.Whens, expressions=whens) 7910 7911 def _parse_show(self) -> t.Optional[exp.Expression]: 7912 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7913 if parser: 7914 return parser(self) 7915 return self._parse_as_command(self._prev) 7916 7917 def _parse_set_item_assignment( 7918 self, kind: t.Optional[str] = None 7919 ) -> t.Optional[exp.Expression]: 7920 index = self._index 7921 7922 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7923 return self._parse_set_transaction(global_=kind == "GLOBAL") 7924 7925 left = self._parse_primary() or self._parse_column() 7926 assignment_delimiter = self._match_texts(("=", "TO")) 7927 7928 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7929 self._retreat(index) 7930 return None 7931 7932 right = self._parse_statement() or self._parse_id_var() 7933 if isinstance(right, (exp.Column, exp.Identifier)): 7934 right = exp.var(right.name) 7935 7936 this = self.expression(exp.EQ, this=left, expression=right) 7937 return self.expression(exp.SetItem, this=this, kind=kind) 7938 7939 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7940 self._match_text_seq("TRANSACTION") 7941 characteristics = self._parse_csv( 7942 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7943 ) 7944 return self.expression( 7945 exp.SetItem, 7946 expressions=characteristics, 7947 kind="TRANSACTION", 7948 **{"global": global_}, # type: ignore 7949 ) 7950 7951 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7952 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7953 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7954 7955 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7956 index = self._index 7957 set_ = self.expression( 7958 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7959 ) 7960 7961 if self._curr: 7962 self._retreat(index) 7963 return self._parse_as_command(self._prev) 7964 7965 return set_ 7966 7967 def _parse_var_from_options( 7968 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7969 ) -> t.Optional[exp.Var]: 7970 start = self._curr 7971 if not start: 7972 return None 7973 7974 option = start.text.upper() 7975 continuations = options.get(option) 7976 7977 index = self._index 7978 self._advance() 7979 for keywords in continuations or []: 7980 if isinstance(keywords, str): 7981 keywords = (keywords,) 7982 7983 if self._match_text_seq(*keywords): 7984 option = f"{option} {' '.join(keywords)}" 7985 break 7986 else: 7987 if continuations or continuations is None: 7988 if raise_unmatched: 7989 self.raise_error(f"Unknown option {option}") 7990 7991 self._retreat(index) 7992 return None 7993 7994 return exp.var(option) 7995 7996 def _parse_as_command(self, start: Token) -> exp.Command: 7997 while self._curr: 7998 self._advance() 7999 text = self._find_sql(start, self._prev) 8000 size = len(start.text) 8001 self._warn_unsupported() 8002 return exp.Command(this=text[:size], 
expression=text[size:]) 8003 8004 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8005 settings = [] 8006 8007 self._match_l_paren() 8008 kind = self._parse_id_var() 8009 8010 if self._match(TokenType.L_PAREN): 8011 while True: 8012 key = self._parse_id_var() 8013 value = self._parse_primary() 8014 if not key and value is None: 8015 break 8016 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8017 self._match(TokenType.R_PAREN) 8018 8019 self._match_r_paren() 8020 8021 return self.expression( 8022 exp.DictProperty, 8023 this=this, 8024 kind=kind.this if kind else None, 8025 settings=settings, 8026 ) 8027 8028 def _parse_dict_range(self, this: str) -> exp.DictRange: 8029 self._match_l_paren() 8030 has_min = self._match_text_seq("MIN") 8031 if has_min: 8032 min = self._parse_var() or self._parse_primary() 8033 self._match_text_seq("MAX") 8034 max = self._parse_var() or self._parse_primary() 8035 else: 8036 max = self._parse_var() or self._parse_primary() 8037 min = exp.Literal.number(0) 8038 self._match_r_paren() 8039 return self.expression(exp.DictRange, this=this, min=min, max=max) 8040 8041 def _parse_comprehension( 8042 self, this: t.Optional[exp.Expression] 8043 ) -> t.Optional[exp.Comprehension]: 8044 index = self._index 8045 expression = self._parse_column() 8046 if not self._match(TokenType.IN): 8047 self._retreat(index - 1) 8048 return None 8049 iterator = self._parse_column() 8050 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8051 return self.expression( 8052 exp.Comprehension, 8053 this=this, 8054 expression=expression, 8055 iterator=iterator, 8056 condition=condition, 8057 ) 8058 8059 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8060 if self._match(TokenType.HEREDOC_STRING): 8061 return self.expression(exp.Heredoc, this=self._prev.text) 8062 8063 if not self._match_text_seq("$"): 8064 return None 8065 8066 tags = ["$"] 8067 tag_text = None 8068 8069 if self._is_connected(): 8070 self._advance() 8071 tags.append(self._prev.text.upper()) 8072 else: 8073 self.raise_error("No closing $ found") 8074 8075 if tags[-1] != "$": 8076 if self._is_connected() and self._match_text_seq("$"): 8077 tag_text = tags[-1] 8078 tags.append("$") 8079 else: 8080 self.raise_error("No closing $ found") 8081 8082 heredoc_start = self._curr 8083 8084 while self._curr: 8085 if self._match_text_seq(*tags, advance=False): 8086 this = self._find_sql(heredoc_start, self._prev) 8087 self._advance(len(tags)) 8088 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8089 8090 self._advance() 8091 8092 self.raise_error(f"No closing {''.join(tags)} found") 8093 return None 8094 8095 def _find_parser( 8096 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8097 ) -> t.Optional[t.Callable]: 8098 if not self._curr: 8099 return None 8100 8101 index = self._index 8102 this = [] 8103 while True: 8104 # The current token might be multiple words 8105 curr = self._curr.text.upper() 8106 key = curr.split(" ") 8107 this.append(curr) 8108 8109 self._advance() 8110 result, trie = in_trie(trie, key) 8111 if result == TrieResult.FAILED: 8112 break 8113 8114 if result == TrieResult.EXISTS: 8115 subparser = parsers[" ".join(this)] 8116 return subparser 8117 8118 self._retreat(index) 8119 return None 8120 8121 def _match(self, token_type, advance=True, expression=None): 8122 if not self._curr: 8123 return None 8124 8125 if self._curr.token_type == token_type: 8126 if advance: 8127 self._advance() 8128 self._add_comments(expression) 8129 return 
True 8130 8131 return None 8132 8133 def _match_set(self, types, advance=True): 8134 if not self._curr: 8135 return None 8136 8137 if self._curr.token_type in types: 8138 if advance: 8139 self._advance() 8140 return True 8141 8142 return None 8143 8144 def _match_pair(self, token_type_a, token_type_b, advance=True): 8145 if not self._curr or not self._next: 8146 return None 8147 8148 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8149 if advance: 8150 self._advance(2) 8151 return True 8152 8153 return None 8154 8155 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8156 if not self._match(TokenType.L_PAREN, expression=expression): 8157 self.raise_error("Expecting (") 8158 8159 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8160 if not self._match(TokenType.R_PAREN, expression=expression): 8161 self.raise_error("Expecting )") 8162 8163 def _match_texts(self, texts, advance=True): 8164 if ( 8165 self._curr 8166 and self._curr.token_type != TokenType.STRING 8167 and self._curr.text.upper() in texts 8168 ): 8169 if advance: 8170 self._advance() 8171 return True 8172 return None 8173 8174 def _match_text_seq(self, *texts, advance=True): 8175 index = self._index 8176 for text in texts: 8177 if ( 8178 self._curr 8179 and self._curr.token_type != TokenType.STRING 8180 and self._curr.text.upper() == text 8181 ): 8182 self._advance() 8183 else: 8184 self._retreat(index) 8185 return None 8186 8187 if not advance: 8188 self._retreat(index) 8189 8190 return True 8191 8192 def _replace_lambda( 8193 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8194 ) -> t.Optional[exp.Expression]: 8195 if not node: 8196 return node 8197 8198 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8199 8200 for column in node.find_all(exp.Column): 8201 typ = lambda_types.get(column.parts[0].name) 8202 if typ is not None: 8203 dot_or_id = column.to_dot() if column.table else column.this 8204 8205 if typ: 8206 dot_or_id = self.expression( 8207 exp.Cast, 8208 this=dot_or_id, 8209 to=typ, 8210 ) 8211 8212 parent = column.parent 8213 8214 while isinstance(parent, exp.Dot): 8215 if not isinstance(parent.parent, exp.Dot): 8216 parent.replace(dot_or_id) 8217 break 8218 parent = parent.parent 8219 else: 8220 if column is node: 8221 node = dot_or_id 8222 else: 8223 column.replace(dot_or_id) 8224 return node 8225 8226 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8227 start = self._prev 8228 8229 # Not to be confused with TRUNCATE(number, decimals) function call 8230 if self._match(TokenType.L_PAREN): 8231 self._retreat(self._index - 2) 8232 return self._parse_function() 8233 8234 # Clickhouse supports TRUNCATE DATABASE as well 8235 is_database = self._match(TokenType.DATABASE) 8236 8237 self._match(TokenType.TABLE) 8238 8239 exists = self._parse_exists(not_=False) 8240 8241 expressions = self._parse_csv( 8242 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8243 ) 8244 8245 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8246 8247 if self._match_text_seq("RESTART", "IDENTITY"): 8248 identity = "RESTART" 8249 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8250 identity = "CONTINUE" 8251 else: 8252 identity = None 8253 8254 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8255 option = self._prev.text 8256 else: 8257 option = None 8258 8259 partition = self._parse_partition() 
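# Added sketch, not part of the upstream source: when every token is consumed,
# the typed node below is built instead of the exp.Command fallback, e.g.
# (result described approximately):
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
#   # an exp.TruncateTable with two table expressions, identity="RESTART"
#   # and option="CASCADE"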
8260 8261 # Fallback case 8262 if self._curr: 8263 return self._parse_as_command(start) 8264 8265 return self.expression( 8266 exp.TruncateTable, 8267 expressions=expressions, 8268 is_database=is_database, 8269 exists=exists, 8270 cluster=cluster, 8271 identity=identity, 8272 option=option, 8273 partition=partition, 8274 ) 8275 8276 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8277 this = self._parse_ordered(self._parse_opclass) 8278 8279 if not self._match(TokenType.WITH): 8280 return this 8281 8282 op = self._parse_var(any_token=True) 8283 8284 return self.expression(exp.WithOperator, this=this, op=op) 8285 8286 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8287 self._match(TokenType.EQ) 8288 self._match(TokenType.L_PAREN) 8289 8290 opts: t.List[t.Optional[exp.Expression]] = [] 8291 option: exp.Expression | None 8292 while self._curr and not self._match(TokenType.R_PAREN): 8293 if self._match_text_seq("FORMAT_NAME", "="): 8294 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8295 option = self._parse_format_name() 8296 else: 8297 option = self._parse_property() 8298 8299 if option is None: 8300 self.raise_error("Unable to parse option") 8301 break 8302 8303 opts.append(option) 8304 8305 return opts 8306 8307 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8308 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8309 8310 options = [] 8311 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8312 option = self._parse_var(any_token=True) 8313 prev = self._prev.text.upper() 8314 8315 # Different dialects might separate options and values by white space, "=" and "AS" 8316 self._match(TokenType.EQ) 8317 self._match(TokenType.ALIAS) 8318 8319 param = self.expression(exp.CopyParameter, this=option) 8320 8321 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8322 TokenType.L_PAREN, advance=False 8323 ): 8324 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8325 param.set("expressions", self._parse_wrapped_options()) 8326 elif prev == "FILE_FORMAT": 8327 # T-SQL's external file format case 8328 param.set("expression", self._parse_field()) 8329 else: 8330 param.set("expression", self._parse_unquoted_field()) 8331 8332 options.append(param) 8333 self._match(sep) 8334 8335 return options 8336 8337 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8338 expr = self.expression(exp.Credentials) 8339 8340 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8341 expr.set("storage", self._parse_field()) 8342 if self._match_text_seq("CREDENTIALS"): 8343 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8344 creds = ( 8345 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8346 ) 8347 expr.set("credentials", creds) 8348 if self._match_text_seq("ENCRYPTION"): 8349 expr.set("encryption", self._parse_wrapped_options()) 8350 if self._match_text_seq("IAM_ROLE"): 8351 expr.set("iam_role", self._parse_field()) 8352 if self._match_text_seq("REGION"): 8353 expr.set("region", self._parse_field()) 8354 8355 return expr 8356 8357 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8358 return self._parse_field() 8359 8360 def _parse_copy(self) -> exp.Copy | exp.Command: 8361 start = self._prev 8362 8363 self._match(TokenType.INTO) 8364 8365 this = ( 8366 self._parse_select(nested=True, parse_subquery_alias=False) 8367 if self._match(TokenType.L_PAREN, advance=False) 8368 else self._parse_table(schema=True) 
8369 ) 8370 8371 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8372 8373 files = self._parse_csv(self._parse_file_location) 8374 credentials = self._parse_credentials() 8375 8376 self._match_text_seq("WITH") 8377 8378 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8379 8380 # Fallback case 8381 if self._curr: 8382 return self._parse_as_command(start) 8383 8384 return self.expression( 8385 exp.Copy, 8386 this=this, 8387 kind=kind, 8388 credentials=credentials, 8389 files=files, 8390 params=params, 8391 ) 8392 8393 def _parse_normalize(self) -> exp.Normalize: 8394 return self.expression( 8395 exp.Normalize, 8396 this=self._parse_bitwise(), 8397 form=self._match(TokenType.COMMA) and self._parse_var(), 8398 ) 8399 8400 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8401 args = self._parse_csv(lambda: self._parse_lambda()) 8402 8403 this = seq_get(args, 0) 8404 decimals = seq_get(args, 1) 8405 8406 return expr_type( 8407 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8408 ) 8409 8410 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8411 star_token = self._prev 8412 8413 if self._match_text_seq("COLUMNS", "(", advance=False): 8414 this = self._parse_function() 8415 if isinstance(this, exp.Columns): 8416 this.set("unpack", True) 8417 return this 8418 8419 return self.expression( 8420 exp.Star, 8421 **{ # type: ignore 8422 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8423 "replace": self._parse_star_op("REPLACE"), 8424 "rename": self._parse_star_op("RENAME"), 8425 }, 8426 ).update_positions(star_token) 8427 8428 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8429 privilege_parts = [] 8430 8431 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8432 # (end of privilege list) or L_PAREN (start of column list) are met 8433 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8434 privilege_parts.append(self._curr.text.upper()) 8435 self._advance() 8436 8437 this = exp.var(" ".join(privilege_parts)) 8438 expressions = ( 8439 self._parse_wrapped_csv(self._parse_column) 8440 if self._match(TokenType.L_PAREN, advance=False) 8441 else None 8442 ) 8443 8444 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8445 8446 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8447 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8448 principal = self._parse_id_var() 8449 8450 if not principal: 8451 return None 8452 8453 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8454 8455 def _parse_grant_revoke_common( 8456 self, 8457 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8458 privileges = self._parse_csv(self._parse_grant_privilege) 8459 8460 self._match(TokenType.ON) 8461 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8462 8463 # Attempt to parse the securable e.g. 
MySQL allows names 8464 # such as "foo.*", "*.*" which are not easily parseable yet 8465 securable = self._try_parse(self._parse_table_parts) 8466 8467 return privileges, kind, securable 8468 8469 def _parse_grant(self) -> exp.Grant | exp.Command: 8470 start = self._prev 8471 8472 privileges, kind, securable = self._parse_grant_revoke_common() 8473 8474 if not securable or not self._match_text_seq("TO"): 8475 return self._parse_as_command(start) 8476 8477 principals = self._parse_csv(self._parse_grant_principal) 8478 8479 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8480 8481 if self._curr: 8482 return self._parse_as_command(start) 8483 8484 return self.expression( 8485 exp.Grant, 8486 privileges=privileges, 8487 kind=kind, 8488 securable=securable, 8489 principals=principals, 8490 grant_option=grant_option, 8491 ) 8492 8493 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8494 start = self._prev 8495 8496 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8497 8498 privileges, kind, securable = self._parse_grant_revoke_common() 8499 8500 if not securable or not self._match_text_seq("FROM"): 8501 return self._parse_as_command(start) 8502 8503 principals = self._parse_csv(self._parse_grant_principal) 8504 8505 cascade = None 8506 if self._match_texts(("CASCADE", "RESTRICT")): 8507 cascade = self._prev.text.upper() 8508 8509 if self._curr: 8510 return self._parse_as_command(start) 8511 8512 return self.expression( 8513 exp.Revoke, 8514 privileges=privileges, 8515 kind=kind, 8516 securable=securable, 8517 principals=principals, 8518 grant_option=grant_option, 8519 cascade=cascade, 8520 ) 8521 8522 def _parse_overlay(self) -> exp.Overlay: 8523 return self.expression( 8524 exp.Overlay, 8525 **{ # type: ignore 8526 "this": self._parse_bitwise(), 8527 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8528 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8529 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8530 }, 8531 ) 8532 8533 def _parse_format_name(self) -> exp.Property: 8534 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8535 # for FILE_FORMAT = <format_name> 8536 return self.expression( 8537 exp.Property, 8538 this=exp.var("FORMAT_NAME"), 8539 value=self._parse_string() or self._parse_table_parts(), 8540 ) 8541 8542 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8543 args: t.List[exp.Expression] = [] 8544 8545 if self._match(TokenType.DISTINCT): 8546 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8547 self._match(TokenType.COMMA) 8548 8549 args.extend(self._parse_csv(self._parse_assignment)) 8550 8551 return self.expression( 8552 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8553 ) 8554 8555 def _identifier_expression( 8556 self, token: t.Optional[Token] = None, **kwargs: t.Any 8557 ) -> exp.Identifier: 8558 token = token or self._prev 8559 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8560 expression.update_positions(token) 8561 return expression 8562 8563 def _build_pipe_cte( 8564 self, 8565 query: exp.Query, 8566 expressions: t.List[exp.Expression], 8567 alias_cte: t.Optional[exp.TableAlias] = None, 8568 ) -> exp.Select: 8569 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8570 if alias_cte: 8571 new_cte = alias_cte 8572 else: 8573 self._pipe_cte_counter += 1 8574 new_cte = f"__tmp{self._pipe_cte_counter}" 8575 8576 with_ = 
query.args.get("with") 8577 ctes = with_.pop() if with_ else None 8578 8579 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8580 if ctes: 8581 new_select.set("with", ctes) 8582 8583 return new_select.with_(new_cte, as_=query, copy=False) 8584 8585 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8586 select = self._parse_select(consume_pipe=False) 8587 if not select: 8588 return query 8589 8590 return self._build_pipe_cte( 8591 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8592 ) 8593 8594 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8595 limit = self._parse_limit() 8596 offset = self._parse_offset() 8597 if limit: 8598 curr_limit = query.args.get("limit", limit) 8599 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8600 query.limit(limit, copy=False) 8601 if offset: 8602 curr_offset = query.args.get("offset") 8603 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8604 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8605 8606 return query 8607 8608 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8609 this = self._parse_assignment() 8610 if self._match_text_seq("GROUP", "AND", advance=False): 8611 return this 8612 8613 this = self._parse_alias(this) 8614 8615 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8616 return self._parse_ordered(lambda: this) 8617 8618 return this 8619 8620 def _parse_pipe_syntax_aggregate_group_order_by( 8621 self, query: exp.Select, group_by_exists: bool = True 8622 ) -> exp.Select: 8623 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8624 aggregates_or_groups, orders = [], [] 8625 for element in expr: 8626 if isinstance(element, exp.Ordered): 8627 this = element.this 8628 if isinstance(this, exp.Alias): 8629 element.set("this", this.args["alias"]) 8630 orders.append(element) 8631 else: 8632 this = element 8633 aggregates_or_groups.append(this) 8634 8635 if group_by_exists: 8636 query.select(*aggregates_or_groups, copy=False).group_by( 8637 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8638 copy=False, 8639 ) 8640 else: 8641 query.select(*aggregates_or_groups, append=False, copy=False) 8642 8643 if orders: 8644 return query.order_by(*orders, append=False, copy=False) 8645 8646 return query 8647 8648 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8649 self._match_text_seq("AGGREGATE") 8650 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8651 8652 if self._match(TokenType.GROUP_BY) or ( 8653 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8654 ): 8655 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8656 8657 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8658 8659 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8660 first_setop = self.parse_set_operation(this=query) 8661 if not first_setop: 8662 return None 8663 8664 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8665 expr = self._parse_paren() 8666 return expr.assert_is(exp.Subquery).unnest() if expr else None 8667 8668 first_setop.this.pop() 8669 8670 setops = [ 8671 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8672 *self._parse_csv(_parse_and_unwrap_query), 8673 ] 8674 8675 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8676 
with_ = query.args.get("with") 8677 ctes = with_.pop() if with_ else None 8678 8679 if isinstance(first_setop, exp.Union): 8680 query = query.union(*setops, copy=False, **first_setop.args) 8681 elif isinstance(first_setop, exp.Except): 8682 query = query.except_(*setops, copy=False, **first_setop.args) 8683 else: 8684 query = query.intersect(*setops, copy=False, **first_setop.args) 8685 8686 query.set("with", ctes) 8687 8688 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8689 8690 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8691 join = self._parse_join() 8692 if not join: 8693 return None 8694 8695 if isinstance(query, exp.Select): 8696 return query.join(join, copy=False) 8697 8698 return query 8699 8700 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8701 pivots = self._parse_pivots() 8702 if not pivots: 8703 return query 8704 8705 from_ = query.args.get("from") 8706 if from_: 8707 from_.this.set("pivots", pivots) 8708 8709 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8710 8711 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8712 self._match_text_seq("EXTEND") 8713 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8714 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8715 8716 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8717 sample = self._parse_table_sample() 8718 8719 with_ = query.args.get("with") 8720 if with_: 8721 with_.expressions[-1].this.set("sample", sample) 8722 else: 8723 query.set("sample", sample) 8724 8725 return query 8726 8727 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8728 if isinstance(query, exp.Subquery): 8729 query = exp.select("*").from_(query, copy=False) 8730 8731 if not query.args.get("from"): 8732 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8733 8734 while self._match(TokenType.PIPE_GT): 8735 start = self._curr 8736 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8737 if not parser: 8738 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8739 # keywords, making it tricky to disambiguate them without lookahead. The approach 8740 # here is to try and parse a set operation and if that fails, then try to parse a 8741 # join operator. If that fails as well, then the operator is not supported. 
8742 parsed_query = self._parse_pipe_syntax_set_operator(query) 8743 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8744 if not parsed_query: 8745 self._retreat(start) 8746 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8747 break 8748 query = parsed_query 8749 else: 8750 query = parser(self, query) 8751 8752 return query 8753 8754 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8755 vars = self._parse_csv(self._parse_id_var) 8756 if not vars: 8757 return None 8758 8759 return self.expression( 8760 exp.DeclareItem, 8761 this=vars, 8762 kind=self._parse_types(), 8763 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8764 ) 8765 8766 def _parse_declare(self) -> exp.Declare | exp.Command: 8767 start = self._prev 8768 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8769 8770 if not expressions or self._curr: 8771 return self._parse_as_command(start) 8772 8773 return self.expression(exp.Declare, expressions=expressions) 8774 8775 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8776 exp_class = exp.Cast if strict else exp.TryCast 8777 8778 if exp_class == exp.TryCast: 8779 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8780 8781 return self.expression(exp_class, **kwargs) 8782 8783 def _parse_json_value(self) -> exp.JSONValue: 8784 this = self._parse_bitwise() 8785 self._match(TokenType.COMMA) 8786 path = self._parse_bitwise() 8787 8788 returning = self._match(TokenType.RETURNING) and self._parse_type() 8789 8790 return self.expression( 8791 exp.JSONValue, 8792 this=this, 8793 path=self.dialect.to_json_path(path), 8794 returning=returning, 8795 on_condition=self._parse_on_condition(), 8796 ) 8797 8798 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8799 def concat_exprs( 8800 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8801 ) -> exp.Expression: 8802 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8803 concat_exprs = [ 8804 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8805 ] 8806 node.set("expressions", concat_exprs) 8807 return node 8808 if len(exprs) == 1: 8809 return exprs[0] 8810 return self.expression(exp.Concat, expressions=args, safe=True) 8811 8812 args = self._parse_csv(self._parse_lambda) 8813 8814 if args: 8815 order = args[-1] if isinstance(args[-1], exp.Order) else None 8816 8817 if order: 8818 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8819 # remove 'expr' from exp.Order and add it back to args 8820 args[-1] = order.this 8821 order.set("this", concat_exprs(order.this, args)) 8822 8823 this = order or concat_exprs(args[0], args) 8824 else: 8825 this = None 8826 8827 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8828 8829 return self.expression(exp.GroupConcat, this=this, separator=separator)
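Several of the statement parsers above (_parse_truncate_table, _parse_copy, _parse_grant, _parse_revoke, _parse_declare) share a fallback pattern: if any tokens remain unconsumed after the structured parse, the statement is re-parsed as an opaque exp.Command instead of failing. A minimal sketch of the observable behavior through the public API (the trailing clause in the second statement is deliberately nonsense):

import sqlglot
from sqlglot import exp

# A fully supported TRUNCATE yields a structured TruncateTable node
tree = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
assert isinstance(tree, exp.TruncateTable)

# Leftover tokens trigger the fallback, yielding a generic Command wrapper
tree = sqlglot.parse_one("TRUNCATE TABLE t1 SOME UNKNOWN CLAUSE")
assert isinstance(tree, exp.Command)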
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
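As a usage sketch, a Parser can be constructed directly, although most callers go through sqlglot.parse or sqlglot.parse_one instead; the dialect argument accepts anything Dialect.get_or_raise understands, e.g. a dialect name:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to five errors and raise them together once parsing finishes
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")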
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
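A short usage sketch: tokenize first, then hand the tokens to parse; passing the original SQL string is optional but improves error messages:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
raw_tokens = Tokenizer().tokenize(sql)

# One syntax tree per semicolon-separated statement
trees = Parser().parse(raw_tokens, sql)
assert len(trees) == 2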
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
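For example, assuming exp.Condition is registered in EXPRESSION_PARSERS (it is in current sqlglot versions), a bare boolean expression can be parsed without the surrounding SELECT machinery:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x > 5 AND y < 10"
tree = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql)[0]
assert isinstance(tree, exp.And)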
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
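With ErrorLevel.WARN the errors are logged rather than raised, and they remain available on the parser afterwards. A sketch (the malformed SQL is illustrative):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"  # missing table name
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)  # check_errors runs inside _parse

assert parser.errors  # recorded and logged, but nothing was raised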
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
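Under the default ErrorLevel.IMMEDIATE, the first raise_error call raises at once, and the structured fields passed to ParseError.new stay accessible on the exception. A sketch:

import sqlglot
from sqlglot.errors import ParseError

try:
    sqlglot.parse_one("SELECT * FROM")  # missing table name
except ParseError as e:
    first = e.errors[0]
    print(first["line"], first["col"], first["description"])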
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
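A minimal sketch of using this factory directly; it validates the new node and attaches any pending comments in one step:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
node = parser.expression(exp.Not, this=exp.column("active"))
assert node.sql() == "NOT active"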
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
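A sketch of validation failing, assuming exp.Cast, which declares both this and to as mandatory, is constructed without its to argument; under the default ErrorLevel.IMMEDIATE this surfaces as a ParseError:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    Parser().validate_expression(exp.Cast(this=exp.column("x")))
except ParseError as e:
    print(e)  # reports the missing mandatory 'to' argument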
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
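The DISTINCT/ALL resolution above is observable from the public API: an explicit ALL sets distinct=False, while a plain UNION falls back to the dialect's SET_OP_DISTINCT_BY_DEFAULT entry, which is True in the default dialect. A sketch:

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
assert isinstance(union, exp.Union)
assert union.args["distinct"] is False

union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
assert union.args["distinct"] is True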