sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
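
    # A minimal usage sketch (illustrative only; all names below come from
    # this module and sqlglot.tokens / sqlglot.errors):
    #
    #     from sqlglot.errors import ErrorLevel
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "SELECT a FROM t"
    #     parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    #     expressions = parser.parse(Tokenizer().tokenize(sql), sql=sql)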

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }
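
    # Dialect parsers typically customize these class-level tables by
    # subclassing and merging; a hypothetical sketch:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "MY_FUNC": lambda args: exp.Anonymous(
    #                 this="MY_FUNC", expressions=args
    #             ),
    #         }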

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
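
    # For example, with STRICT_CAST enabled the DCOLON entry above turns
    # "a::INT" into exp.Cast (exp.TryCast otherwise). Illustrative:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT a::INT").find(exp.Cast)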

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
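
    # _parse_statement (defined below) dispatches on the current token through
    # STATEMENT_PARSERS, so a subclass can register additional statements; a
    # hypothetical sketch:
    #
    #     class MyParser(Parser):
    #         STATEMENT_PARSERS = {
    #             **Parser.STATEMENT_PARSERS,
    #             TokenType.SHOW: lambda self: self._parse_command(),
    #         }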

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
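
    # Most of these entries are built by binary_range_parser (top of module):
    # e.g. "x GLOB 'a*'" parses into exp.Glob(this=x, expression='a*'), wrapped
    # in exp.Escape when an ESCAPE clause follows. Illustrative:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT * FROM t WHERE x LIKE 'a%' ESCAPE '!'")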
"LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 659 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 660 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 661 "LIKE": lambda self: self._parse_create_like(), 662 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 663 "LOCK": lambda self: self._parse_locking(), 664 "LOCKING": lambda self: self._parse_locking(), 665 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 666 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 667 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 668 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 669 "NO": lambda self: self._parse_no_property(), 670 "ON": lambda self: self._parse_on_property(), 671 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 672 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 673 "PARTITION BY": lambda self: self._parse_partitioned_by(), 674 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 675 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 676 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 677 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 678 "REMOTE": lambda self: self._parse_remote_with_connection(), 679 "RETURNS": lambda self: self._parse_returns(), 680 "ROW": lambda self: self._parse_row(), 681 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 682 "SAMPLE": lambda self: self.expression( 683 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 684 ), 685 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 686 "SETTINGS": lambda self: self.expression( 687 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 688 ), 689 "SORTKEY": lambda self: self._parse_sortkey(), 690 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 691 "STABLE": lambda self: self.expression( 692 exp.StabilityProperty, this=exp.Literal.string("STABLE") 693 ), 694 "STORED": lambda self: self._parse_stored(), 695 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 696 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 697 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 698 "TO": lambda self: self._parse_to_table(), 699 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 700 "TRANSFORM": lambda self: self.expression( 701 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 702 ), 703 "TTL": lambda self: self._parse_ttl(), 704 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 705 "VOLATILE": lambda self: self._parse_volatile_property(), 706 "WITH": lambda self: self._parse_with_property(), 707 } 708 709 CONSTRAINT_PARSERS = { 710 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 711 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 712 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 713 "CHARACTER SET": lambda self: self.expression( 714 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 715 ), 716 "CHECK": lambda self: self.expression( 717 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 718 ), 719 "COLLATE": lambda self: self.expression( 720 exp.CollateColumnConstraint, this=self._parse_var() 721 

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}
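
    # Sketch of how the modifier table above is consumed: _parse_query_modifiers
    # (later in this module) repeatedly looks up the current token in
    # QUERY_MODIFIER_PARSERS and sets each resulting (arg_name, expression)
    # pair on the nearest MODIFIABLES node. Illustrative:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT a FROM t WHERE x LIMIT 1").args["limit"]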

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
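
    # Sketch: _parse (below) splits the token stream on semicolons, yielding
    # one syntax tree per statement:
    #
    #     tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    #     Parser().parse(tokens)  # -> [exp.Select, exp.Select]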

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
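
    # parse_into sketch, driven by the EXPRESSION_PARSERS table above:
    #
    #     tokens = Tokenizer().tokenize("x = 1 AND y = 2")
    #     Parser().parse_into(exp.Condition, tokens)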
1109 """ 1110 instance = exp_class(**kwargs) 1111 instance.add_comments(comments) if comments else self._add_comments(instance) 1112 return self.validate_expression(instance) 1113 1114 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1115 if expression and self._prev_comments: 1116 expression.add_comments(self._prev_comments) 1117 self._prev_comments = None 1118 1119 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1120 """ 1121 Validates an Expression, making sure that all its mandatory arguments are set. 1122 1123 Args: 1124 expression: The expression to validate. 1125 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1126 1127 Returns: 1128 The validated expression. 1129 """ 1130 if self.error_level != ErrorLevel.IGNORE: 1131 for error_message in expression.error_messages(args): 1132 self.raise_error(error_message) 1133 1134 return expression 1135 1136 def _find_sql(self, start: Token, end: Token) -> str: 1137 return self.sql[start.start : end.end + 1] 1138 1139 def _advance(self, times: int = 1) -> None: 1140 self._index += times 1141 self._curr = seq_get(self._tokens, self._index) 1142 self._next = seq_get(self._tokens, self._index + 1) 1143 1144 if self._index > 0: 1145 self._prev = self._tokens[self._index - 1] 1146 self._prev_comments = self._prev.comments 1147 else: 1148 self._prev = None 1149 self._prev_comments = None 1150 1151 def _retreat(self, index: int) -> None: 1152 if index != self._index: 1153 self._advance(index - self._index) 1154 1155 def _parse_command(self) -> exp.Command: 1156 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1157 1158 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1159 start = self._prev 1160 exists = self._parse_exists() if allow_exists else None 1161 1162 self._match(TokenType.ON) 1163 1164 kind = self._match_set(self.CREATABLES) and self._prev 1165 if not kind: 1166 return self._parse_as_command(start) 1167 1168 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1169 this = self._parse_user_defined_function(kind=kind.token_type) 1170 elif kind.token_type == TokenType.TABLE: 1171 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1172 elif kind.token_type == TokenType.COLUMN: 1173 this = self._parse_column() 1174 else: 1175 this = self._parse_id_var() 1176 1177 self._match(TokenType.IS) 1178 1179 return self.expression( 1180 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1181 ) 1182 1183 def _parse_to_table( 1184 self, 1185 ) -> exp.ToTableProperty: 1186 table = self._parse_table_parts(schema=True) 1187 return self.expression(exp.ToTableProperty, this=table) 1188 1189 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1190 def _parse_ttl(self) -> exp.Expression: 1191 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1192 this = self._parse_bitwise() 1193 1194 if self._match_text_seq("DELETE"): 1195 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1196 if self._match_text_seq("RECOMPRESS"): 1197 return self.expression( 1198 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1199 ) 1200 if self._match_text_seq("TO", "DISK"): 1201 return self.expression( 1202 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1203 ) 1204 if self._match_text_seq("TO", "VOLUME"): 1205 return self.expression( 1206 

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
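
    # e.g. "DROP TABLE IF EXISTS t CASCADE" parses via the method above into
    # roughly exp.Drop(kind="TABLE", exists=True, cascade=True) (an
    # illustrative shape, not an exhaustive argument list).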

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
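
    # e.g. "CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1" should yield an
    # exp.Create with replace=True, kind="VIEW", a TemporaryProperty collected
    # into `properties`, and the SELECT in `expression` (illustrative sketch).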
1426 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1427 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1428 "after": self._match_text_seq("AFTER"), 1429 "minimum": self._match_texts(("MIN", "MINIMUM")), 1430 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1431 } 1432 1433 if self._match_texts(self.PROPERTY_PARSERS): 1434 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1435 try: 1436 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1437 except TypeError: 1438 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1439 1440 return None 1441 1442 def _parse_property(self) -> t.Optional[exp.Expression]: 1443 if self._match_texts(self.PROPERTY_PARSERS): 1444 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1445 1446 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1447 return self._parse_character_set(default=True) 1448 1449 if self._match_text_seq("COMPOUND", "SORTKEY"): 1450 return self._parse_sortkey(compound=True) 1451 1452 if self._match_text_seq("SQL", "SECURITY"): 1453 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1454 1455 index = self._index 1456 key = self._parse_column() 1457 1458 if not self._match(TokenType.EQ): 1459 self._retreat(index) 1460 return None 1461 1462 return self.expression( 1463 exp.Property, 1464 this=key.to_dot() if isinstance(key, exp.Column) else key, 1465 value=self._parse_column() or self._parse_var(any_token=True), 1466 ) 1467 1468 def _parse_stored(self) -> exp.FileFormatProperty: 1469 self._match(TokenType.ALIAS) 1470 1471 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1472 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1473 1474 return self.expression( 1475 exp.FileFormatProperty, 1476 this=self.expression( 1477 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1478 ) 1479 if input_format or output_format 1480 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1481 ) 1482 1483 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1484 self._match(TokenType.EQ) 1485 self._match(TokenType.ALIAS) 1486 return self.expression(exp_class, this=self._parse_field()) 1487 1488 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1489 properties = [] 1490 while True: 1491 if before: 1492 prop = self._parse_property_before() 1493 else: 1494 prop = self._parse_property() 1495 1496 if not prop: 1497 break 1498 for p in ensure_list(prop): 1499 properties.append(p) 1500 1501 if properties: 1502 return self.expression(exp.Properties, expressions=properties) 1503 1504 return None 1505 1506 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1507 return self.expression( 1508 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1509 ) 1510 1511 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1512 if self._index >= 2: 1513 pre_volatile_token = self._tokens[self._index - 2] 1514 else: 1515 pre_volatile_token = None 1516 1517 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1518 return exp.VolatileProperty() 1519 1520 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1521 1522 def _parse_with_property( 1523 self, 1524 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1525 if self._match(TokenType.L_PAREN, advance=False): 1526 

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
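    # Illustrative usage (added): `_parse_insert` chains into `_parse_on_conflict`,
    # so a Postgres upsert should produce an exp.Insert whose "conflict" arg is the
    # OnConflict node built above (a sketch, assuming the public API):
    #
    #   >>> import sqlglot
    #   >>> sql = "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING"
    #   >>> sqlglot.parse_one(sql, read="postgres").args["conflict"]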
"BY"): 1967 kwargs["lines"] = self._parse_string() 1968 if self._match_text_seq("NULL", "DEFINED", "AS"): 1969 kwargs["null"] = self._parse_string() 1970 1971 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1972 1973 def _parse_load(self) -> exp.LoadData | exp.Command: 1974 if self._match_text_seq("DATA"): 1975 local = self._match_text_seq("LOCAL") 1976 self._match_text_seq("INPATH") 1977 inpath = self._parse_string() 1978 overwrite = self._match(TokenType.OVERWRITE) 1979 self._match_pair(TokenType.INTO, TokenType.TABLE) 1980 1981 return self.expression( 1982 exp.LoadData, 1983 this=self._parse_table(schema=True), 1984 local=local, 1985 overwrite=overwrite, 1986 inpath=inpath, 1987 partition=self._parse_partition(), 1988 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1989 serde=self._match_text_seq("SERDE") and self._parse_string(), 1990 ) 1991 return self._parse_as_command(self._prev) 1992 1993 def _parse_delete(self) -> exp.Delete: 1994 # This handles MySQL's "Multiple-Table Syntax" 1995 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1996 tables = None 1997 comments = self._prev_comments 1998 if not self._match(TokenType.FROM, advance=False): 1999 tables = self._parse_csv(self._parse_table) or None 2000 2001 returning = self._parse_returning() 2002 2003 return self.expression( 2004 exp.Delete, 2005 comments=comments, 2006 tables=tables, 2007 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2008 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2009 where=self._parse_where(), 2010 returning=returning or self._parse_returning(), 2011 limit=self._parse_limit(), 2012 ) 2013 2014 def _parse_update(self) -> exp.Update: 2015 comments = self._prev_comments 2016 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2017 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2018 returning = self._parse_returning() 2019 return self.expression( 2020 exp.Update, 2021 comments=comments, 2022 **{ # type: ignore 2023 "this": this, 2024 "expressions": expressions, 2025 "from": self._parse_from(joins=True), 2026 "where": self._parse_where(), 2027 "returning": returning or self._parse_returning(), 2028 "order": self._parse_order(), 2029 "limit": self._parse_limit(), 2030 }, 2031 ) 2032 2033 def _parse_uncache(self) -> exp.Uncache: 2034 if not self._match(TokenType.TABLE): 2035 self.raise_error("Expecting TABLE after UNCACHE") 2036 2037 return self.expression( 2038 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2039 ) 2040 2041 def _parse_cache(self) -> exp.Cache: 2042 lazy = self._match_text_seq("LAZY") 2043 self._match(TokenType.TABLE) 2044 table = self._parse_table(schema=True) 2045 2046 options = [] 2047 if self._match_text_seq("OPTIONS"): 2048 self._match_l_paren() 2049 k = self._parse_string() 2050 self._match(TokenType.EQ) 2051 v = self._parse_string() 2052 options = [k, v] 2053 self._match_r_paren() 2054 2055 self._match(TokenType.ALIAS) 2056 return self.expression( 2057 exp.Cache, 2058 this=table, 2059 lazy=lazy, 2060 options=options, 2061 expression=self._parse_select(nested=True), 2062 ) 2063 2064 def _parse_partition(self) -> t.Optional[exp.Partition]: 2065 if not self._match(TokenType.PARTITION): 2066 return None 2067 2068 return self.expression( 2069 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2070 ) 2071 2072 def _parse_value(self) -> exp.Tuple: 2073 if 
    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
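    # Illustrative usage (added): the leading-FROM branch of `_parse_select` means
    # DuckDB's "FROM x" shorthand expands to a full star projection:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("FROM tbl", read="duckdb").sql()
    #   'SELECT * FROM tbl'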
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )
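    # Illustrative usage (added): `_parse_with`/`_parse_cte` attach the CTE list to
    # the statement that follows, under its "with" arg (sketch via the public API):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c").args["with"]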
    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this
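    # Illustrative usage (added): `_parse_join` records the method/side/kind tokens
    # as plain strings on the exp.Join node:
    #
    #   >>> import sqlglot
    #   >>> join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").args["joins"][0]
    #   >>> join.side  # 'LEFT'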
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
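    # Illustrative usage (added): `_parse_table_parts` splits dotted names into
    # catalog/db/table, so "c.d.t" should round-trip as an exp.Table with all
    # three parts set:
    #
    #   >>> import sqlglot
    #   >>> table = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #   >>> (table.catalog, table.db, table.name)  # ('c', 'd', 't')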
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
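    # Illustrative usage (added, hedged): `_parse_unnest` handles the table-valued
    # UNNEST form, e.g. in Presto/Trino:
    #
    #   >>> import sqlglot
    #   >>> sql = "SELECT * FROM UNNEST(ARRAY[1, 2]) AS t(x)"
    #   >>> sqlglot.parse_one(sql, read="presto").find(sqlglot.exp.Unnest)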
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
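    # Illustrative usage (added, hedged): `_parse_table_sample` covers both the
    # TABLESAMPLE keyword and DuckDB's "USING SAMPLE" modifier; a percent sample
    # lands in the "percent" arg of exp.TableSample:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)").find(sqlglot.exp.TableSample)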
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
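    # Illustrative usage (added): `_parse_group` collects plain expressions,
    # GROUPING SETS, ROLLUP and CUBE into separate args of exp.Group:
    #
    #   >>> import sqlglot
    #   >>> sql = "SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)"
    #   >>> sqlglot.parse_one(sql).find(sqlglot.exp.Group).args["rollup"]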
    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
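    # Illustrative usage (added): `_parse_ordered` normalizes null ordering, so an
    # explicit NULLS LAST yields Ordered(desc=True, nulls_first=False):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM t ORDER BY x DESC NULLS LAST").find(sqlglot.exp.Ordered)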
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
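    # Illustrative usage (added): the COMMA branch of `_parse_limit` handles
    # MySQL's "LIMIT offset, count" form; `_parse_query_modifiers` then pops the
    # offset into its own exp.Offset node on the select:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM t LIMIT 5, 10", read="mysql").args["offset"]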
    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
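    # Illustrative usage (added): `_parse_is` maps IS [NOT] DISTINCT FROM onto the
    # null-safe comparison nodes, e.g. NullSafeNEQ for the non-negated form:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT a IS DISTINCT FROM b FROM t").find(sqlglot.exp.NullSafeNEQ)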
a "window side") 3281 unit = None 3282 self._retreat(self._index - 1) 3283 3284 this = exp.Literal.string(parts[0]) 3285 unit = self.expression(exp.Var, this=parts[1]) 3286 3287 return self.expression(exp.Interval, this=this, unit=unit) 3288 3289 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3290 this = self._parse_term() 3291 3292 while True: 3293 if self._match_set(self.BITWISE): 3294 this = self.expression( 3295 self.BITWISE[self._prev.token_type], 3296 this=this, 3297 expression=self._parse_term(), 3298 ) 3299 elif self._match(TokenType.DQMARK): 3300 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3301 elif self._match_pair(TokenType.LT, TokenType.LT): 3302 this = self.expression( 3303 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3304 ) 3305 elif self._match_pair(TokenType.GT, TokenType.GT): 3306 this = self.expression( 3307 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3308 ) 3309 else: 3310 break 3311 3312 return this 3313 3314 def _parse_term(self) -> t.Optional[exp.Expression]: 3315 return self._parse_tokens(self._parse_factor, self.TERM) 3316 3317 def _parse_factor(self) -> t.Optional[exp.Expression]: 3318 return self._parse_tokens(self._parse_unary, self.FACTOR) 3319 3320 def _parse_unary(self) -> t.Optional[exp.Expression]: 3321 if self._match_set(self.UNARY_PARSERS): 3322 return self.UNARY_PARSERS[self._prev.token_type](self) 3323 return self._parse_at_time_zone(self._parse_type()) 3324 3325 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3326 interval = parse_interval and self._parse_interval() 3327 if interval: 3328 return interval 3329 3330 index = self._index 3331 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3332 this = self._parse_column() 3333 3334 if data_type: 3335 if isinstance(this, exp.Literal): 3336 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3337 if parser: 3338 return parser(self, this, data_type) 3339 return self.expression(exp.Cast, this=this, to=data_type) 3340 if not data_type.expressions: 3341 self._retreat(index) 3342 return self._parse_column() 3343 return self._parse_column_ops(data_type) 3344 3345 return this and self._parse_column_ops(this) 3346 3347 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3348 this = self._parse_type() 3349 if not this: 3350 return None 3351 3352 return self.expression( 3353 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3354 ) 3355 3356 def _parse_types( 3357 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3358 ) -> t.Optional[exp.Expression]: 3359 index = self._index 3360 3361 prefix = self._match_text_seq("SYSUDTLIB", ".") 3362 3363 if not self._match_set(self.TYPE_TOKENS): 3364 identifier = allow_identifiers and self._parse_id_var( 3365 any_token=False, tokens=(TokenType.VAR,) 3366 ) 3367 3368 if identifier: 3369 tokens = self._tokenizer.tokenize(identifier.name) 3370 3371 if len(tokens) != 1: 3372 self.raise_error("Unexpected identifier", self._prev) 3373 3374 if tokens[0].token_type in self.TYPE_TOKENS: 3375 self._prev = tokens[0] 3376 elif self.SUPPORTS_USER_DEFINED_TYPES: 3377 type_name = identifier.name 3378 3379 while self._match(TokenType.DOT): 3380 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3381 3382 return exp.DataType.build(type_name, udt=True) 3383 else: 3384 return None 3385 else: 3386 return None 3387 3388 type_token = self._prev.token_type 3389 3390 if type_token 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
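    # Illustrative usage (added): the trailing bracket loop in `_parse_types`
    # turns Postgres-style "INT[]" into a nested ARRAY data type:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("CAST(x AS INT[])", read="postgres").to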
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...), SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this
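    # Illustrative usage (added): the DCOLON branch of `_parse_column_ops` is what
    # turns the shorthand "x::INT" into an exp.Cast:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT x::INT FROM t").find(sqlglot.exp.Cast)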
return None 3607 3608 def _parse_field( 3609 self, 3610 any_token: bool = False, 3611 tokens: t.Optional[t.Collection[TokenType]] = None, 3612 anonymous_func: bool = False, 3613 ) -> t.Optional[exp.Expression]: 3614 return ( 3615 self._parse_primary() 3616 or self._parse_function(anonymous=anonymous_func) 3617 or self._parse_id_var(any_token=any_token, tokens=tokens) 3618 ) 3619 3620 def _parse_function( 3621 self, 3622 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3623 anonymous: bool = False, 3624 optional_parens: bool = True, 3625 ) -> t.Optional[exp.Expression]: 3626 if not self._curr: 3627 return None 3628 3629 token_type = self._curr.token_type 3630 this = self._curr.text 3631 upper = this.upper() 3632 3633 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3634 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3635 self._advance() 3636 return parser(self) 3637 3638 if not self._next or self._next.token_type != TokenType.L_PAREN: 3639 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3640 self._advance() 3641 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3642 3643 return None 3644 3645 if token_type not in self.FUNC_TOKENS: 3646 return None 3647 3648 self._advance(2) 3649 3650 parser = self.FUNCTION_PARSERS.get(upper) 3651 if parser and not anonymous: 3652 this = parser(self) 3653 else: 3654 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3655 3656 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3657 this = self.expression(subquery_predicate, this=self._parse_select()) 3658 self._match_r_paren() 3659 return this 3660 3661 if functions is None: 3662 functions = self.FUNCTIONS 3663 3664 function = functions.get(upper) 3665 3666 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3667 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3668 3669 if function and not anonymous: 3670 func = self.validate_expression(function(args), args) 3671 if not self.NORMALIZE_FUNCTIONS: 3672 func.meta["name"] = this 3673 this = func 3674 else: 3675 this = self.expression(exp.Anonymous, this=this, expressions=args) 3676 3677 self._match_r_paren(this) 3678 return self._parse_window(this) 3679 3680 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3681 return self._parse_column_def(self._parse_id_var()) 3682 3683 def _parse_user_defined_function( 3684 self, kind: t.Optional[TokenType] = None 3685 ) -> t.Optional[exp.Expression]: 3686 this = self._parse_id_var() 3687 3688 while self._match(TokenType.DOT): 3689 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3690 3691 if not self._match(TokenType.L_PAREN): 3692 return this 3693 3694 expressions = self._parse_csv(self._parse_function_parameter) 3695 self._match_r_paren() 3696 return self.expression( 3697 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3698 ) 3699 3700 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3701 literal = self._parse_primary() 3702 if literal: 3703 return self.expression(exp.Introducer, this=token.text, expression=literal) 3704 3705 return self.expression(exp.Identifier, this=token.text) 3706 3707 def _parse_session_parameter(self) -> exp.SessionParameter: 3708 kind = None 3709 this = self._parse_id_var() or self._parse_primary() 3710 3711 if this and self._match(TokenType.DOT): 3712 kind = this.name 3713 this = self._parse_var() or self._parse_primary() 3714 3715 return 
self.expression(exp.SessionParameter, this=this, kind=kind) 3716 3717 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3718 index = self._index 3719 3720 if self._match(TokenType.L_PAREN): 3721 expressions = t.cast( 3722 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3723 ) 3724 3725 if not self._match(TokenType.R_PAREN): 3726 self._retreat(index) 3727 else: 3728 expressions = [self._parse_id_var()] 3729 3730 if self._match_set(self.LAMBDAS): 3731 return self.LAMBDAS[self._prev.token_type](self, expressions) 3732 3733 self._retreat(index) 3734 3735 this: t.Optional[exp.Expression] 3736 3737 if self._match(TokenType.DISTINCT): 3738 this = self.expression( 3739 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3740 ) 3741 else: 3742 this = self._parse_select_or_expression(alias=alias) 3743 3744 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3745 3746 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3747 index = self._index 3748 3749 if not self.errors: 3750 try: 3751 if self._parse_select(nested=True): 3752 return this 3753 except ParseError: 3754 pass 3755 finally: 3756 self.errors.clear() 3757 self._retreat(index) 3758 3759 if not self._match(TokenType.L_PAREN): 3760 return this 3761 3762 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3763 3764 self._match_r_paren() 3765 return self.expression(exp.Schema, this=this, expressions=args) 3766 3767 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3768 return self._parse_column_def(self._parse_field(any_token=True)) 3769 3770 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3771 # column defs are not really columns, they're identifiers 3772 if isinstance(this, exp.Column): 3773 this = this.this 3774 3775 kind = self._parse_types(schema=True) 3776 3777 if self._match_text_seq("FOR", "ORDINALITY"): 3778 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3779 3780 constraints: t.List[exp.Expression] = [] 3781 3782 if not kind and self._match(TokenType.ALIAS): 3783 constraints.append( 3784 self.expression( 3785 exp.ComputedColumnConstraint, 3786 this=self._parse_conjunction(), 3787 persisted=self._match_text_seq("PERSISTED"), 3788 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3789 ) 3790 ) 3791 3792 while True: 3793 constraint = self._parse_column_constraint() 3794 if not constraint: 3795 break 3796 constraints.append(constraint) 3797 3798 if not kind and not constraints: 3799 return this 3800 3801 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3802 3803 def _parse_auto_increment( 3804 self, 3805 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3806 start = None 3807 increment = None 3808 3809 if self._match(TokenType.L_PAREN, advance=False): 3810 args = self._parse_wrapped_csv(self._parse_bitwise) 3811 start = seq_get(args, 0) 3812 increment = seq_get(args, 1) 3813 elif self._match_text_seq("START"): 3814 start = self._parse_bitwise() 3815 self._match_text_seq("INCREMENT") 3816 increment = self._parse_bitwise() 3817 3818 if start and increment: 3819 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3820 3821 return exp.AutoIncrementColumnConstraint() 3822 3823 def _parse_compress(self) -> exp.CompressColumnConstraint: 3824 if self._match(TokenType.L_PAREN, advance=False): 3825 
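            # A parenthesized value list follows, e.g. COMPRESS ('a', 'b'),
            # so parse the wrapped CSV; a single unwrapped value is handled
            # by the fallback below.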
return self.expression( 3826 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3827 ) 3828 3829 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3830 3831 def _parse_generated_as_identity( 3832 self, 3833 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3834 if self._match_text_seq("BY", "DEFAULT"): 3835 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3836 this = self.expression( 3837 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3838 ) 3839 else: 3840 self._match_text_seq("ALWAYS") 3841 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3842 3843 self._match(TokenType.ALIAS) 3844 identity = self._match_text_seq("IDENTITY") 3845 3846 if self._match(TokenType.L_PAREN): 3847 if self._match(TokenType.START_WITH): 3848 this.set("start", self._parse_bitwise()) 3849 if self._match_text_seq("INCREMENT", "BY"): 3850 this.set("increment", self._parse_bitwise()) 3851 if self._match_text_seq("MINVALUE"): 3852 this.set("minvalue", self._parse_bitwise()) 3853 if self._match_text_seq("MAXVALUE"): 3854 this.set("maxvalue", self._parse_bitwise()) 3855 3856 if self._match_text_seq("CYCLE"): 3857 this.set("cycle", True) 3858 elif self._match_text_seq("NO", "CYCLE"): 3859 this.set("cycle", False) 3860 3861 if not identity: 3862 this.set("expression", self._parse_bitwise()) 3863 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 3864 args = self._parse_csv(self._parse_bitwise) 3865 this.set("start", seq_get(args, 0)) 3866 this.set("increment", seq_get(args, 1)) 3867 3868 self._match_r_paren() 3869 3870 return this 3871 3872 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3873 self._match_text_seq("LENGTH") 3874 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3875 3876 def _parse_not_constraint( 3877 self, 3878 ) -> t.Optional[exp.Expression]: 3879 if self._match_text_seq("NULL"): 3880 return self.expression(exp.NotNullColumnConstraint) 3881 if self._match_text_seq("CASESPECIFIC"): 3882 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3883 if self._match_text_seq("FOR", "REPLICATION"): 3884 return self.expression(exp.NotForReplicationColumnConstraint) 3885 return None 3886 3887 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3888 if self._match(TokenType.CONSTRAINT): 3889 this = self._parse_id_var() 3890 else: 3891 this = None 3892 3893 if self._match_texts(self.CONSTRAINT_PARSERS): 3894 return self.expression( 3895 exp.ColumnConstraint, 3896 this=this, 3897 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3898 ) 3899 3900 return this 3901 3902 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3903 if not self._match(TokenType.CONSTRAINT): 3904 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3905 3906 this = self._parse_id_var() 3907 expressions = [] 3908 3909 while True: 3910 constraint = self._parse_unnamed_constraint() or self._parse_function() 3911 if not constraint: 3912 break 3913 expressions.append(constraint) 3914 3915 return self.expression(exp.Constraint, this=this, expressions=expressions) 3916 3917 def _parse_unnamed_constraint( 3918 self, constraints: t.Optional[t.Collection[str]] = None 3919 ) -> t.Optional[exp.Expression]: 3920 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 3921 constraints or self.CONSTRAINT_PARSERS 3922 ): 3923 return None 3924 3925 
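        # The keyword(s) just matched (e.g. CHECK or NOT) determine which
        # constraint parser to dispatch to.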
constraint = self._prev.text.upper() 3926 if constraint not in self.CONSTRAINT_PARSERS: 3927 self.raise_error(f"No parser found for schema constraint {constraint}.") 3928 3929 return self.CONSTRAINT_PARSERS[constraint](self) 3930 3931 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3932 self._match_text_seq("KEY") 3933 return self.expression( 3934 exp.UniqueColumnConstraint, 3935 this=self._parse_schema(self._parse_id_var(any_token=False)), 3936 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3937 ) 3938 3939 def _parse_key_constraint_options(self) -> t.List[str]: 3940 options = [] 3941 while True: 3942 if not self._curr: 3943 break 3944 3945 if self._match(TokenType.ON): 3946 action = None 3947 on = self._advance_any() and self._prev.text 3948 3949 if self._match_text_seq("NO", "ACTION"): 3950 action = "NO ACTION" 3951 elif self._match_text_seq("CASCADE"): 3952 action = "CASCADE" 3953 elif self._match_text_seq("RESTRICT"): 3954 action = "RESTRICT" 3955 elif self._match_pair(TokenType.SET, TokenType.NULL): 3956 action = "SET NULL" 3957 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3958 action = "SET DEFAULT" 3959 else: 3960 self.raise_error("Invalid key constraint") 3961 3962 options.append(f"ON {on} {action}") 3963 elif self._match_text_seq("NOT", "ENFORCED"): 3964 options.append("NOT ENFORCED") 3965 elif self._match_text_seq("DEFERRABLE"): 3966 options.append("DEFERRABLE") 3967 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3968 options.append("INITIALLY DEFERRED") 3969 elif self._match_text_seq("NORELY"): 3970 options.append("NORELY") 3971 elif self._match_text_seq("MATCH", "FULL"): 3972 options.append("MATCH FULL") 3973 else: 3974 break 3975 3976 return options 3977 3978 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3979 if match and not self._match(TokenType.REFERENCES): 3980 return None 3981 3982 expressions = None 3983 this = self._parse_table(schema=True) 3984 options = self._parse_key_constraint_options() 3985 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3986 3987 def _parse_foreign_key(self) -> exp.ForeignKey: 3988 expressions = self._parse_wrapped_id_vars() 3989 reference = self._parse_references() 3990 options = {} 3991 3992 while self._match(TokenType.ON): 3993 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3994 self.raise_error("Expected DELETE or UPDATE") 3995 3996 kind = self._prev.text.lower() 3997 3998 if self._match_text_seq("NO", "ACTION"): 3999 action = "NO ACTION" 4000 elif self._match(TokenType.SET): 4001 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4002 action = "SET " + self._prev.text.upper() 4003 else: 4004 self._advance() 4005 action = self._prev.text.upper() 4006 4007 options[kind] = action 4008 4009 return self.expression( 4010 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4011 ) 4012 4013 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4014 return self._parse_field() 4015 4016 def _parse_primary_key( 4017 self, wrapped_optional: bool = False, in_props: bool = False 4018 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4019 desc = ( 4020 self._match_set((TokenType.ASC, TokenType.DESC)) 4021 and self._prev.token_type == TokenType.DESC 4022 ) 4023 4024 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4025 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4026 4027 expressions = self._parse_wrapped_csv( 
4028 self._parse_primary_key_part, optional=wrapped_optional 4029 ) 4030 options = self._parse_key_constraint_options() 4031 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4032 4033 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4034 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4035 return this 4036 4037 bracket_kind = self._prev.token_type 4038 4039 if self._match(TokenType.COLON): 4040 expressions: t.List[exp.Expression] = [ 4041 self.expression(exp.Slice, expression=self._parse_conjunction()) 4042 ] 4043 else: 4044 expressions = self._parse_csv( 4045 lambda: self._parse_slice( 4046 self._parse_alias(self._parse_conjunction(), explicit=True) 4047 ) 4048 ) 4049 4050 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4051 if bracket_kind == TokenType.L_BRACE: 4052 this = self.expression(exp.Struct, expressions=expressions) 4053 elif not this or this.name.upper() == "ARRAY": 4054 this = self.expression(exp.Array, expressions=expressions) 4055 else: 4056 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4057 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4058 4059 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4060 self.raise_error("Expected ]") 4061 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4062 self.raise_error("Expected }") 4063 4064 self._add_comments(this) 4065 return self._parse_bracket(this) 4066 4067 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4068 if self._match(TokenType.COLON): 4069 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4070 return this 4071 4072 def _parse_case(self) -> t.Optional[exp.Expression]: 4073 ifs = [] 4074 default = None 4075 4076 comments = self._prev_comments 4077 expression = self._parse_conjunction() 4078 4079 while self._match(TokenType.WHEN): 4080 this = self._parse_conjunction() 4081 self._match(TokenType.THEN) 4082 then = self._parse_conjunction() 4083 ifs.append(self.expression(exp.If, this=this, true=then)) 4084 4085 if self._match(TokenType.ELSE): 4086 default = self._parse_conjunction() 4087 4088 if not self._match(TokenType.END): 4089 self.raise_error("Expected END after CASE", self._prev) 4090 4091 return self._parse_window( 4092 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4093 ) 4094 4095 def _parse_if(self) -> t.Optional[exp.Expression]: 4096 if self._match(TokenType.L_PAREN): 4097 args = self._parse_csv(self._parse_conjunction) 4098 this = self.validate_expression(exp.If.from_arg_list(args), args) 4099 self._match_r_paren() 4100 else: 4101 index = self._index - 1 4102 condition = self._parse_conjunction() 4103 4104 if not condition: 4105 self._retreat(index) 4106 return None 4107 4108 self._match(TokenType.THEN) 4109 true = self._parse_conjunction() 4110 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4111 self._match(TokenType.END) 4112 this = self.expression(exp.If, this=condition, true=true, false=false) 4113 4114 return self._parse_window(this) 4115 4116 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4117 if not self._match_text_seq("VALUE", "FOR"): 4118 self._retreat(self._index - 1) 4119 return None 4120 4121 return self.expression( 4122 exp.NextValueFor, 4123 this=self._parse_column(), 4124 order=self._match(TokenType.OVER) and 
self._parse_wrapped(self._parse_order), 4125 ) 4126 4127 def _parse_extract(self) -> exp.Extract: 4128 this = self._parse_function() or self._parse_var() or self._parse_type() 4129 4130 if self._match(TokenType.FROM): 4131 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4132 4133 if not self._match(TokenType.COMMA): 4134 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4135 4136 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4137 4138 def _parse_any_value(self) -> exp.AnyValue: 4139 this = self._parse_lambda() 4140 is_max = None 4141 having = None 4142 4143 if self._match(TokenType.HAVING): 4144 self._match_texts(("MAX", "MIN")) 4145 is_max = self._prev.text == "MAX" 4146 having = self._parse_column() 4147 4148 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4149 4150 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4151 this = self._parse_conjunction() 4152 4153 if not self._match(TokenType.ALIAS): 4154 if self._match(TokenType.COMMA): 4155 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4156 4157 self.raise_error("Expected AS after CAST") 4158 4159 fmt = None 4160 to = self._parse_types() 4161 4162 if not to: 4163 self.raise_error("Expected TYPE after CAST") 4164 elif isinstance(to, exp.Identifier): 4165 to = exp.DataType.build(to.name, udt=True) 4166 elif to.this == exp.DataType.Type.CHAR: 4167 if self._match(TokenType.CHARACTER_SET): 4168 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4169 elif self._match(TokenType.FORMAT): 4170 fmt_string = self._parse_string() 4171 fmt = self._parse_at_time_zone(fmt_string) 4172 4173 if to.this in exp.DataType.TEMPORAL_TYPES: 4174 this = self.expression( 4175 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4176 this=this, 4177 format=exp.Literal.string( 4178 format_time( 4179 fmt_string.this if fmt_string else "", 4180 self.FORMAT_MAPPING or self.TIME_MAPPING, 4181 self.FORMAT_TRIE or self.TIME_TRIE, 4182 ) 4183 ), 4184 ) 4185 4186 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4187 this.set("zone", fmt.args["zone"]) 4188 4189 return this 4190 4191 return self.expression( 4192 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4193 ) 4194 4195 def _parse_concat(self) -> t.Optional[exp.Expression]: 4196 args = self._parse_csv(self._parse_conjunction) 4197 if self.CONCAT_NULL_OUTPUTS_STRING: 4198 args = self._ensure_string_if_null(args) 4199 4200 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4201 # we find such a call we replace it with its argument. 
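        # For example, CONCAT(x) simply yields x, while two or more arguments
        # build an exp.Concat when the dialect enforces strict string
        # concatenation, otherwise an exp.SafeConcat.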
4202 if len(args) == 1: 4203 return args[0] 4204 4205 return self.expression( 4206 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4207 ) 4208 4209 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4210 args = self._parse_csv(self._parse_conjunction) 4211 if len(args) < 2: 4212 return self.expression(exp.ConcatWs, expressions=args) 4213 delim, *values = args 4214 if self.CONCAT_NULL_OUTPUTS_STRING: 4215 values = self._ensure_string_if_null(values) 4216 4217 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4218 4219 def _parse_string_agg(self) -> exp.Expression: 4220 if self._match(TokenType.DISTINCT): 4221 args: t.List[t.Optional[exp.Expression]] = [ 4222 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4223 ] 4224 if self._match(TokenType.COMMA): 4225 args.extend(self._parse_csv(self._parse_conjunction)) 4226 else: 4227 args = self._parse_csv(self._parse_conjunction) # type: ignore 4228 4229 index = self._index 4230 if not self._match(TokenType.R_PAREN) and args: 4231 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4232 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4233 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4234 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4235 4236 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4237 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4238 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4239 if not self._match_text_seq("WITHIN", "GROUP"): 4240 self._retreat(index) 4241 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4242 4243 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4244 order = self._parse_order(this=seq_get(args, 0)) 4245 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4246 4247 def _parse_convert( 4248 self, strict: bool, safe: t.Optional[bool] = None 4249 ) -> t.Optional[exp.Expression]: 4250 this = self._parse_bitwise() 4251 4252 if self._match(TokenType.USING): 4253 to: t.Optional[exp.Expression] = self.expression( 4254 exp.CharacterSet, this=self._parse_var() 4255 ) 4256 elif self._match(TokenType.COMMA): 4257 to = self._parse_types() 4258 else: 4259 to = None 4260 4261 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4262 4263 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4264 """ 4265 There are generally two variants of the DECODE function: 4266 4267 - DECODE(bin, charset) 4268 - DECODE(expression, search, result [, search, result] ... [, default]) 4269 4270 The second variant will always be parsed into a CASE expression. Note that NULL 4271 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4272 instead of relying on pattern matching. 
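
        For example, DECODE(x, 1, 'one', 2, 'two', 'other') is parsed into:

            CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END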
4273 """ 4274 args = self._parse_csv(self._parse_conjunction) 4275 4276 if len(args) < 3: 4277 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4278 4279 expression, *expressions = args 4280 if not expression: 4281 return None 4282 4283 ifs = [] 4284 for search, result in zip(expressions[::2], expressions[1::2]): 4285 if not search or not result: 4286 return None 4287 4288 if isinstance(search, exp.Literal): 4289 ifs.append( 4290 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4291 ) 4292 elif isinstance(search, exp.Null): 4293 ifs.append( 4294 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4295 ) 4296 else: 4297 cond = exp.or_( 4298 exp.EQ(this=expression.copy(), expression=search), 4299 exp.and_( 4300 exp.Is(this=expression.copy(), expression=exp.Null()), 4301 exp.Is(this=search.copy(), expression=exp.Null()), 4302 copy=False, 4303 ), 4304 copy=False, 4305 ) 4306 ifs.append(exp.If(this=cond, true=result)) 4307 4308 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4309 4310 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4311 self._match_text_seq("KEY") 4312 key = self._parse_column() 4313 self._match_set((TokenType.COLON, TokenType.COMMA)) 4314 self._match_text_seq("VALUE") 4315 value = self._parse_bitwise() 4316 4317 if not key and not value: 4318 return None 4319 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4320 4321 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4322 if not this or not self._match_text_seq("FORMAT", "JSON"): 4323 return this 4324 4325 return self.expression(exp.FormatJson, this=this) 4326 4327 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4328 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4329 for value in values: 4330 if self._match_text_seq(value, "ON", on): 4331 return f"{value} ON {on}" 4332 4333 return None 4334 4335 def _parse_json_object(self) -> exp.JSONObject: 4336 star = self._parse_star() 4337 expressions = ( 4338 [star] 4339 if star 4340 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4341 ) 4342 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4343 4344 unique_keys = None 4345 if self._match_text_seq("WITH", "UNIQUE"): 4346 unique_keys = True 4347 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4348 unique_keys = False 4349 4350 self._match_text_seq("KEYS") 4351 4352 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4353 self._parse_type() 4354 ) 4355 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4356 4357 return self.expression( 4358 exp.JSONObject, 4359 expressions=expressions, 4360 null_handling=null_handling, 4361 unique_keys=unique_keys, 4362 return_type=return_type, 4363 encoding=encoding, 4364 ) 4365 4366 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4367 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4368 if not self._match_text_seq("NESTED"): 4369 this = self._parse_id_var() 4370 kind = self._parse_types(allow_identifiers=False) 4371 nested = None 4372 else: 4373 this = None 4374 kind = None 4375 nested = True 4376 4377 path = self._match_text_seq("PATH") and self._parse_string() 4378 nested_schema = nested and self._parse_json_schema() 4379 4380 return self.expression( 4381 exp.JSONColumnDef, 4382 this=this, 4383 kind=kind, 4384 path=path, 4385 nested_schema=nested_schema, 4386 ) 4387 4388 def _parse_json_schema(self) -> exp.JSONSchema: 4389 self._match_text_seq("COLUMNS") 4390 return self.expression( 4391 exp.JSONSchema, 4392 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4393 ) 4394 4395 def _parse_json_table(self) -> exp.JSONTable: 4396 this = self._parse_format_json(self._parse_bitwise()) 4397 path = self._match(TokenType.COMMA) and self._parse_string() 4398 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4399 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4400 schema = self._parse_json_schema() 4401 4402 return exp.JSONTable( 4403 this=this, 4404 schema=schema, 4405 path=path, 4406 error_handling=error_handling, 4407 empty_handling=empty_handling, 4408 ) 4409 4410 def _parse_logarithm(self) -> exp.Func: 4411 # Default argument order is base, expression 4412 args = self._parse_csv(self._parse_range) 4413 4414 if len(args) > 1: 4415 if not self.LOG_BASE_FIRST: 4416 args.reverse() 4417 return exp.Log.from_arg_list(args) 4418 4419 return self.expression( 4420 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4421 ) 4422 4423 def _parse_match_against(self) -> exp.MatchAgainst: 4424 expressions = self._parse_csv(self._parse_column) 4425 4426 self._match_text_seq(")", "AGAINST", "(") 4427 4428 this = self._parse_string() 4429 4430 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4431 modifier = "IN NATURAL LANGUAGE MODE" 4432 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4433 modifier = f"{modifier} WITH QUERY EXPANSION" 4434 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4435 modifier = "IN BOOLEAN MODE" 4436 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4437 modifier = "WITH QUERY EXPANSION" 4438 else: 4439 modifier = None 4440 4441 return 
self.expression( 4442 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4443 ) 4444 4445 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4446 def _parse_open_json(self) -> exp.OpenJSON: 4447 this = self._parse_bitwise() 4448 path = self._match(TokenType.COMMA) and self._parse_string() 4449 4450 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4451 this = self._parse_field(any_token=True) 4452 kind = self._parse_types() 4453 path = self._parse_string() 4454 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4455 4456 return self.expression( 4457 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4458 ) 4459 4460 expressions = None 4461 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4462 self._match_l_paren() 4463 expressions = self._parse_csv(_parse_open_json_column_def) 4464 4465 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4466 4467 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4468 args = self._parse_csv(self._parse_bitwise) 4469 4470 if self._match(TokenType.IN): 4471 return self.expression( 4472 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4473 ) 4474 4475 if haystack_first: 4476 haystack = seq_get(args, 0) 4477 needle = seq_get(args, 1) 4478 else: 4479 needle = seq_get(args, 0) 4480 haystack = seq_get(args, 1) 4481 4482 return self.expression( 4483 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4484 ) 4485 4486 def _parse_predict(self) -> exp.Predict: 4487 self._match_text_seq("MODEL") 4488 this = self._parse_table() 4489 4490 self._match(TokenType.COMMA) 4491 self._match_text_seq("TABLE") 4492 4493 return self.expression( 4494 exp.Predict, 4495 this=this, 4496 expression=self._parse_table(), 4497 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4498 ) 4499 4500 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4501 args = self._parse_csv(self._parse_table) 4502 return exp.JoinHint(this=func_name.upper(), expressions=args) 4503 4504 def _parse_substring(self) -> exp.Substring: 4505 # Postgres supports the form: substring(string [from int] [for int]) 4506 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4507 4508 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4509 4510 if self._match(TokenType.FROM): 4511 args.append(self._parse_bitwise()) 4512 if self._match(TokenType.FOR): 4513 args.append(self._parse_bitwise()) 4514 4515 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4516 4517 def _parse_trim(self) -> exp.Trim: 4518 # https://www.w3resource.com/sql/character-functions/trim.php 4519 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4520 4521 position = None 4522 collation = None 4523 expression = None 4524 4525 if self._match_texts(self.TRIM_TYPES): 4526 position = self._prev.text.upper() 4527 4528 this = self._parse_bitwise() 4529 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4530 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4531 expression = self._parse_bitwise() 4532 4533 if invert_order: 4534 this, expression = expression, this 4535 4536 if self._match(TokenType.COLLATE): 4537 collation = self._parse_bitwise() 4538 4539 return self.expression( 4540 exp.Trim, this=this, position=position, expression=expression, collation=collation 4541 ) 4542 4543 def 
_parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4544 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4545 4546 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4547 return self._parse_window(self._parse_id_var(), alias=True) 4548 4549 def _parse_respect_or_ignore_nulls( 4550 self, this: t.Optional[exp.Expression] 4551 ) -> t.Optional[exp.Expression]: 4552 if self._match_text_seq("IGNORE", "NULLS"): 4553 return self.expression(exp.IgnoreNulls, this=this) 4554 if self._match_text_seq("RESPECT", "NULLS"): 4555 return self.expression(exp.RespectNulls, this=this) 4556 return this 4557 4558 def _parse_window( 4559 self, this: t.Optional[exp.Expression], alias: bool = False 4560 ) -> t.Optional[exp.Expression]: 4561 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4562 self._match(TokenType.WHERE) 4563 this = self.expression( 4564 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4565 ) 4566 self._match_r_paren() 4567 4568 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4569 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4570 if self._match_text_seq("WITHIN", "GROUP"): 4571 order = self._parse_wrapped(self._parse_order) 4572 this = self.expression(exp.WithinGroup, this=this, expression=order) 4573 4574 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4575 # Some dialects choose to implement and some do not. 4576 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4577 4578 # There is some code above in _parse_lambda that handles 4579 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4580 4581 # The below changes handle 4582 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4583 4584 # Oracle allows both formats 4585 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4586 # and Snowflake chose to do the same for familiarity 4587 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4588 this = self._parse_respect_or_ignore_nulls(this) 4589 4590 # bigquery select from window x AS (partition by ...) 
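        # e.g. SELECT ... FROM t WINDOW w AS (PARTITION BY c ORDER BY d):
        # when parsing such a named window (alias=True) there is no OVER
        # keyword to consume, only the AS before the specification.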
4591 if alias: 4592 over = None 4593 self._match(TokenType.ALIAS) 4594 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4595 return this 4596 else: 4597 over = self._prev.text.upper() 4598 4599 if not self._match(TokenType.L_PAREN): 4600 return self.expression( 4601 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4602 ) 4603 4604 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4605 4606 first = self._match(TokenType.FIRST) 4607 if self._match_text_seq("LAST"): 4608 first = False 4609 4610 partition, order = self._parse_partition_and_order() 4611 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4612 4613 if kind: 4614 self._match(TokenType.BETWEEN) 4615 start = self._parse_window_spec() 4616 self._match(TokenType.AND) 4617 end = self._parse_window_spec() 4618 4619 spec = self.expression( 4620 exp.WindowSpec, 4621 kind=kind, 4622 start=start["value"], 4623 start_side=start["side"], 4624 end=end["value"], 4625 end_side=end["side"], 4626 ) 4627 else: 4628 spec = None 4629 4630 self._match_r_paren() 4631 4632 window = self.expression( 4633 exp.Window, 4634 this=this, 4635 partition_by=partition, 4636 order=order, 4637 spec=spec, 4638 alias=window_alias, 4639 over=over, 4640 first=first, 4641 ) 4642 4643 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4644 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4645 return self._parse_window(window, alias=alias) 4646 4647 return window 4648 4649 def _parse_partition_and_order( 4650 self, 4651 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4652 return self._parse_partition_by(), self._parse_order() 4653 4654 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4655 self._match(TokenType.BETWEEN) 4656 4657 return { 4658 "value": ( 4659 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4660 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4661 or self._parse_bitwise() 4662 ), 4663 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4664 } 4665 4666 def _parse_alias( 4667 self, this: t.Optional[exp.Expression], explicit: bool = False 4668 ) -> t.Optional[exp.Expression]: 4669 any_token = self._match(TokenType.ALIAS) 4670 4671 if explicit and not any_token: 4672 return this 4673 4674 if self._match(TokenType.L_PAREN): 4675 aliases = self.expression( 4676 exp.Aliases, 4677 this=this, 4678 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4679 ) 4680 self._match_r_paren(aliases) 4681 return aliases 4682 4683 alias = self._parse_id_var(any_token) 4684 4685 if alias: 4686 return self.expression(exp.Alias, this=this, alias=alias) 4687 4688 return this 4689 4690 def _parse_id_var( 4691 self, 4692 any_token: bool = True, 4693 tokens: t.Optional[t.Collection[TokenType]] = None, 4694 ) -> t.Optional[exp.Expression]: 4695 identifier = self._parse_identifier() 4696 4697 if identifier: 4698 return identifier 4699 4700 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4701 quoted = self._prev.token_type == TokenType.STRING 4702 return exp.Identifier(this=self._prev.text, quoted=quoted) 4703 4704 return None 4705 4706 def _parse_string(self) -> t.Optional[exp.Expression]: 4707 if self._match(TokenType.STRING): 4708 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4709 return self._parse_placeholder() 4710 4711 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4712 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4713 4714 def _parse_number(self) -> t.Optional[exp.Expression]: 4715 if self._match(TokenType.NUMBER): 4716 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4717 return self._parse_placeholder() 4718 4719 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4720 if self._match(TokenType.IDENTIFIER): 4721 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4722 return self._parse_placeholder() 4723 4724 def _parse_var( 4725 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4726 ) -> t.Optional[exp.Expression]: 4727 if ( 4728 (any_token and self._advance_any()) 4729 or self._match(TokenType.VAR) 4730 or (self._match_set(tokens) if tokens else False) 4731 ): 4732 return self.expression(exp.Var, this=self._prev.text) 4733 return self._parse_placeholder() 4734 4735 def _advance_any(self) -> t.Optional[Token]: 4736 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4737 self._advance() 4738 return self._prev 4739 return None 4740 4741 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4742 return self._parse_var() or self._parse_string() 4743 4744 def _parse_null(self) -> t.Optional[exp.Expression]: 4745 if self._match_set(self.NULL_TOKENS): 4746 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4747 return self._parse_placeholder() 4748 4749 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4750 if self._match(TokenType.TRUE): 4751 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4752 if self._match(TokenType.FALSE): 4753 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4754 return self._parse_placeholder() 4755 4756 def _parse_star(self) -> t.Optional[exp.Expression]: 4757 if self._match(TokenType.STAR): 4758 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4759 return self._parse_placeholder() 4760 4761 def _parse_parameter(self) -> exp.Parameter: 4762 wrapped = self._match(TokenType.L_BRACE) 4763 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4764 self._match(TokenType.R_BRACE) 4765 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4766 4767 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4768 if self._match_set(self.PLACEHOLDER_PARSERS): 4769 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4770 if placeholder: 4771 return placeholder 4772 self._advance(-1) 4773 return None 4774 4775 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4776 if not self._match(TokenType.EXCEPT): 4777 return None 4778 if self._match(TokenType.L_PAREN, advance=False): 4779 return self._parse_wrapped_csv(self._parse_column) 4780 4781 except_column = self._parse_column() 4782 return [except_column] if except_column else None 4783 4784 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4785 if not self._match(TokenType.REPLACE): 4786 return None 4787 if self._match(TokenType.L_PAREN, advance=False): 4788 return self._parse_wrapped_csv(self._parse_expression) 4789 4790 replace_expression = self._parse_expression() 4791 return [replace_expression] if replace_expression else None 4792 4793 def _parse_csv( 4794 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4795 ) -> t.List[exp.Expression]: 4796 parse_result = parse_method() 4797 items = [parse_result] if parse_result is not None else [] 4798 4799 while self._match(sep): 4800 self._add_comments(parse_result) 
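            # Comments found around the separator are attached to the item
            # that precedes it, before the next item is parsed.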
4801 parse_result = parse_method() 4802 if parse_result is not None: 4803 items.append(parse_result) 4804 4805 return items 4806 4807 def _parse_tokens( 4808 self, parse_method: t.Callable, expressions: t.Dict 4809 ) -> t.Optional[exp.Expression]: 4810 this = parse_method() 4811 4812 while self._match_set(expressions): 4813 this = self.expression( 4814 expressions[self._prev.token_type], 4815 this=this, 4816 comments=self._prev_comments, 4817 expression=parse_method(), 4818 ) 4819 4820 return this 4821 4822 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4823 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4824 4825 def _parse_wrapped_csv( 4826 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4827 ) -> t.List[exp.Expression]: 4828 return self._parse_wrapped( 4829 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4830 ) 4831 4832 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4833 wrapped = self._match(TokenType.L_PAREN) 4834 if not wrapped and not optional: 4835 self.raise_error("Expecting (") 4836 parse_result = parse_method() 4837 if wrapped: 4838 self._match_r_paren() 4839 return parse_result 4840 4841 def _parse_expressions(self) -> t.List[exp.Expression]: 4842 return self._parse_csv(self._parse_expression) 4843 4844 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4845 return self._parse_select() or self._parse_set_operations( 4846 self._parse_expression() if alias else self._parse_conjunction() 4847 ) 4848 4849 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4850 return self._parse_query_modifiers( 4851 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4852 ) 4853 4854 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4855 this = None 4856 if self._match_texts(self.TRANSACTION_KIND): 4857 this = self._prev.text 4858 4859 self._match_texts({"TRANSACTION", "WORK"}) 4860 4861 modes = [] 4862 while True: 4863 mode = [] 4864 while self._match(TokenType.VAR): 4865 mode.append(self._prev.text) 4866 4867 if mode: 4868 modes.append(" ".join(mode)) 4869 if not self._match(TokenType.COMMA): 4870 break 4871 4872 return self.expression(exp.Transaction, this=this, modes=modes) 4873 4874 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4875 chain = None 4876 savepoint = None 4877 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4878 4879 self._match_texts({"TRANSACTION", "WORK"}) 4880 4881 if self._match_text_seq("TO"): 4882 self._match_text_seq("SAVEPOINT") 4883 savepoint = self._parse_id_var() 4884 4885 if self._match(TokenType.AND): 4886 chain = not self._match_text_seq("NO") 4887 self._match_text_seq("CHAIN") 4888 4889 if is_rollback: 4890 return self.expression(exp.Rollback, savepoint=savepoint) 4891 4892 return self.expression(exp.Commit, chain=chain) 4893 4894 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4895 if not self._match_text_seq("ADD"): 4896 return None 4897 4898 self._match(TokenType.COLUMN) 4899 exists_column = self._parse_exists(not_=True) 4900 expression = self._parse_field_def() 4901 4902 if expression: 4903 expression.set("exists", exists_column) 4904 4905 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4906 if self._match_texts(("FIRST", "AFTER")): 4907 position = self._prev.text 4908 column_position = self.expression( 4909 
exp.ColumnPosition, this=self._parse_column(), position=position 4910 ) 4911 expression.set("position", column_position) 4912 4913 return expression 4914 4915 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4916 drop = self._match(TokenType.DROP) and self._parse_drop() 4917 if drop and not isinstance(drop, exp.Command): 4918 drop.set("kind", drop.args.get("kind", "COLUMN")) 4919 return drop 4920 4921 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4922 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4923 return self.expression( 4924 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4925 ) 4926 4927 def _parse_add_constraint(self) -> exp.AddConstraint: 4928 this = None 4929 kind = self._prev.token_type 4930 4931 if kind == TokenType.CONSTRAINT: 4932 this = self._parse_id_var() 4933 4934 if self._match_text_seq("CHECK"): 4935 expression = self._parse_wrapped(self._parse_conjunction) 4936 enforced = self._match_text_seq("ENFORCED") 4937 4938 return self.expression( 4939 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4940 ) 4941 4942 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4943 expression = self._parse_foreign_key() 4944 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4945 expression = self._parse_primary_key() 4946 else: 4947 expression = None 4948 4949 return self.expression(exp.AddConstraint, this=this, expression=expression) 4950 4951 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4952 index = self._index - 1 4953 4954 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4955 return self._parse_csv(self._parse_add_constraint) 4956 4957 self._retreat(index) 4958 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4959 return self._parse_csv(self._parse_field_def) 4960 4961 return self._parse_csv(self._parse_add_column) 4962 4963 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4964 self._match(TokenType.COLUMN) 4965 column = self._parse_field(any_token=True) 4966 4967 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4968 return self.expression(exp.AlterColumn, this=column, drop=True) 4969 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4970 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4971 4972 self._match_text_seq("SET", "DATA") 4973 return self.expression( 4974 exp.AlterColumn, 4975 this=column, 4976 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4977 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4978 using=self._match(TokenType.USING) and self._parse_conjunction(), 4979 ) 4980 4981 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4982 index = self._index - 1 4983 4984 partition_exists = self._parse_exists() 4985 if self._match(TokenType.PARTITION, advance=False): 4986 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4987 4988 self._retreat(index) 4989 return self._parse_csv(self._parse_drop_column) 4990 4991 def _parse_alter_table_rename(self) -> exp.RenameTable: 4992 self._match_text_seq("TO") 4993 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4994 4995 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4996 start = self._prev 4997 4998 if not self._match(TokenType.TABLE): 4999 return self._parse_as_command(start) 5000 5001 exists = self._parse_exists() 5002 only = 
self._match_text_seq("ONLY") 5003 this = self._parse_table(schema=True) 5004 5005 if self._next: 5006 self._advance() 5007 5008 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5009 if parser: 5010 actions = ensure_list(parser(self)) 5011 5012 if not self._curr: 5013 return self.expression( 5014 exp.AlterTable, 5015 this=this, 5016 exists=exists, 5017 actions=actions, 5018 only=only, 5019 ) 5020 5021 return self._parse_as_command(start) 5022 5023 def _parse_merge(self) -> exp.Merge: 5024 self._match(TokenType.INTO) 5025 target = self._parse_table() 5026 5027 if target and self._match(TokenType.ALIAS, advance=False): 5028 target.set("alias", self._parse_table_alias()) 5029 5030 self._match(TokenType.USING) 5031 using = self._parse_table() 5032 5033 self._match(TokenType.ON) 5034 on = self._parse_conjunction() 5035 5036 return self.expression( 5037 exp.Merge, 5038 this=target, 5039 using=using, 5040 on=on, 5041 expressions=self._parse_when_matched(), 5042 ) 5043 5044 def _parse_when_matched(self) -> t.List[exp.When]: 5045 whens = [] 5046 5047 while self._match(TokenType.WHEN): 5048 matched = not self._match(TokenType.NOT) 5049 self._match_text_seq("MATCHED") 5050 source = ( 5051 False 5052 if self._match_text_seq("BY", "TARGET") 5053 else self._match_text_seq("BY", "SOURCE") 5054 ) 5055 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5056 5057 self._match(TokenType.THEN) 5058 5059 if self._match(TokenType.INSERT): 5060 _this = self._parse_star() 5061 if _this: 5062 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5063 else: 5064 then = self.expression( 5065 exp.Insert, 5066 this=self._parse_value(), 5067 expression=self._match(TokenType.VALUES) and self._parse_value(), 5068 ) 5069 elif self._match(TokenType.UPDATE): 5070 expressions = self._parse_star() 5071 if expressions: 5072 then = self.expression(exp.Update, expressions=expressions) 5073 else: 5074 then = self.expression( 5075 exp.Update, 5076 expressions=self._match(TokenType.SET) 5077 and self._parse_csv(self._parse_equality), 5078 ) 5079 elif self._match(TokenType.DELETE): 5080 then = self.expression(exp.Var, this=self._prev.text) 5081 else: 5082 then = None 5083 5084 whens.append( 5085 self.expression( 5086 exp.When, 5087 matched=matched, 5088 source=source, 5089 condition=condition, 5090 then=then, 5091 ) 5092 ) 5093 return whens 5094 5095 def _parse_show(self) -> t.Optional[exp.Expression]: 5096 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5097 if parser: 5098 return parser(self) 5099 return self._parse_as_command(self._prev) 5100 5101 def _parse_set_item_assignment( 5102 self, kind: t.Optional[str] = None 5103 ) -> t.Optional[exp.Expression]: 5104 index = self._index 5105 5106 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5107 return self._parse_set_transaction(global_=kind == "GLOBAL") 5108 5109 left = self._parse_primary() or self._parse_id_var() 5110 assignment_delimiter = self._match_texts(("=", "TO")) 5111 5112 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5113 self._retreat(index) 5114 return None 5115 5116 right = self._parse_statement() or self._parse_id_var() 5117 this = self.expression(exp.EQ, this=left, expression=right) 5118 5119 return self.expression(exp.SetItem, this=this, kind=kind) 5120 5121 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5122 self._match_text_seq("TRANSACTION") 5123 characteristics = 
self._parse_csv( 5124 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5125 ) 5126 return self.expression( 5127 exp.SetItem, 5128 expressions=characteristics, 5129 kind="TRANSACTION", 5130 **{"global": global_}, # type: ignore 5131 ) 5132 5133 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5134 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5135 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5136 5137 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5138 index = self._index 5139 set_ = self.expression( 5140 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5141 ) 5142 5143 if self._curr: 5144 self._retreat(index) 5145 return self._parse_as_command(self._prev) 5146 5147 return set_ 5148 5149 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5150 for option in options: 5151 if self._match_text_seq(*option.split(" ")): 5152 return exp.var(option) 5153 return None 5154 5155 def _parse_as_command(self, start: Token) -> exp.Command: 5156 while self._curr: 5157 self._advance() 5158 text = self._find_sql(start, self._prev) 5159 size = len(start.text) 5160 return exp.Command(this=text[:size], expression=text[size:]) 5161 5162 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5163 settings = [] 5164 5165 self._match_l_paren() 5166 kind = self._parse_id_var() 5167 5168 if self._match(TokenType.L_PAREN): 5169 while True: 5170 key = self._parse_id_var() 5171 value = self._parse_primary() 5172 5173 if not key and value is None: 5174 break 5175 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5176 self._match(TokenType.R_PAREN) 5177 5178 self._match_r_paren() 5179 5180 return self.expression( 5181 exp.DictProperty, 5182 this=this, 5183 kind=kind.this if kind else None, 5184 settings=settings, 5185 ) 5186 5187 def _parse_dict_range(self, this: str) -> exp.DictRange: 5188 self._match_l_paren() 5189 has_min = self._match_text_seq("MIN") 5190 if has_min: 5191 min = self._parse_var() or self._parse_primary() 5192 self._match_text_seq("MAX") 5193 max = self._parse_var() or self._parse_primary() 5194 else: 5195 max = self._parse_var() or self._parse_primary() 5196 min = exp.Literal.number(0) 5197 self._match_r_paren() 5198 return self.expression(exp.DictRange, this=this, min=min, max=max) 5199 5200 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5201 index = self._index 5202 expression = self._parse_column() 5203 if not self._match(TokenType.IN): 5204 self._retreat(index - 1) 5205 return None 5206 iterator = self._parse_column() 5207 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5208 return self.expression( 5209 exp.Comprehension, 5210 this=this, 5211 expression=expression, 5212 iterator=iterator, 5213 condition=condition, 5214 ) 5215 5216 def _find_parser( 5217 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5218 ) -> t.Optional[t.Callable]: 5219 if not self._curr: 5220 return None 5221 5222 index = self._index 5223 this = [] 5224 while True: 5225 # The current token might be multiple words 5226 curr = self._curr.text.upper() 5227 key = curr.split(" ") 5228 this.append(curr) 5229 5230 self._advance() 5231 result, trie = in_trie(trie, key) 5232 if result == TrieResult.FAILED: 5233 break 5234 5235 if result == TrieResult.EXISTS: 5236 subparser = parsers[" ".join(this)] 5237 return subparser 5238 5239 
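        # No key in the trie fully matched the token sequence, so rewind to
        # where we started and let the caller fall back (e.g. to parsing a
        # generic command).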
self._retreat(index) 5240 return None 5241 5242 def _match(self, token_type, advance=True, expression=None): 5243 if not self._curr: 5244 return None 5245 5246 if self._curr.token_type == token_type: 5247 if advance: 5248 self._advance() 5249 self._add_comments(expression) 5250 return True 5251 5252 return None 5253 5254 def _match_set(self, types, advance=True): 5255 if not self._curr: 5256 return None 5257 5258 if self._curr.token_type in types: 5259 if advance: 5260 self._advance() 5261 return True 5262 5263 return None 5264 5265 def _match_pair(self, token_type_a, token_type_b, advance=True): 5266 if not self._curr or not self._next: 5267 return None 5268 5269 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5270 if advance: 5271 self._advance(2) 5272 return True 5273 5274 return None 5275 5276 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5277 if not self._match(TokenType.L_PAREN, expression=expression): 5278 self.raise_error("Expecting (") 5279 5280 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5281 if not self._match(TokenType.R_PAREN, expression=expression): 5282 self.raise_error("Expecting )") 5283 5284 def _match_texts(self, texts, advance=True): 5285 if self._curr and self._curr.text.upper() in texts: 5286 if advance: 5287 self._advance() 5288 return True 5289 return False 5290 5291 def _match_text_seq(self, *texts, advance=True): 5292 index = self._index 5293 for text in texts: 5294 if self._curr and self._curr.text.upper() == text: 5295 self._advance() 5296 else: 5297 self._retreat(index) 5298 return False 5299 5300 if not advance: 5301 self._retreat(index) 5302 5303 return True 5304 5305 @t.overload 5306 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5307 ... 5308 5309 @t.overload 5310 def _replace_columns_with_dots( 5311 self, this: t.Optional[exp.Expression] 5312 ) -> t.Optional[exp.Expression]: 5313 ... 5314 5315 def _replace_columns_with_dots(self, this): 5316 if isinstance(this, exp.Dot): 5317 exp.replace_children(this, self._replace_columns_with_dots) 5318 elif isinstance(this, exp.Column): 5319 exp.replace_children(this, self._replace_columns_with_dots) 5320 table = this.args.get("table") 5321 this = ( 5322 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5323 ) 5324 5325 return this 5326 5327 def _replace_lambda( 5328 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5329 ) -> t.Optional[exp.Expression]: 5330 if not node: 5331 return node 5332 5333 for column in node.find_all(exp.Column): 5334 if column.parts[0].name in lambda_variables: 5335 dot_or_id = column.to_dot() if column.table else column.this 5336 parent = column.parent 5337 5338 while isinstance(parent, exp.Dot): 5339 if not isinstance(parent.parent, exp.Dot): 5340 parent.replace(dot_or_id) 5341 break 5342 parent = parent.parent 5343 else: 5344 if column is node: 5345 node = dot_or_id 5346 else: 5347 column.replace(dot_or_id) 5348 return node 5349 5350 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5351 return [ 5352 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5353 for value in values 5354 if value 5355 ]
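A minimal usage sketch (illustrative, not part of the module source): the Parser is
usually driven indirectly through sqlglot.parse_one, but it can also be fed raw
tokens produced by the Tokenizer directly.

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Tokenize a statement, then parse the token stream; parse() returns one
    # (optional) expression per semicolon-separated statement.
    tokens = Tokenizer().tokenize("SELECT CAST(price AS TEXT) FROM items")
    ast = Parser().parse(tokens)[0]

    assert isinstance(ast, exp.Select)
    print(ast.sql())  # SELECT CAST(price AS TEXT) FROM items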
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
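These token sets control which keywords may double as identifiers or aliases: ID_VAR_TOKENS lists keywords that can still name a column or table, while TABLE_ALIAS_TOKENS subtracts the ones (LEFT, FULL, OFFSET, ...) that would make joins and query modifiers ambiguous. A quick illustrative check:

    from sqlglot.parser import Parser
    from sqlglot.tokens import TokenType

    assert TokenType.FORMAT in Parser.ID_VAR_TOKENS          # FORMAT can be an identifier
    assert TokenType.LEFT in Parser.ID_VAR_TOKENS            # so can LEFT ...
    assert TokenType.LEFT not in Parser.TABLE_ALIAS_TOKENS   # ... but never as a table alias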
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
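COLUMN_OPERATORS attaches postfix operators to a freshly parsed column: DCOLON builds a cast node (strict or TRY, depending on STRICT_CAST below), and the arrow family builds JSON extraction nodes. An illustrative use through the public API, assuming the postgres dialect for the arrow operator:

    import sqlglot
    from sqlglot import exp

    cast = sqlglot.parse_one("SELECT x::INT FROM t").find(exp.Cast)
    print(cast.sql())  # CAST(x AS INT)

    extracted = sqlglot.parse_one("SELECT j -> 'k' FROM t", read="postgres").find(exp.JSONExtract)
    print(extracted.sql(dialect="postgres"))  # j -> 'k'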
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
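_parse_statement (further down) dispatches on the first token of each statement via STATEMENT_PARSERS; anything unmatched falls back to expression parsing or a generic exp.Command. Dialects extend the table by subclassing. A hypothetical sketch (ShowParser and its SHOW handler are illustrative, not part of sqlglot):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import TokenType

    class ShowParser(Parser):
        STATEMENT_PARSERS = {
            **Parser.STATEMENT_PARSERS,
            # Hypothetical: build an exp.Show node instead of a generic Command
            TokenType.SHOW: lambda self: self.expression(
                exp.Show, this=self._parse_id_var().name.upper()
            ),
        }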
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
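RANGE_PARSERS handles predicates that follow a parsed operand, e.g. BETWEEN, IN, IS and the LIKE family. For instance:

    import sqlglot
    from sqlglot import exp

    node = sqlglot.parse_one("x BETWEEN 1 AND 10", into=exp.Condition)
    assert isinstance(node, exp.Between)
    print(node.args["low"].sql(), node.args["high"].sql())  # 1 10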
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
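PROPERTY_PARSERS maps DDL keywords, seen mostly in CREATE statements, to property builders. An illustrative round trip for MySQL's ENGINE property:

    import sqlglot

    ddl = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    print(ddl.sql(dialect="mysql"))  # CREATE TABLE t (x INT) ENGINE=InnoDB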
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
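CONSTRAINT_PARSERS is consulted while parsing column definitions and table constraints. For example, both constraints below surface as exp.ColumnConstraint nodes with a specific kind:

    import sqlglot
    from sqlglot import exp

    ddl = sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY, name TEXT NOT NULL)")
    for constraint in ddl.find_all(exp.ColumnConstraint):
        print(type(constraint.args["kind"]).__name__)
    # PrimaryKeyColumnConstraint, NotNullColumnConstraint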
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
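FUNCTION_PARSERS covers functions whose argument lists need special grammar, e.g. EXTRACT(part FROM expr) or CAST(x AS type); ordinary functions go through the FUNCTIONS table instead. For instance:

    import sqlglot
    from sqlglot import exp

    extract = sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d) FROM t").find(exp.Extract)
    print(extract.this.sql(), extract.expression.sql())  # YEAR d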
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
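The class-level flags above are how dialects tune behavior without overriding methods. A hypothetical subclass (LenientParser is illustrative) flipping STRICT_CAST, so the DCOLON entry in COLUMN_OPERATORS produces exp.TryCast instead of exp.Cast:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    class LenientParser(Parser):
        STRICT_CAST = False  # :: now builds exp.TryCast

    sql = "SELECT x::INT"
    tree = LenientParser().parse(Tokenizer().tokenize(sql), sql)[0]
    assert tree.find(exp.TryCast) is not None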
982 """ 983 return self._parse( 984 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 985 ) 986 987 def parse_into( 988 self, 989 expression_types: exp.IntoType, 990 raw_tokens: t.List[Token], 991 sql: t.Optional[str] = None, 992 ) -> t.List[t.Optional[exp.Expression]]: 993 """ 994 Parses a list of tokens into a given Expression type. If a collection of Expression 995 types is given instead, this method will try to parse the token list into each one 996 of them, stopping at the first for which the parsing succeeds. 997 998 Args: 999 expression_types: The expression type(s) to try and parse the token list into. 1000 raw_tokens: The list of tokens. 1001 sql: The original SQL string, used to produce helpful debug messages. 1002 1003 Returns: 1004 The target Expression. 1005 """ 1006 errors = [] 1007 for expression_type in ensure_list(expression_types): 1008 parser = self.EXPRESSION_PARSERS.get(expression_type) 1009 if not parser: 1010 raise TypeError(f"No parser registered for {expression_type}") 1011 1012 try: 1013 return self._parse(parser, raw_tokens, sql) 1014 except ParseError as e: 1015 e.errors[0]["into_expression"] = expression_type 1016 errors.append(e) 1017 1018 raise ParseError( 1019 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1020 errors=merge_errors(errors), 1021 ) from errors[-1] 1022 1023 def _parse( 1024 self, 1025 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1026 raw_tokens: t.List[Token], 1027 sql: t.Optional[str] = None, 1028 ) -> t.List[t.Optional[exp.Expression]]: 1029 self.reset() 1030 self.sql = sql or "" 1031 1032 total = len(raw_tokens) 1033 chunks: t.List[t.List[Token]] = [[]] 1034 1035 for i, token in enumerate(raw_tokens): 1036 if token.token_type == TokenType.SEMICOLON: 1037 if i < total - 1: 1038 chunks.append([]) 1039 else: 1040 chunks[-1].append(token) 1041 1042 expressions = [] 1043 1044 for tokens in chunks: 1045 self._index = -1 1046 self._tokens = tokens 1047 self._advance() 1048 1049 expressions.append(parse_method(self)) 1050 1051 if self._index < len(self._tokens): 1052 self.raise_error("Invalid expression / Unexpected token") 1053 1054 self.check_errors() 1055 1056 return expressions 1057 1058 def check_errors(self) -> None: 1059 """Logs or raises any found errors, depending on the chosen error level setting.""" 1060 if self.error_level == ErrorLevel.WARN: 1061 for error in self.errors: 1062 logger.error(str(error)) 1063 elif self.error_level == ErrorLevel.RAISE and self.errors: 1064 raise ParseError( 1065 concat_messages(self.errors, self.max_errors), 1066 errors=merge_errors(self.errors), 1067 ) 1068 1069 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1070 """ 1071 Appends an error in the list of recorded errors or raises it, depending on the chosen 1072 error level setting. 1073 """ 1074 token = token or self._curr or self._prev or Token.string("") 1075 start = token.start 1076 end = token.end + 1 1077 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1078 highlight = self.sql[start:end] 1079 end_context = self.sql[end : end + self.error_message_context] 1080 1081 error = ParseError.new( 1082 f"{message}. 
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
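_parse_comment covers the COMMENT ON family; unknown targets degrade to a generic exp.Command through _parse_as_command. For example:

    import sqlglot
    from sqlglot import exp

    stmt = sqlglot.parse_one("COMMENT ON TABLE db.t IS 'fact table'")
    assert isinstance(stmt, exp.Comment)
    print(stmt.text("kind"), stmt.expression.sql())  # TABLE 'fact table'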
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev
        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)
    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
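Most of the helpers in this stretch (FALLBACK, JOURNAL, DATABLOCKSIZE, BLOCKCOMPRESSION, LOCKING) exist for Teradata DDL, where such options sit between the table name and the column list and are picked up by _parse_property_before above. A hedged, Teradata-flavored example; the exact output formatting may vary between sqlglot versions:

    import sqlglot

    ddl = "CREATE TABLE t, NO FALLBACK, NO JOURNAL (x INT)"
    print(sqlglot.parse_one(ddl, read="teradata").sql(dialect="teradata"))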
    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)
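A DESCRIBE statement produced by _parse_describe keeps the optional CREATABLE kind alongside the table reference:

    import sqlglot
    from sqlglot import exp

    node = sqlglot.parse_one("DESCRIBE TABLE db.t")
    assert isinstance(node, exp.Describe)
    print(node.text("kind"), node.this.sql())  # TABLE db.t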
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
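    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # _parse_on_conflict above covers both the Postgres ON CONFLICT and the MySQL
    # ON DUPLICATE KEY forms, e.g.:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("INSERT INTO t VALUES (1) ON CONFLICT DO NOTHING").sql()
    #   'INSERT INTO t VALUES (1) ON CONFLICT DO NOTHING'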
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
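    # Illustrative sketch (assumes `sqlglot` is importable; behavior is indicative):
    # when a DELETE names tables before FROM, _parse_delete above stores them in
    # the `tables` arg of the resulting exp.Delete:
    #
    #   >>> import sqlglot
    #   >>> expr = sqlglot.parse_one(
    #   ...     "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql"
    #   ... )
    #   >>> bool(expr.args.get("tables"))
    #   True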
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
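    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # _parse_select below accepts DuckDB's leading-FROM form, expanding a bare
    # FROM into SELECT *:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("FROM tbl", read="duckdb").sql()
    #   'SELECT * FROM tbl'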
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                this.set("offset", exp.Offset(expression=offset))

                        continue
                break
        return this
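    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # _parse_with/_parse_cte above attach the WITH clause to the statement that
    # follows it:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("WITH cte AS (SELECT 1 AS x) SELECT x FROM cte").sql()
    #   'WITH cte AS (SELECT 1 AS x) SELECT x FROM cte'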
    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
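    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # _parse_join above records the join method/side/kind as plain strings on the
    # exp.Join node:
    #
    #   >>> from sqlglot import exp, parse_one
    #   >>> join = parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").find(exp.Join)
    #   >>> join.args.get("side")
    #   'LEFT'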
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
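    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # _parse_table_parts above splits dotted names into catalog/db/table:
    #
    #   >>> from sqlglot import exp, parse_one
    #   >>> tbl = parse_one("SELECT * FROM c.db.tbl").find(exp.Table)
    #   >>> tbl.catalog, tbl.db, tbl.name
    #   ('c', 'db', 'tbl')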
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None
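    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # _parse_derived_table_values above handles VALUES used as a table source:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS v(a)").sql()
    #   'SELECT * FROM (VALUES (1), (2)) AS v(a)'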
    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
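    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # _parse_group and _parse_having above combine in the usual way:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "SELECT a, COUNT(*) FROM t GROUP BY a HAVING COUNT(*) > 1"
    #   ... ).sql()
    #   'SELECT a, COUNT(*) FROM t GROUP BY a HAVING COUNT(*) > 1'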
"nulls_are_last" 3055 ): 3056 nulls_first = True 3057 3058 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3059 3060 def _parse_limit( 3061 self, this: t.Optional[exp.Expression] = None, top: bool = False 3062 ) -> t.Optional[exp.Expression]: 3063 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3064 comments = self._prev_comments 3065 if top: 3066 limit_paren = self._match(TokenType.L_PAREN) 3067 expression = self._parse_number() 3068 3069 if limit_paren: 3070 self._match_r_paren() 3071 else: 3072 expression = self._parse_term() 3073 3074 if self._match(TokenType.COMMA): 3075 offset = expression 3076 expression = self._parse_term() 3077 else: 3078 offset = None 3079 3080 limit_exp = self.expression( 3081 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3082 ) 3083 3084 return limit_exp 3085 3086 if self._match(TokenType.FETCH): 3087 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3088 direction = self._prev.text if direction else "FIRST" 3089 3090 count = self._parse_field(tokens=self.FETCH_TOKENS) 3091 percent = self._match(TokenType.PERCENT) 3092 3093 self._match_set((TokenType.ROW, TokenType.ROWS)) 3094 3095 only = self._match_text_seq("ONLY") 3096 with_ties = self._match_text_seq("WITH", "TIES") 3097 3098 if only and with_ties: 3099 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3100 3101 return self.expression( 3102 exp.Fetch, 3103 direction=direction, 3104 count=count, 3105 percent=percent, 3106 with_ties=with_ties, 3107 ) 3108 3109 return this 3110 3111 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3112 if not self._match(TokenType.OFFSET): 3113 return this 3114 3115 count = self._parse_term() 3116 self._match_set((TokenType.ROW, TokenType.ROWS)) 3117 return self.expression(exp.Offset, this=this, expression=count) 3118 3119 def _parse_locks(self) -> t.List[exp.Lock]: 3120 locks = [] 3121 while True: 3122 if self._match_text_seq("FOR", "UPDATE"): 3123 update = True 3124 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3125 "LOCK", "IN", "SHARE", "MODE" 3126 ): 3127 update = False 3128 else: 3129 break 3130 3131 expressions = None 3132 if self._match_text_seq("OF"): 3133 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3134 3135 wait: t.Optional[bool | exp.Expression] = None 3136 if self._match_text_seq("NOWAIT"): 3137 wait = True 3138 elif self._match_text_seq("WAIT"): 3139 wait = self._parse_primary() 3140 elif self._match_text_seq("SKIP", "LOCKED"): 3141 wait = False 3142 3143 locks.append( 3144 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3145 ) 3146 3147 return locks 3148 3149 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3150 if not self._match_set(self.SET_OPERATIONS): 3151 return this 3152 3153 token_type = self._prev.token_type 3154 3155 if token_type == TokenType.UNION: 3156 expression = exp.Union 3157 elif token_type == TokenType.EXCEPT: 3158 expression = exp.Except 3159 else: 3160 expression = exp.Intersect 3161 3162 return self.expression( 3163 expression, 3164 comments=self._prev.comments, 3165 this=this, 3166 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3167 by_name=self._match_text_seq("BY", "NAME"), 3168 expression=self._parse_set_operations(self._parse_select(nested=True)), 3169 ) 3170 3171 def _parse_expression(self) -> t.Optional[exp.Expression]: 
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
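    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # the precedence cascade above (_parse_conjunction -> _parse_equality ->
    # _parse_comparison -> _parse_range) is what turns BETWEEN, IN, IS, etc. into
    # their dedicated nodes:
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("a BETWEEN 1 AND 10")).__name__
    #   'Between'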
    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
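    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # _parse_interval above canonicalizes INTERVAL '5 day' into a quoted number
    # plus a separate unit, per the comment in that method:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("INTERVAL '5 day'").sql()
    #   "INTERVAL '5' day"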
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
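    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # _parse_types above parses parameterized types such as DECIMAL(10, 2) via
    # _parse_type_size:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT CAST(x AS DECIMAL(10, 2))").sql()
    #   'SELECT CAST(x AS DECIMAL(10, 2))'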
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )
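    # Illustrative sketch (assumes `sqlglot` is importable; output is indicative):
    # the DCOLON branch of _parse_column_ops above parses Postgres-style casts,
    # which the default generator renders as CAST:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT a::INT FROM t").sql()
    #   'SELECT CAST(a AS INT) FROM t'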
self.expression(subquery_predicate, this=self._parse_select()) 3659 self._match_r_paren() 3660 return this 3661 3662 if functions is None: 3663 functions = self.FUNCTIONS 3664 3665 function = functions.get(upper) 3666 3667 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3668 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3669 3670 if function and not anonymous: 3671 func = self.validate_expression(function(args), args) 3672 if not self.NORMALIZE_FUNCTIONS: 3673 func.meta["name"] = this 3674 this = func 3675 else: 3676 this = self.expression(exp.Anonymous, this=this, expressions=args) 3677 3678 self._match_r_paren(this) 3679 return self._parse_window(this) 3680 3681 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3682 return self._parse_column_def(self._parse_id_var()) 3683 3684 def _parse_user_defined_function( 3685 self, kind: t.Optional[TokenType] = None 3686 ) -> t.Optional[exp.Expression]: 3687 this = self._parse_id_var() 3688 3689 while self._match(TokenType.DOT): 3690 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3691 3692 if not self._match(TokenType.L_PAREN): 3693 return this 3694 3695 expressions = self._parse_csv(self._parse_function_parameter) 3696 self._match_r_paren() 3697 return self.expression( 3698 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3699 ) 3700 3701 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3702 literal = self._parse_primary() 3703 if literal: 3704 return self.expression(exp.Introducer, this=token.text, expression=literal) 3705 3706 return self.expression(exp.Identifier, this=token.text) 3707 3708 def _parse_session_parameter(self) -> exp.SessionParameter: 3709 kind = None 3710 this = self._parse_id_var() or self._parse_primary() 3711 3712 if this and self._match(TokenType.DOT): 3713 kind = this.name 3714 this = self._parse_var() or self._parse_primary() 3715 3716 return self.expression(exp.SessionParameter, this=this, kind=kind) 3717 3718 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3719 index = self._index 3720 3721 if self._match(TokenType.L_PAREN): 3722 expressions = t.cast( 3723 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3724 ) 3725 3726 if not self._match(TokenType.R_PAREN): 3727 self._retreat(index) 3728 else: 3729 expressions = [self._parse_id_var()] 3730 3731 if self._match_set(self.LAMBDAS): 3732 return self.LAMBDAS[self._prev.token_type](self, expressions) 3733 3734 self._retreat(index) 3735 3736 this: t.Optional[exp.Expression] 3737 3738 if self._match(TokenType.DISTINCT): 3739 this = self.expression( 3740 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3741 ) 3742 else: 3743 this = self._parse_select_or_expression(alias=alias) 3744 3745 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3746 3747 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3748 index = self._index 3749 3750 if not self.errors: 3751 try: 3752 if self._parse_select(nested=True): 3753 return this 3754 except ParseError: 3755 pass 3756 finally: 3757 self.errors.clear() 3758 self._retreat(index) 3759 3760 if not self._match(TokenType.L_PAREN): 3761 return this 3762 3763 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3764 3765 self._match_r_paren() 3766 return self.expression(exp.Schema, this=this, expressions=args) 3767 3768 def _parse_field_def(self) -> 
t.Optional[exp.Expression]: 3769 return self._parse_column_def(self._parse_field(any_token=True)) 3770 3771 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3772 # column defs are not really columns, they're identifiers 3773 if isinstance(this, exp.Column): 3774 this = this.this 3775 3776 kind = self._parse_types(schema=True) 3777 3778 if self._match_text_seq("FOR", "ORDINALITY"): 3779 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3780 3781 constraints: t.List[exp.Expression] = [] 3782 3783 if not kind and self._match(TokenType.ALIAS): 3784 constraints.append( 3785 self.expression( 3786 exp.ComputedColumnConstraint, 3787 this=self._parse_conjunction(), 3788 persisted=self._match_text_seq("PERSISTED"), 3789 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3790 ) 3791 ) 3792 3793 while True: 3794 constraint = self._parse_column_constraint() 3795 if not constraint: 3796 break 3797 constraints.append(constraint) 3798 3799 if not kind and not constraints: 3800 return this 3801 3802 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3803 3804 def _parse_auto_increment( 3805 self, 3806 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3807 start = None 3808 increment = None 3809 3810 if self._match(TokenType.L_PAREN, advance=False): 3811 args = self._parse_wrapped_csv(self._parse_bitwise) 3812 start = seq_get(args, 0) 3813 increment = seq_get(args, 1) 3814 elif self._match_text_seq("START"): 3815 start = self._parse_bitwise() 3816 self._match_text_seq("INCREMENT") 3817 increment = self._parse_bitwise() 3818 3819 if start and increment: 3820 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3821 3822 return exp.AutoIncrementColumnConstraint() 3823 3824 def _parse_compress(self) -> exp.CompressColumnConstraint: 3825 if self._match(TokenType.L_PAREN, advance=False): 3826 return self.expression( 3827 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3828 ) 3829 3830 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3831 3832 def _parse_generated_as_identity( 3833 self, 3834 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3835 if self._match_text_seq("BY", "DEFAULT"): 3836 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3837 this = self.expression( 3838 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3839 ) 3840 else: 3841 self._match_text_seq("ALWAYS") 3842 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3843 3844 self._match(TokenType.ALIAS) 3845 identity = self._match_text_seq("IDENTITY") 3846 3847 if self._match(TokenType.L_PAREN): 3848 if self._match(TokenType.START_WITH): 3849 this.set("start", self._parse_bitwise()) 3850 if self._match_text_seq("INCREMENT", "BY"): 3851 this.set("increment", self._parse_bitwise()) 3852 if self._match_text_seq("MINVALUE"): 3853 this.set("minvalue", self._parse_bitwise()) 3854 if self._match_text_seq("MAXVALUE"): 3855 this.set("maxvalue", self._parse_bitwise()) 3856 3857 if self._match_text_seq("CYCLE"): 3858 this.set("cycle", True) 3859 elif self._match_text_seq("NO", "CYCLE"): 3860 this.set("cycle", False) 3861 3862 if not identity: 3863 this.set("expression", self._parse_bitwise()) 3864 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 3865 args = self._parse_csv(self._parse_bitwise) 3866 this.set("start", 
seq_get(args, 0)) 3867 this.set("increment", seq_get(args, 1)) 3868 3869 self._match_r_paren() 3870 3871 return this 3872 3873 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3874 self._match_text_seq("LENGTH") 3875 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3876 3877 def _parse_not_constraint( 3878 self, 3879 ) -> t.Optional[exp.Expression]: 3880 if self._match_text_seq("NULL"): 3881 return self.expression(exp.NotNullColumnConstraint) 3882 if self._match_text_seq("CASESPECIFIC"): 3883 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3884 if self._match_text_seq("FOR", "REPLICATION"): 3885 return self.expression(exp.NotForReplicationColumnConstraint) 3886 return None 3887 3888 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3889 if self._match(TokenType.CONSTRAINT): 3890 this = self._parse_id_var() 3891 else: 3892 this = None 3893 3894 if self._match_texts(self.CONSTRAINT_PARSERS): 3895 return self.expression( 3896 exp.ColumnConstraint, 3897 this=this, 3898 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3899 ) 3900 3901 return this 3902 3903 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3904 if not self._match(TokenType.CONSTRAINT): 3905 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3906 3907 this = self._parse_id_var() 3908 expressions = [] 3909 3910 while True: 3911 constraint = self._parse_unnamed_constraint() or self._parse_function() 3912 if not constraint: 3913 break 3914 expressions.append(constraint) 3915 3916 return self.expression(exp.Constraint, this=this, expressions=expressions) 3917 3918 def _parse_unnamed_constraint( 3919 self, constraints: t.Optional[t.Collection[str]] = None 3920 ) -> t.Optional[exp.Expression]: 3921 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 3922 constraints or self.CONSTRAINT_PARSERS 3923 ): 3924 return None 3925 3926 constraint = self._prev.text.upper() 3927 if constraint not in self.CONSTRAINT_PARSERS: 3928 self.raise_error(f"No parser found for schema constraint {constraint}.") 3929 3930 return self.CONSTRAINT_PARSERS[constraint](self) 3931 3932 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3933 self._match_text_seq("KEY") 3934 return self.expression( 3935 exp.UniqueColumnConstraint, 3936 this=self._parse_schema(self._parse_id_var(any_token=False)), 3937 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3938 ) 3939 3940 def _parse_key_constraint_options(self) -> t.List[str]: 3941 options = [] 3942 while True: 3943 if not self._curr: 3944 break 3945 3946 if self._match(TokenType.ON): 3947 action = None 3948 on = self._advance_any() and self._prev.text 3949 3950 if self._match_text_seq("NO", "ACTION"): 3951 action = "NO ACTION" 3952 elif self._match_text_seq("CASCADE"): 3953 action = "CASCADE" 3954 elif self._match_text_seq("RESTRICT"): 3955 action = "RESTRICT" 3956 elif self._match_pair(TokenType.SET, TokenType.NULL): 3957 action = "SET NULL" 3958 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3959 action = "SET DEFAULT" 3960 else: 3961 self.raise_error("Invalid key constraint") 3962 3963 options.append(f"ON {on} {action}") 3964 elif self._match_text_seq("NOT", "ENFORCED"): 3965 options.append("NOT ENFORCED") 3966 elif self._match_text_seq("DEFERRABLE"): 3967 options.append("DEFERRABLE") 3968 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3969 options.append("INITIALLY DEFERRED") 3970 elif 
self._match_text_seq("NORELY"): 3971 options.append("NORELY") 3972 elif self._match_text_seq("MATCH", "FULL"): 3973 options.append("MATCH FULL") 3974 else: 3975 break 3976 3977 return options 3978 3979 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3980 if match and not self._match(TokenType.REFERENCES): 3981 return None 3982 3983 expressions = None 3984 this = self._parse_table(schema=True) 3985 options = self._parse_key_constraint_options() 3986 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3987 3988 def _parse_foreign_key(self) -> exp.ForeignKey: 3989 expressions = self._parse_wrapped_id_vars() 3990 reference = self._parse_references() 3991 options = {} 3992 3993 while self._match(TokenType.ON): 3994 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3995 self.raise_error("Expected DELETE or UPDATE") 3996 3997 kind = self._prev.text.lower() 3998 3999 if self._match_text_seq("NO", "ACTION"): 4000 action = "NO ACTION" 4001 elif self._match(TokenType.SET): 4002 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4003 action = "SET " + self._prev.text.upper() 4004 else: 4005 self._advance() 4006 action = self._prev.text.upper() 4007 4008 options[kind] = action 4009 4010 return self.expression( 4011 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4012 ) 4013 4014 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4015 return self._parse_field() 4016 4017 def _parse_primary_key( 4018 self, wrapped_optional: bool = False, in_props: bool = False 4019 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4020 desc = ( 4021 self._match_set((TokenType.ASC, TokenType.DESC)) 4022 and self._prev.token_type == TokenType.DESC 4023 ) 4024 4025 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4026 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4027 4028 expressions = self._parse_wrapped_csv( 4029 self._parse_primary_key_part, optional=wrapped_optional 4030 ) 4031 options = self._parse_key_constraint_options() 4032 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4033 4034 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4035 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4036 return this 4037 4038 bracket_kind = self._prev.token_type 4039 4040 if self._match(TokenType.COLON): 4041 expressions: t.List[exp.Expression] = [ 4042 self.expression(exp.Slice, expression=self._parse_conjunction()) 4043 ] 4044 else: 4045 expressions = self._parse_csv( 4046 lambda: self._parse_slice( 4047 self._parse_alias(self._parse_conjunction(), explicit=True) 4048 ) 4049 ) 4050 4051 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4052 if bracket_kind == TokenType.L_BRACE: 4053 this = self.expression(exp.Struct, expressions=expressions) 4054 elif not this or this.name.upper() == "ARRAY": 4055 this = self.expression(exp.Array, expressions=expressions) 4056 else: 4057 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4058 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4059 4060 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4061 self.raise_error("Expected ]") 4062 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4063 self.raise_error("Expected }") 4064 4065 self._add_comments(this) 4066 return self._parse_bracket(this) 4067 4068 
def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4069 if self._match(TokenType.COLON): 4070 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4071 return this 4072 4073 def _parse_case(self) -> t.Optional[exp.Expression]: 4074 ifs = [] 4075 default = None 4076 4077 comments = self._prev_comments 4078 expression = self._parse_conjunction() 4079 4080 while self._match(TokenType.WHEN): 4081 this = self._parse_conjunction() 4082 self._match(TokenType.THEN) 4083 then = self._parse_conjunction() 4084 ifs.append(self.expression(exp.If, this=this, true=then)) 4085 4086 if self._match(TokenType.ELSE): 4087 default = self._parse_conjunction() 4088 4089 if not self._match(TokenType.END): 4090 self.raise_error("Expected END after CASE", self._prev) 4091 4092 return self._parse_window( 4093 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4094 ) 4095 4096 def _parse_if(self) -> t.Optional[exp.Expression]: 4097 if self._match(TokenType.L_PAREN): 4098 args = self._parse_csv(self._parse_conjunction) 4099 this = self.validate_expression(exp.If.from_arg_list(args), args) 4100 self._match_r_paren() 4101 else: 4102 index = self._index - 1 4103 condition = self._parse_conjunction() 4104 4105 if not condition: 4106 self._retreat(index) 4107 return None 4108 4109 self._match(TokenType.THEN) 4110 true = self._parse_conjunction() 4111 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4112 self._match(TokenType.END) 4113 this = self.expression(exp.If, this=condition, true=true, false=false) 4114 4115 return self._parse_window(this) 4116 4117 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4118 if not self._match_text_seq("VALUE", "FOR"): 4119 self._retreat(self._index - 1) 4120 return None 4121 4122 return self.expression( 4123 exp.NextValueFor, 4124 this=self._parse_column(), 4125 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4126 ) 4127 4128 def _parse_extract(self) -> exp.Extract: 4129 this = self._parse_function() or self._parse_var() or self._parse_type() 4130 4131 if self._match(TokenType.FROM): 4132 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4133 4134 if not self._match(TokenType.COMMA): 4135 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4136 4137 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4138 4139 def _parse_any_value(self) -> exp.AnyValue: 4140 this = self._parse_lambda() 4141 is_max = None 4142 having = None 4143 4144 if self._match(TokenType.HAVING): 4145 self._match_texts(("MAX", "MIN")) 4146 is_max = self._prev.text == "MAX" 4147 having = self._parse_column() 4148 4149 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4150 4151 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4152 this = self._parse_conjunction() 4153 4154 if not self._match(TokenType.ALIAS): 4155 if self._match(TokenType.COMMA): 4156 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4157 4158 self.raise_error("Expected AS after CAST") 4159 4160 fmt = None 4161 to = self._parse_types() 4162 4163 if not to: 4164 self.raise_error("Expected TYPE after CAST") 4165 elif isinstance(to, exp.Identifier): 4166 to = exp.DataType.build(to.name, udt=True) 4167 elif to.this == exp.DataType.Type.CHAR: 4168 if self._match(TokenType.CHARACTER_SET): 4169 to = 
self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4170 elif self._match(TokenType.FORMAT): 4171 fmt_string = self._parse_string() 4172 fmt = self._parse_at_time_zone(fmt_string) 4173 4174 if to.this in exp.DataType.TEMPORAL_TYPES: 4175 this = self.expression( 4176 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4177 this=this, 4178 format=exp.Literal.string( 4179 format_time( 4180 fmt_string.this if fmt_string else "", 4181 self.FORMAT_MAPPING or self.TIME_MAPPING, 4182 self.FORMAT_TRIE or self.TIME_TRIE, 4183 ) 4184 ), 4185 ) 4186 4187 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4188 this.set("zone", fmt.args["zone"]) 4189 4190 return this 4191 4192 return self.expression( 4193 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4194 ) 4195 4196 def _parse_concat(self) -> t.Optional[exp.Expression]: 4197 args = self._parse_csv(self._parse_conjunction) 4198 if self.CONCAT_NULL_OUTPUTS_STRING: 4199 args = self._ensure_string_if_null(args) 4200 4201 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4202 # we find such a call we replace it with its argument. 4203 if len(args) == 1: 4204 return args[0] 4205 4206 return self.expression( 4207 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4208 ) 4209 4210 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4211 args = self._parse_csv(self._parse_conjunction) 4212 if len(args) < 2: 4213 return self.expression(exp.ConcatWs, expressions=args) 4214 delim, *values = args 4215 if self.CONCAT_NULL_OUTPUTS_STRING: 4216 values = self._ensure_string_if_null(values) 4217 4218 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4219 4220 def _parse_string_agg(self) -> exp.Expression: 4221 if self._match(TokenType.DISTINCT): 4222 args: t.List[t.Optional[exp.Expression]] = [ 4223 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4224 ] 4225 if self._match(TokenType.COMMA): 4226 args.extend(self._parse_csv(self._parse_conjunction)) 4227 else: 4228 args = self._parse_csv(self._parse_conjunction) # type: ignore 4229 4230 index = self._index 4231 if not self._match(TokenType.R_PAREN) and args: 4232 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4233 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4234 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4235 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4236 4237 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4238 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4239 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4240 if not self._match_text_seq("WITHIN", "GROUP"): 4241 self._retreat(index) 4242 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4243 4244 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4245 order = self._parse_order(this=seq_get(args, 0)) 4246 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4247 4248 def _parse_convert( 4249 self, strict: bool, safe: t.Optional[bool] = None 4250 ) -> t.Optional[exp.Expression]: 4251 this = self._parse_bitwise() 4252 4253 if self._match(TokenType.USING): 4254 to: t.Optional[exp.Expression] = self.expression( 4255 exp.CharacterSet, this=self._parse_var() 4256 ) 4257 elif self._match(TokenType.COMMA): 4258 to = self._parse_types() 4259 else: 4260 to = None 4261 4262 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4263 4264 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4265 """ 4266 There are generally two variants of the DECODE function: 4267 4268 - DECODE(bin, charset) 4269 - DECODE(expression, search, result [, search, result] ... [, default]) 4270 4271 The second variant will always be parsed into a CASE expression. Note that NULL 4272 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4273 instead of relying on pattern matching. 4274 """ 4275 args = self._parse_csv(self._parse_conjunction) 4276 4277 if len(args) < 3: 4278 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4279 4280 expression, *expressions = args 4281 if not expression: 4282 return None 4283 4284 ifs = [] 4285 for search, result in zip(expressions[::2], expressions[1::2]): 4286 if not search or not result: 4287 return None 4288 4289 if isinstance(search, exp.Literal): 4290 ifs.append( 4291 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4292 ) 4293 elif isinstance(search, exp.Null): 4294 ifs.append( 4295 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4296 ) 4297 else: 4298 cond = exp.or_( 4299 exp.EQ(this=expression.copy(), expression=search), 4300 exp.and_( 4301 exp.Is(this=expression.copy(), expression=exp.Null()), 4302 exp.Is(this=search.copy(), expression=exp.Null()), 4303 copy=False, 4304 ), 4305 copy=False, 4306 ) 4307 ifs.append(exp.If(this=cond, true=result)) 4308 4309 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4310 4311 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4312 self._match_text_seq("KEY") 4313 key = self._parse_column() 4314 self._match_set((TokenType.COLON, TokenType.COMMA)) 4315 self._match_text_seq("VALUE") 4316 value = self._parse_bitwise() 4317 4318 if not key and not value: 4319 return None 4320 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4321 4322 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4323 if not this or not self._match_text_seq("FORMAT", "JSON"): 4324 return this 4325 4326 return self.expression(exp.FormatJson, this=this) 4327 4328 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4329 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4330 for value in values: 4331 if self._match_text_seq(value, "ON", on): 4332 return f"{value} ON {on}" 4333 4334 return None 4335 4336 def _parse_json_object(self) -> exp.JSONObject: 4337 star = self._parse_star() 4338 expressions = ( 4339 [star] 4340 if star 4341 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4342 ) 4343 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4344 4345 unique_keys = None 4346 if self._match_text_seq("WITH", "UNIQUE"): 4347 unique_keys = True 4348 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4349 unique_keys = False 4350 4351 self._match_text_seq("KEYS") 4352 4353 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4354 self._parse_type() 4355 ) 4356 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4357 4358 return self.expression( 4359 exp.JSONObject, 4360 expressions=expressions, 4361 null_handling=null_handling, 4362 unique_keys=unique_keys, 4363 return_type=return_type, 4364 encoding=encoding, 4365 ) 4366 4367 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4368 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4369 if not self._match_text_seq("NESTED"): 4370 this = self._parse_id_var() 4371 kind = self._parse_types(allow_identifiers=False) 4372 nested = None 4373 else: 4374 this = None 4375 kind = None 4376 nested = True 4377 4378 path = self._match_text_seq("PATH") and self._parse_string() 4379 nested_schema = nested and self._parse_json_schema() 4380 4381 return self.expression( 4382 exp.JSONColumnDef, 4383 this=this, 4384 kind=kind, 4385 path=path, 4386 nested_schema=nested_schema, 4387 ) 4388 4389 def _parse_json_schema(self) -> exp.JSONSchema: 4390 self._match_text_seq("COLUMNS") 4391 return self.expression( 4392 exp.JSONSchema, 4393 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4394 ) 4395 4396 def _parse_json_table(self) -> exp.JSONTable: 4397 this = self._parse_format_json(self._parse_bitwise()) 4398 path = self._match(TokenType.COMMA) and self._parse_string() 4399 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4400 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4401 schema = self._parse_json_schema() 4402 4403 return exp.JSONTable( 4404 this=this, 4405 schema=schema, 4406 path=path, 4407 error_handling=error_handling, 4408 empty_handling=empty_handling, 4409 ) 4410 4411 def _parse_logarithm(self) -> exp.Func: 4412 # Default argument order is base, expression 4413 args = self._parse_csv(self._parse_range) 4414 4415 if len(args) > 1: 4416 if not self.LOG_BASE_FIRST: 4417 args.reverse() 4418 return exp.Log.from_arg_list(args) 4419 4420 return self.expression( 4421 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4422 ) 4423 4424 def _parse_match_against(self) -> exp.MatchAgainst: 4425 expressions = self._parse_csv(self._parse_column) 4426 4427 self._match_text_seq(")", "AGAINST", "(") 4428 4429 this = self._parse_string() 4430 4431 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4432 modifier = "IN NATURAL LANGUAGE MODE" 4433 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4434 modifier = f"{modifier} WITH QUERY EXPANSION" 4435 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4436 modifier = "IN BOOLEAN MODE" 4437 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4438 modifier = "WITH QUERY EXPANSION" 4439 else: 4440 modifier = None 4441 4442 return 
self.expression( 4443 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4444 ) 4445 4446 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4447 def _parse_open_json(self) -> exp.OpenJSON: 4448 this = self._parse_bitwise() 4449 path = self._match(TokenType.COMMA) and self._parse_string() 4450 4451 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4452 this = self._parse_field(any_token=True) 4453 kind = self._parse_types() 4454 path = self._parse_string() 4455 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4456 4457 return self.expression( 4458 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4459 ) 4460 4461 expressions = None 4462 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4463 self._match_l_paren() 4464 expressions = self._parse_csv(_parse_open_json_column_def) 4465 4466 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4467 4468 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4469 args = self._parse_csv(self._parse_bitwise) 4470 4471 if self._match(TokenType.IN): 4472 return self.expression( 4473 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4474 ) 4475 4476 if haystack_first: 4477 haystack = seq_get(args, 0) 4478 needle = seq_get(args, 1) 4479 else: 4480 needle = seq_get(args, 0) 4481 haystack = seq_get(args, 1) 4482 4483 return self.expression( 4484 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4485 ) 4486 4487 def _parse_predict(self) -> exp.Predict: 4488 self._match_text_seq("MODEL") 4489 this = self._parse_table() 4490 4491 self._match(TokenType.COMMA) 4492 self._match_text_seq("TABLE") 4493 4494 return self.expression( 4495 exp.Predict, 4496 this=this, 4497 expression=self._parse_table(), 4498 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4499 ) 4500 4501 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4502 args = self._parse_csv(self._parse_table) 4503 return exp.JoinHint(this=func_name.upper(), expressions=args) 4504 4505 def _parse_substring(self) -> exp.Substring: 4506 # Postgres supports the form: substring(string [from int] [for int]) 4507 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4508 4509 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4510 4511 if self._match(TokenType.FROM): 4512 args.append(self._parse_bitwise()) 4513 if self._match(TokenType.FOR): 4514 args.append(self._parse_bitwise()) 4515 4516 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4517 4518 def _parse_trim(self) -> exp.Trim: 4519 # https://www.w3resource.com/sql/character-functions/trim.php 4520 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4521 4522 position = None 4523 collation = None 4524 expression = None 4525 4526 if self._match_texts(self.TRIM_TYPES): 4527 position = self._prev.text.upper() 4528 4529 this = self._parse_bitwise() 4530 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4531 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4532 expression = self._parse_bitwise() 4533 4534 if invert_order: 4535 this, expression = expression, this 4536 4537 if self._match(TokenType.COLLATE): 4538 collation = self._parse_bitwise() 4539 4540 return self.expression( 4541 exp.Trim, this=this, position=position, expression=expression, collation=collation 4542 ) 4543 4544 def 
_parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4545 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4546 4547 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4548 return self._parse_window(self._parse_id_var(), alias=True) 4549 4550 def _parse_respect_or_ignore_nulls( 4551 self, this: t.Optional[exp.Expression] 4552 ) -> t.Optional[exp.Expression]: 4553 if self._match_text_seq("IGNORE", "NULLS"): 4554 return self.expression(exp.IgnoreNulls, this=this) 4555 if self._match_text_seq("RESPECT", "NULLS"): 4556 return self.expression(exp.RespectNulls, this=this) 4557 return this 4558 4559 def _parse_window( 4560 self, this: t.Optional[exp.Expression], alias: bool = False 4561 ) -> t.Optional[exp.Expression]: 4562 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4563 self._match(TokenType.WHERE) 4564 this = self.expression( 4565 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4566 ) 4567 self._match_r_paren() 4568 4569 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4570 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4571 if self._match_text_seq("WITHIN", "GROUP"): 4572 order = self._parse_wrapped(self._parse_order) 4573 this = self.expression(exp.WithinGroup, this=this, expression=order) 4574 4575 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4576 # Some dialects choose to implement and some do not. 4577 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4578 4579 # There is some code above in _parse_lambda that handles 4580 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4581 4582 # The below changes handle 4583 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4584 4585 # Oracle allows both formats 4586 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4587 # and Snowflake chose to do the same for familiarity 4588 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4589 this = self._parse_respect_or_ignore_nulls(this) 4590 4591 # bigquery select from window x AS (partition by ...) 
4592 if alias: 4593 over = None 4594 self._match(TokenType.ALIAS) 4595 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4596 return this 4597 else: 4598 over = self._prev.text.upper() 4599 4600 if not self._match(TokenType.L_PAREN): 4601 return self.expression( 4602 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4603 ) 4604 4605 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4606 4607 first = self._match(TokenType.FIRST) 4608 if self._match_text_seq("LAST"): 4609 first = False 4610 4611 partition, order = self._parse_partition_and_order() 4612 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4613 4614 if kind: 4615 self._match(TokenType.BETWEEN) 4616 start = self._parse_window_spec() 4617 self._match(TokenType.AND) 4618 end = self._parse_window_spec() 4619 4620 spec = self.expression( 4621 exp.WindowSpec, 4622 kind=kind, 4623 start=start["value"], 4624 start_side=start["side"], 4625 end=end["value"], 4626 end_side=end["side"], 4627 ) 4628 else: 4629 spec = None 4630 4631 self._match_r_paren() 4632 4633 window = self.expression( 4634 exp.Window, 4635 this=this, 4636 partition_by=partition, 4637 order=order, 4638 spec=spec, 4639 alias=window_alias, 4640 over=over, 4641 first=first, 4642 ) 4643 4644 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4645 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4646 return self._parse_window(window, alias=alias) 4647 4648 return window 4649 4650 def _parse_partition_and_order( 4651 self, 4652 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4653 return self._parse_partition_by(), self._parse_order() 4654 4655 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4656 self._match(TokenType.BETWEEN) 4657 4658 return { 4659 "value": ( 4660 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4661 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4662 or self._parse_bitwise() 4663 ), 4664 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4665 } 4666 4667 def _parse_alias( 4668 self, this: t.Optional[exp.Expression], explicit: bool = False 4669 ) -> t.Optional[exp.Expression]: 4670 any_token = self._match(TokenType.ALIAS) 4671 4672 if explicit and not any_token: 4673 return this 4674 4675 if self._match(TokenType.L_PAREN): 4676 aliases = self.expression( 4677 exp.Aliases, 4678 this=this, 4679 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4680 ) 4681 self._match_r_paren(aliases) 4682 return aliases 4683 4684 alias = self._parse_id_var(any_token) 4685 4686 if alias: 4687 return self.expression(exp.Alias, this=this, alias=alias) 4688 4689 return this 4690 4691 def _parse_id_var( 4692 self, 4693 any_token: bool = True, 4694 tokens: t.Optional[t.Collection[TokenType]] = None, 4695 ) -> t.Optional[exp.Expression]: 4696 identifier = self._parse_identifier() 4697 4698 if identifier: 4699 return identifier 4700 4701 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4702 quoted = self._prev.token_type == TokenType.STRING 4703 return exp.Identifier(this=self._prev.text, quoted=quoted) 4704 4705 return None 4706 4707 def _parse_string(self) -> t.Optional[exp.Expression]: 4708 if self._match(TokenType.STRING): 4709 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4710 return self._parse_placeholder() 4711 4712 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4713 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4714 4715 def _parse_number(self) -> t.Optional[exp.Expression]: 4716 if self._match(TokenType.NUMBER): 4717 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4718 return self._parse_placeholder() 4719 4720 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4721 if self._match(TokenType.IDENTIFIER): 4722 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4723 return self._parse_placeholder() 4724 4725 def _parse_var( 4726 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4727 ) -> t.Optional[exp.Expression]: 4728 if ( 4729 (any_token and self._advance_any()) 4730 or self._match(TokenType.VAR) 4731 or (self._match_set(tokens) if tokens else False) 4732 ): 4733 return self.expression(exp.Var, this=self._prev.text) 4734 return self._parse_placeholder() 4735 4736 def _advance_any(self) -> t.Optional[Token]: 4737 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4738 self._advance() 4739 return self._prev 4740 return None 4741 4742 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4743 return self._parse_var() or self._parse_string() 4744 4745 def _parse_null(self) -> t.Optional[exp.Expression]: 4746 if self._match_set(self.NULL_TOKENS): 4747 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4748 return self._parse_placeholder() 4749 4750 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4751 if self._match(TokenType.TRUE): 4752 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4753 if self._match(TokenType.FALSE): 4754 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4755 return self._parse_placeholder() 4756 4757 def _parse_star(self) -> t.Optional[exp.Expression]: 4758 if self._match(TokenType.STAR): 4759 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4760 return self._parse_placeholder() 4761 4762 def _parse_parameter(self) -> exp.Parameter: 4763 wrapped = self._match(TokenType.L_BRACE) 4764 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4765 self._match(TokenType.R_BRACE) 4766 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4767 4768 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4769 if self._match_set(self.PLACEHOLDER_PARSERS): 4770 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4771 if placeholder: 4772 return placeholder 4773 self._advance(-1) 4774 return None 4775 4776 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4777 if not self._match(TokenType.EXCEPT): 4778 return None 4779 if self._match(TokenType.L_PAREN, advance=False): 4780 return self._parse_wrapped_csv(self._parse_column) 4781 4782 except_column = self._parse_column() 4783 return [except_column] if except_column else None 4784 4785 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4786 if not self._match(TokenType.REPLACE): 4787 return None 4788 if self._match(TokenType.L_PAREN, advance=False): 4789 return self._parse_wrapped_csv(self._parse_expression) 4790 4791 replace_expression = self._parse_expression() 4792 return [replace_expression] if replace_expression else None 4793 4794 def _parse_csv( 4795 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4796 ) -> t.List[exp.Expression]: 4797 parse_result = parse_method() 4798 items = [parse_result] if parse_result is not None else [] 4799 4800 while self._match(sep): 4801 self._add_comments(parse_result) 
4802 parse_result = parse_method() 4803 if parse_result is not None: 4804 items.append(parse_result) 4805 4806 return items 4807 4808 def _parse_tokens( 4809 self, parse_method: t.Callable, expressions: t.Dict 4810 ) -> t.Optional[exp.Expression]: 4811 this = parse_method() 4812 4813 while self._match_set(expressions): 4814 this = self.expression( 4815 expressions[self._prev.token_type], 4816 this=this, 4817 comments=self._prev_comments, 4818 expression=parse_method(), 4819 ) 4820 4821 return this 4822 4823 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4824 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4825 4826 def _parse_wrapped_csv( 4827 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4828 ) -> t.List[exp.Expression]: 4829 return self._parse_wrapped( 4830 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4831 ) 4832 4833 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4834 wrapped = self._match(TokenType.L_PAREN) 4835 if not wrapped and not optional: 4836 self.raise_error("Expecting (") 4837 parse_result = parse_method() 4838 if wrapped: 4839 self._match_r_paren() 4840 return parse_result 4841 4842 def _parse_expressions(self) -> t.List[exp.Expression]: 4843 return self._parse_csv(self._parse_expression) 4844 4845 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4846 return self._parse_select() or self._parse_set_operations( 4847 self._parse_expression() if alias else self._parse_conjunction() 4848 ) 4849 4850 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4851 return self._parse_query_modifiers( 4852 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4853 ) 4854 4855 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4856 this = None 4857 if self._match_texts(self.TRANSACTION_KIND): 4858 this = self._prev.text 4859 4860 self._match_texts({"TRANSACTION", "WORK"}) 4861 4862 modes = [] 4863 while True: 4864 mode = [] 4865 while self._match(TokenType.VAR): 4866 mode.append(self._prev.text) 4867 4868 if mode: 4869 modes.append(" ".join(mode)) 4870 if not self._match(TokenType.COMMA): 4871 break 4872 4873 return self.expression(exp.Transaction, this=this, modes=modes) 4874 4875 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4876 chain = None 4877 savepoint = None 4878 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4879 4880 self._match_texts({"TRANSACTION", "WORK"}) 4881 4882 if self._match_text_seq("TO"): 4883 self._match_text_seq("SAVEPOINT") 4884 savepoint = self._parse_id_var() 4885 4886 if self._match(TokenType.AND): 4887 chain = not self._match_text_seq("NO") 4888 self._match_text_seq("CHAIN") 4889 4890 if is_rollback: 4891 return self.expression(exp.Rollback, savepoint=savepoint) 4892 4893 return self.expression(exp.Commit, chain=chain) 4894 4895 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4896 if not self._match_text_seq("ADD"): 4897 return None 4898 4899 self._match(TokenType.COLUMN) 4900 exists_column = self._parse_exists(not_=True) 4901 expression = self._parse_field_def() 4902 4903 if expression: 4904 expression.set("exists", exists_column) 4905 4906 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4907 if self._match_texts(("FIRST", "AFTER")): 4908 position = self._prev.text 4909 column_position = self.expression( 4910 
exp.ColumnPosition, this=self._parse_column(), position=position 4911 ) 4912 expression.set("position", column_position) 4913 4914 return expression 4915 4916 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4917 drop = self._match(TokenType.DROP) and self._parse_drop() 4918 if drop and not isinstance(drop, exp.Command): 4919 drop.set("kind", drop.args.get("kind", "COLUMN")) 4920 return drop 4921 4922 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4923 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4924 return self.expression( 4925 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4926 ) 4927 4928 def _parse_add_constraint(self) -> exp.AddConstraint: 4929 this = None 4930 kind = self._prev.token_type 4931 4932 if kind == TokenType.CONSTRAINT: 4933 this = self._parse_id_var() 4934 4935 if self._match_text_seq("CHECK"): 4936 expression = self._parse_wrapped(self._parse_conjunction) 4937 enforced = self._match_text_seq("ENFORCED") 4938 4939 return self.expression( 4940 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4941 ) 4942 4943 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4944 expression = self._parse_foreign_key() 4945 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4946 expression = self._parse_primary_key() 4947 else: 4948 expression = None 4949 4950 return self.expression(exp.AddConstraint, this=this, expression=expression) 4951 4952 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4953 index = self._index - 1 4954 4955 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4956 return self._parse_csv(self._parse_add_constraint) 4957 4958 self._retreat(index) 4959 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4960 return self._parse_csv(self._parse_field_def) 4961 4962 return self._parse_csv(self._parse_add_column) 4963 4964 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4965 self._match(TokenType.COLUMN) 4966 column = self._parse_field(any_token=True) 4967 4968 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4969 return self.expression(exp.AlterColumn, this=column, drop=True) 4970 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4971 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4972 4973 self._match_text_seq("SET", "DATA") 4974 return self.expression( 4975 exp.AlterColumn, 4976 this=column, 4977 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4978 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4979 using=self._match(TokenType.USING) and self._parse_conjunction(), 4980 ) 4981 4982 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4983 index = self._index - 1 4984 4985 partition_exists = self._parse_exists() 4986 if self._match(TokenType.PARTITION, advance=False): 4987 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4988 4989 self._retreat(index) 4990 return self._parse_csv(self._parse_drop_column) 4991 4992 def _parse_alter_table_rename(self) -> exp.RenameTable: 4993 self._match_text_seq("TO") 4994 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4995 4996 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4997 start = self._prev 4998 4999 if not self._match(TokenType.TABLE): 5000 return self._parse_as_command(start) 5001 5002 exists = self._parse_exists() 5003 only = 
self._match_text_seq("ONLY") 5004 this = self._parse_table(schema=True) 5005 5006 if self._next: 5007 self._advance() 5008 5009 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5010 if parser: 5011 actions = ensure_list(parser(self)) 5012 5013 if not self._curr: 5014 return self.expression( 5015 exp.AlterTable, 5016 this=this, 5017 exists=exists, 5018 actions=actions, 5019 only=only, 5020 ) 5021 5022 return self._parse_as_command(start) 5023 5024 def _parse_merge(self) -> exp.Merge: 5025 self._match(TokenType.INTO) 5026 target = self._parse_table() 5027 5028 if target and self._match(TokenType.ALIAS, advance=False): 5029 target.set("alias", self._parse_table_alias()) 5030 5031 self._match(TokenType.USING) 5032 using = self._parse_table() 5033 5034 self._match(TokenType.ON) 5035 on = self._parse_conjunction() 5036 5037 return self.expression( 5038 exp.Merge, 5039 this=target, 5040 using=using, 5041 on=on, 5042 expressions=self._parse_when_matched(), 5043 ) 5044 5045 def _parse_when_matched(self) -> t.List[exp.When]: 5046 whens = [] 5047 5048 while self._match(TokenType.WHEN): 5049 matched = not self._match(TokenType.NOT) 5050 self._match_text_seq("MATCHED") 5051 source = ( 5052 False 5053 if self._match_text_seq("BY", "TARGET") 5054 else self._match_text_seq("BY", "SOURCE") 5055 ) 5056 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5057 5058 self._match(TokenType.THEN) 5059 5060 if self._match(TokenType.INSERT): 5061 _this = self._parse_star() 5062 if _this: 5063 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5064 else: 5065 then = self.expression( 5066 exp.Insert, 5067 this=self._parse_value(), 5068 expression=self._match(TokenType.VALUES) and self._parse_value(), 5069 ) 5070 elif self._match(TokenType.UPDATE): 5071 expressions = self._parse_star() 5072 if expressions: 5073 then = self.expression(exp.Update, expressions=expressions) 5074 else: 5075 then = self.expression( 5076 exp.Update, 5077 expressions=self._match(TokenType.SET) 5078 and self._parse_csv(self._parse_equality), 5079 ) 5080 elif self._match(TokenType.DELETE): 5081 then = self.expression(exp.Var, this=self._prev.text) 5082 else: 5083 then = None 5084 5085 whens.append( 5086 self.expression( 5087 exp.When, 5088 matched=matched, 5089 source=source, 5090 condition=condition, 5091 then=then, 5092 ) 5093 ) 5094 return whens 5095 5096 def _parse_show(self) -> t.Optional[exp.Expression]: 5097 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5098 if parser: 5099 return parser(self) 5100 return self._parse_as_command(self._prev) 5101 5102 def _parse_set_item_assignment( 5103 self, kind: t.Optional[str] = None 5104 ) -> t.Optional[exp.Expression]: 5105 index = self._index 5106 5107 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5108 return self._parse_set_transaction(global_=kind == "GLOBAL") 5109 5110 left = self._parse_primary() or self._parse_id_var() 5111 assignment_delimiter = self._match_texts(("=", "TO")) 5112 5113 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5114 self._retreat(index) 5115 return None 5116 5117 right = self._parse_statement() or self._parse_id_var() 5118 this = self.expression(exp.EQ, this=left, expression=right) 5119 5120 return self.expression(exp.SetItem, this=this, kind=kind) 5121 5122 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5123 self._match_text_seq("TRANSACTION") 5124 characteristics = 
self._parse_csv( 5125 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5126 ) 5127 return self.expression( 5128 exp.SetItem, 5129 expressions=characteristics, 5130 kind="TRANSACTION", 5131 **{"global": global_}, # type: ignore 5132 ) 5133 5134 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5135 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5136 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5137 5138 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5139 index = self._index 5140 set_ = self.expression( 5141 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5142 ) 5143 5144 if self._curr: 5145 self._retreat(index) 5146 return self._parse_as_command(self._prev) 5147 5148 return set_ 5149 5150 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5151 for option in options: 5152 if self._match_text_seq(*option.split(" ")): 5153 return exp.var(option) 5154 return None 5155 5156 def _parse_as_command(self, start: Token) -> exp.Command: 5157 while self._curr: 5158 self._advance() 5159 text = self._find_sql(start, self._prev) 5160 size = len(start.text) 5161 return exp.Command(this=text[:size], expression=text[size:]) 5162 5163 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5164 settings = [] 5165 5166 self._match_l_paren() 5167 kind = self._parse_id_var() 5168 5169 if self._match(TokenType.L_PAREN): 5170 while True: 5171 key = self._parse_id_var() 5172 value = self._parse_primary() 5173 5174 if not key and value is None: 5175 break 5176 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5177 self._match(TokenType.R_PAREN) 5178 5179 self._match_r_paren() 5180 5181 return self.expression( 5182 exp.DictProperty, 5183 this=this, 5184 kind=kind.this if kind else None, 5185 settings=settings, 5186 ) 5187 5188 def _parse_dict_range(self, this: str) -> exp.DictRange: 5189 self._match_l_paren() 5190 has_min = self._match_text_seq("MIN") 5191 if has_min: 5192 min = self._parse_var() or self._parse_primary() 5193 self._match_text_seq("MAX") 5194 max = self._parse_var() or self._parse_primary() 5195 else: 5196 max = self._parse_var() or self._parse_primary() 5197 min = exp.Literal.number(0) 5198 self._match_r_paren() 5199 return self.expression(exp.DictRange, this=this, min=min, max=max) 5200 5201 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5202 index = self._index 5203 expression = self._parse_column() 5204 if not self._match(TokenType.IN): 5205 self._retreat(index - 1) 5206 return None 5207 iterator = self._parse_column() 5208 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5209 return self.expression( 5210 exp.Comprehension, 5211 this=this, 5212 expression=expression, 5213 iterator=iterator, 5214 condition=condition, 5215 ) 5216 5217 def _find_parser( 5218 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5219 ) -> t.Optional[t.Callable]: 5220 if not self._curr: 5221 return None 5222 5223 index = self._index 5224 this = [] 5225 while True: 5226 # The current token might be multiple words 5227 curr = self._curr.text.upper() 5228 key = curr.split(" ") 5229 this.append(curr) 5230 5231 self._advance() 5232 result, trie = in_trie(trie, key) 5233 if result == TrieResult.FAILED: 5234 break 5235 5236 if result == TrieResult.EXISTS: 5237 subparser = parsers[" ".join(this)] 5238 return subparser 5239 5240 
self._retreat(index) 5241 return None 5242 5243 def _match(self, token_type, advance=True, expression=None): 5244 if not self._curr: 5245 return None 5246 5247 if self._curr.token_type == token_type: 5248 if advance: 5249 self._advance() 5250 self._add_comments(expression) 5251 return True 5252 5253 return None 5254 5255 def _match_set(self, types, advance=True): 5256 if not self._curr: 5257 return None 5258 5259 if self._curr.token_type in types: 5260 if advance: 5261 self._advance() 5262 return True 5263 5264 return None 5265 5266 def _match_pair(self, token_type_a, token_type_b, advance=True): 5267 if not self._curr or not self._next: 5268 return None 5269 5270 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5271 if advance: 5272 self._advance(2) 5273 return True 5274 5275 return None 5276 5277 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5278 if not self._match(TokenType.L_PAREN, expression=expression): 5279 self.raise_error("Expecting (") 5280 5281 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5282 if not self._match(TokenType.R_PAREN, expression=expression): 5283 self.raise_error("Expecting )") 5284 5285 def _match_texts(self, texts, advance=True): 5286 if self._curr and self._curr.text.upper() in texts: 5287 if advance: 5288 self._advance() 5289 return True 5290 return False 5291 5292 def _match_text_seq(self, *texts, advance=True): 5293 index = self._index 5294 for text in texts: 5295 if self._curr and self._curr.text.upper() == text: 5296 self._advance() 5297 else: 5298 self._retreat(index) 5299 return False 5300 5301 if not advance: 5302 self._retreat(index) 5303 5304 return True 5305 5306 @t.overload 5307 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5308 ... 5309 5310 @t.overload 5311 def _replace_columns_with_dots( 5312 self, this: t.Optional[exp.Expression] 5313 ) -> t.Optional[exp.Expression]: 5314 ... 5315 5316 def _replace_columns_with_dots(self, this): 5317 if isinstance(this, exp.Dot): 5318 exp.replace_children(this, self._replace_columns_with_dots) 5319 elif isinstance(this, exp.Column): 5320 exp.replace_children(this, self._replace_columns_with_dots) 5321 table = this.args.get("table") 5322 this = ( 5323 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5324 ) 5325 5326 return this 5327 5328 def _replace_lambda( 5329 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5330 ) -> t.Optional[exp.Expression]: 5331 if not node: 5332 return node 5333 5334 for column in node.find_all(exp.Column): 5335 if column.parts[0].name in lambda_variables: 5336 dot_or_id = column.to_dot() if column.table else column.this 5337 parent = column.parent 5338 5339 while isinstance(parent, exp.Dot): 5340 if not isinstance(parent.parent, exp.Dot): 5341 parent.replace(dot_or_id) 5342 break 5343 parent = parent.parent 5344 else: 5345 if column is node: 5346 node = dot_or_id 5347 else: 5348 column.replace(dot_or_id) 5349 return node 5350 5351 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5352 return [ 5353 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5354 for value in values 5355 if value 5356 ]
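Among the helpers in the listing above, `_parse_decode` rewrites the multi-argument form of DECODE into a CASE expression, giving NULL search values the explicit IS NULL check its docstring calls out. A minimal sketch of that lowering through the public API, assuming the Oracle dialect routes DECODE to this helper as upstream sqlglot does:

import sqlglot

# DECODE(expression, search, result, ..., default) is parsed into an exp.Case tree.
tree = sqlglot.parse_one(
    "SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t",
    read="oracle",
)

# Rendering the tree in the default dialect shows the CASE form,
# with the NULL search value checked via IS NULL.
print(tree.sql())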
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self._tokenizer = self.TOKENIZER_CLASS()
    self.reset()
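A minimal construction sketch; the arguments mirror the signature above, with values chosen purely for illustration:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect errors and raise them together at the end of parsing,
# showing 50 characters of context around each offending token.
parser = Parser(
    error_level=ErrorLevel.RAISE,
    error_message_context=50,
    max_errors=5,
)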
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
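Tokens come from a Tokenizer, and each semicolon-separated statement yields one tree. A minimal sketch with the base dialect; dialect-specific subclasses work the same way:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

# Two statements in, two syntax trees out.
for tree in Parser().parse(tokens, sql):
    print(tree.sql())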
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
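A minimal sketch, assuming exp.Select is registered in EXPRESSION_PARSERS, as it is for the base parser:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
tokens = Tokenizer().tokenize(sql)

# Succeeds because the tokens form a SELECT; on failure the raised
# ParseError records the attempted type under "into_expression".
select = Parser().parse_into(exp.Select, tokens, sql)[0]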
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
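check_errors is invoked internally after parsing, so the error level decides what happens to the recorded errors. A sketch of the WARN path; the invalid input is an arbitrary example:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT (1"  # unbalanced parenthesis
parser = Parser(error_level=ErrorLevel.WARN)

# At WARN level the "Expecting )" error is logged via the "sqlglot"
# logger rather than raised, and stays available on parser.errors.
parser.parse(Tokenizer().tokenize(sql), sql)
print(parser.errors)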
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
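The structured fields attached by raise_error make programmatic error handling possible. A minimal sketch at the default IMMEDIATE level, again using an arbitrary invalid input:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT (1"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    info = e.errors[0]
    # Fields populated by raise_error via ParseError.new
    print(info["line"], info["col"], info["highlight"])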
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
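Because expression() routes through validate_expression, a node built with a missing mandatory argument is reported at the current error level. A minimal sketch:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# exp.Not requires "this"; supplying it yields a validated node.
node = parser.expression(exp.Not, this=exp.column("x"))

# Omitting a mandatory argument, e.g. parser.expression(exp.Not),
# would instead surface through raise_error.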
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
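A minimal sketch of the IGNORE escape hatch; exp.Cast normally requires both its "this" and "to" arguments:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# With IGNORE, the incomplete Cast is returned as-is; at any other
# level the missing "to" argument is reported through raise_error.
parser = Parser(error_level=ErrorLevel.IGNORE)
incomplete = parser.validate_expression(exp.Cast(this=exp.column("x")))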