PG数据库的语法解析
src\backend\parser\gram.y文件中定义了PG的语法解析器(Bison语法文件)。
%{ Declarations %} Definitions %% Productions %% User subroutines
在介绍词法解析器时,我们已经讲到了.y文件的基本知识点;这一篇文章主要讲解PG中是如何使用语法解析器(Bison)的。
Declarations段(%{ ... %}):包含头文件,声明后文使用的函数,定义宏和别名。这部分为C代码,不做具体介绍。
Definitions段在Bison中的作用与Flex中的类似,用于定义一些Bison专有的变量或相关选项。
%pure-parser
指示Bison创建一个可重入的解析器。与普通解析器的一个很大不同是:yylval不再是全局的union变量,而是以union指针的形式传递给yylex()。
%expect
%expect N告诉Bison,解析器应该有N个shift/reduce冲突,如果不匹配,Bison将报告编译时错误。
%name-prefix
指定生成的解析器中外部符号(函数和变量)名称的前缀,默认为yy。
%name-prefix "base_yy"意味着默认的yyxx()会变成base_yyxx()。受影响的符号包括yyparse()、yylex()、yyerror()、yylval、yychar和yydebug。
%locations
启用位置跟踪:Bison会为每个符号维护位置信息,在动作代码中可以通过@n引用第n个符号的位置(例如下文产生式中的@1、@2)。
%parse-param
%parse-param声明的内容位于yyparse()的括号之间,可以声明任意多的参数.
比如%parse-param {core_yyscan_t yyscanner},参数为core_yyscan_t yyscanner.
%lex-param
%lex-param声明的内容位于yylex()的括号之间,可以声明任意多的参数.
比如%lex-param {core_yyscan_t yyscanner},参数为core_yyscan_t yyscanner.
%union
%union声明了在解析器中标识符所使用的类型.
在Bison解析器中,每一个符号(包括tokens和非终结符)都有一个值与之关联。默认情况下,值的类型都是整型,但在实际应用中远远不够。
%union可以为标识符值创建C语言union声明.
%union{ core_YYSTYPE core_yystype; /* these fields must match core_YYSTYPE: */ int ival; char *str; const char *keyword; ... }
其中core_yystype的类型为core_YYSTYPE联合体.
/* * The scanner returns extra data about scanned tokens in this union type. * Note that this is a subset of the fields used in YYSTYPE of the bison * parsers built atop the scanner. */ typedef union core_YYSTYPE{ int ival; /* for integer literals */ char *str; /* for identifiers and non- integer literals */ const char *keyword; /* canonical spelling of keywords */ } core_YYSTYPE;
一旦定义了union,那需要通过将union中合适的名称放在尖括号(<>)中,用以告诉Bison哪些符号具有哪些类型的值.
%type
类型定义,如:
%type <node> stmt schema_stmt AlterEventTrigStmt AlterCollationStmt ...
表示stmt、schema_stmt、AlterEventTrigStmt等非终结符的语义值类型为<node>,即使用%union中名为node的成员。
%nonassoc
使用%nonassoc声明无结合性的操作符(这类操作符不能连续使用)。
%left
声明左结合的操作符。
%right
声明右结合的操作符。
Productions段:从根节点开始,按照产生式依次对语句和表达式进行解析,最终将结果汇集到根节点。
**stmtblock:**语法块根节点
/*
 * The target production for the whole parse.
 *
 * The action stores the value of stmtmulti ($1) into the parsetree field
 * of the structure returned by pg_yyget_extra(yyscanner).
 */
stmtblock:  stmtmulti
            {
                pg_yyget_extra(yyscanner)->parsetree = $1;
            }
        ;
**stmt:**所有类型的节点
/*
 * stmt: a single statement.
 *
 * Each alternative is one statement-type nonterminal; none of them has an
 * explicit action.  The final, empty alternative sets the value to NULL.
 */
stmt :
            AlterEventTrigStmt
            | AlterCollationStmt
            | AlterDatabaseStmt
            | AlterDatabaseSetStmt
            | AlterDefaultPrivilegesStmt
            | AlterDomainStmt
            | AlterEnumStmt
            | AlterExtensionStmt
            | AlterExtensionContentsStmt
            | AlterFdwStmt
            | AlterForeignServerStmt
            | AlterForeignTableStmt
            | AlterFunctionStmt
            | AlterGroupStmt
            | AlterObjectDependsStmt
            | AlterObjectSchemaStmt
            | AlterOwnerStmt
            | AlterOperatorStmt
            | AlterPolicyStmt
            | AlterSeqStmt
            | AlterSystemStmt
            | AlterTableStmt
            | AlterTblSpcStmt
            | AlterCompositeTypeStmt
            | AlterPublicationStmt
            | AlterRoleSetStmt
            | AlterRoleStmt
            | AlterSubscriptionStmt
            | AlterTSConfigurationStmt
            | AlterTSDictionaryStmt
            | AlterUserMappingStmt
            | AnalyzeStmt
            | CallStmt
            | CheckPointStmt
            | ClosePortalStmt
            | ClusterStmt
            | CommentStmt
            | ConstraintsSetStmt
            | CopyStmt
            | CreateAmStmt
            | CreateAsStmt
            | CreateAssertionStmt
            | CreateCastStmt
            | CreateConversionStmt
            | CreateDomainStmt
            | CreateExtensionStmt
            | CreateFdwStmt
            | CreateForeignServerStmt
            | CreateForeignTableStmt
            | CreateFunctionStmt
            | CreateGroupStmt
            | CreateMatViewStmt
            | CreateOpClassStmt
            | CreateOpFamilyStmt
            | CreatePublicationStmt
            | AlterOpFamilyStmt
            | CreatePolicyStmt
            | CreatePLangStmt
            | CreateSchemaStmt
            | CreateSeqStmt
            | CreateStmt
            | CreateSubscriptionStmt
            | CreateStatsStmt
            | CreateTableSpaceStmt
            | CreateTransformStmt
            | CreateTrigStmt
            | CreateEventTrigStmt
            | CreateRoleStmt
            | CreateUserStmt
            | CreateUserMappingStmt
            | CreatedbStmt
            | DeallocateStmt
            | DeclareCursorStmt
            | DefineStmt
            | DeleteStmt
            | DiscardStmt
            | DoStmt
            | DropCastStmt
            | DropOpClassStmt
            | DropOpFamilyStmt
            | DropOwnedStmt
            | DropPLangStmt
            | DropStmt
            | DropSubscriptionStmt
            | DropTableSpaceStmt
            | DropTransformStmt
            | DropRoleStmt
            | DropUserMappingStmt
            | DropdbStmt
            | ExecuteStmt
            | ExplainStmt
            | FetchStmt
            | GrantStmt
            | GrantRoleStmt
            | ImportForeignSchemaStmt
            | IndexStmt
            | InsertStmt
            | ListenStmt
            | RefreshMatViewStmt
            | LoadStmt
            | LockStmt
            | NotifyStmt
            | PrepareStmt
            | ReassignOwnedStmt
            | ReindexStmt
            | RemoveAggrStmt
            | RemoveFuncStmt
            | RemoveOperStmt
            | RenameStmt
            | RevokeStmt
            | RevokeRoleStmt
            | RuleStmt
            | SecLabelStmt
            | SelectStmt
            | TransactionStmt
            | TruncateStmt
            | UnlistenStmt
            | UpdateStmt
            | VacuumStmt
            | VariableResetStmt
            | VariableSetStmt
            | VariableShowStmt
            | ViewStmt
            | /*EMPTY*/
                { $$ = NULL; }
        ;
**a_expr:**通用(不受限)表达式解析
/*
 * General expressions
 * This is the heart of the expression syntax.
 *
 * We have two expression types: a_expr is the unrestricted kind, and
 * b_expr is a subset that must be used in some places to avoid shift/reduce
 * conflicts.  For example, we can't do BETWEEN as "BETWEEN a_expr AND a_expr"
 * because that use of AND conflicts with AND as a boolean operator.  So,
 * b_expr is used in BETWEEN and we remove boolean keywords from b_expr.
 *
 * Note that '(' a_expr ')' is a b_expr, so an unrestricted expression can
 * always be used by surrounding it with parens.
 *
 * c_expr is all the productions that are common to a_expr and b_expr;
 * it's factored out just to eliminate redundant coding.
 *
 * Be careful of productions involving more than one terminal token.
 * By default, bison will assign such productions the precedence of their
 * last terminal, but in nearly all cases you want it to be the precedence
 * of the first terminal instead; otherwise you will not get the behavior
 * you expect!  So we use %prec annotations freely to set precedences.
 */
a_expr:     c_expr
                { $$ = $1; }
            | a_expr TYPECAST Typename
                { $$ = makeTypeCast($1, $3, @2); }
            | a_expr COLLATE any_name
                {
                    CollateClause *n = makeNode(CollateClause);
                    n->arg = $1;
                    n->collname = $3;
                    n->location = @2;
                    $$ = (Node *) n;
                }
            | a_expr AT TIME ZONE a_expr            %prec AT
                {
                    /* note the argument order: the zone ($5) comes first */
                    $$ = (Node *) makeFuncCall(SystemFuncName("timezone"),
                                               list_make2($5, $1),
                                               @2);
                }
        /*
         * These operators must be called out explicitly in order to make use
         * of bison's automatic operator-precedence handling.  All other
         * operator names are handled by the generic productions using "Op",
         * below; and all those operators will have the same precedence.
         *
         * If you add more explicitly-known operators, be sure to add them
         * also to b_expr and to the MathOp list below.
         */
            | '+' a_expr                    %prec UMINUS
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", NULL, $2, @1); }
            | '-' a_expr                    %prec UMINUS
                { $$ = doNegate($2, @1); }
            | a_expr '+' a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", $1, $3, @2); }
            | a_expr '-' a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "-", $1, $3, @2); }
            | a_expr '*' a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "*", $1, $3, @2); }
            | a_expr '/' a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "/", $1, $3, @2); }
            | a_expr '%' a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "%", $1, $3, @2); }
            | a_expr '^' a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); }
            | a_expr '<' a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
            | a_expr '>' a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); }
            | a_expr '=' a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "=", $1, $3, @2); }
            | a_expr LESS_EQUALS a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<=", $1, $3, @2); }
            | a_expr GREATER_EQUALS a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); }
            | a_expr NOT_EQUALS a_expr
                { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); }

            | a_expr qual_Op a_expr             %prec Op
                { $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); }
            | qual_Op a_expr                    %prec Op
                { $$ = (Node *) makeA_Expr(AEXPR_OP, $1, NULL, $2, @1); }
            | a_expr qual_Op                    %prec POSTFIXOP
                { $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, NULL, @2); }

            | a_expr AND a_expr
                { $$ = makeAndExpr($1, $3, @2); }
            | a_expr OR a_expr
                { $$ = makeOrExpr($1, $3, @2); }
            | NOT a_expr
                { $$ = makeNotExpr($2, @1); }
            | NOT_LA a_expr                     %prec NOT
                { $$ = makeNotExpr($2, @1); }

            | a_expr LIKE a_expr
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_LIKE, "~~",
                                                   $1, $3, @2);
                }
            | a_expr LIKE a_expr ESCAPE a_expr                  %prec LIKE
                {
                    FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
                                               list_make2($3, $5),
                                               @2);
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_LIKE, "~~",
                                                   $1, (Node *) n, @2);
                }
            | a_expr NOT_LA LIKE a_expr                         %prec NOT_LA
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_LIKE, "!~~",
                                                   $1, $4, @2);
                }
            | a_expr NOT_LA LIKE a_expr ESCAPE a_expr           %prec NOT_LA
                {
                    FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
                                               list_make2($4, $6),
                                               @2);
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_LIKE, "!~~",
                                                   $1, (Node *) n, @2);
                }
            | a_expr ILIKE a_expr
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_ILIKE, "~~*",
                                                   $1, $3, @2);
                }
            | a_expr ILIKE a_expr ESCAPE a_expr                 %prec ILIKE
                {
                    FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
                                               list_make2($3, $5),
                                               @2);
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_ILIKE, "~~*",
                                                   $1, (Node *) n, @2);
                }
            | a_expr NOT_LA ILIKE a_expr                        %prec NOT_LA
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_ILIKE, "!~~*",
                                                   $1, $4, @2);
                }
            | a_expr NOT_LA ILIKE a_expr ESCAPE a_expr          %prec NOT_LA
                {
                    FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
                                               list_make2($4, $6),
                                               @2);
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_ILIKE, "!~~*",
                                                   $1, (Node *) n, @2);
                }

            | a_expr SIMILAR TO a_expr                          %prec SIMILAR
                {
                    /* no ESCAPE clause: a null constant is passed in its place */
                    FuncCall *n = makeFuncCall(SystemFuncName("similar_escape"),
                                               list_make2($4, makeNullAConst(-1)),
                                               @2);
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_SIMILAR, "~",
                                                   $1, (Node *) n, @2);
                }
            | a_expr SIMILAR TO a_expr ESCAPE a_expr            %prec SIMILAR
                {
                    FuncCall *n = makeFuncCall(SystemFuncName("similar_escape"),
                                               list_make2($4, $6),
                                               @2);
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_SIMILAR, "~",
                                                   $1, (Node *) n, @2);
                }
            | a_expr NOT_LA SIMILAR TO a_expr                   %prec NOT_LA
                {
                    FuncCall *n = makeFuncCall(SystemFuncName("similar_escape"),
                                               list_make2($5, makeNullAConst(-1)),
                                               @2);
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_SIMILAR, "!~",
                                                   $1, (Node *) n, @2);
                }
            | a_expr NOT_LA SIMILAR TO a_expr ESCAPE a_expr     %prec NOT_LA
                {
                    FuncCall *n = makeFuncCall(SystemFuncName("similar_escape"),
                                               list_make2($5, $7),
                                               @2);
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_SIMILAR, "!~",
                                                   $1, (Node *) n, @2);
                }

            /* NullTest clause
             * Define SQL-style Null test clause.
             * Allow two forms described in the standard:
             *  a IS NULL
             *  a IS NOT NULL
             * Allow two SQL extensions
             *  a ISNULL
             *  a NOTNULL
             */
            | a_expr IS NULL_P                          %prec IS
                {
                    NullTest *n = makeNode(NullTest);
                    n->arg = (Expr *) $1;
                    n->nulltesttype = IS_NULL;
                    n->location = @2;
                    $$ = (Node *)n;
                }
            | a_expr ISNULL
                {
                    NullTest *n = makeNode(NullTest);
                    n->arg = (Expr *) $1;
                    n->nulltesttype = IS_NULL;
                    n->location = @2;
                    $$ = (Node *)n;
                }
            | a_expr IS NOT NULL_P                      %prec IS
                {
                    NullTest *n = makeNode(NullTest);
                    n->arg = (Expr *) $1;
                    n->nulltesttype = IS_NOT_NULL;
                    n->location = @2;
                    $$ = (Node *)n;
                }
            | a_expr NOTNULL
                {
                    NullTest *n = makeNode(NullTest);
                    n->arg = (Expr *) $1;
                    n->nulltesttype = IS_NOT_NULL;
                    n->location = @2;
                    $$ = (Node *)n;
                }
            | row OVERLAPS row
                {
                    /* each side must be a two-element list */
                    if (list_length($1) != 2)
                        ereport(ERROR,
                                (errcode(ERRCODE_SYNTAX_ERROR),
                                 errmsg("wrong number of parameters on left side of OVERLAPS expression"),
                                 parser_errposition(@1)));
                    if (list_length($3) != 2)
                        ereport(ERROR,
                                (errcode(ERRCODE_SYNTAX_ERROR),
                                 errmsg("wrong number of parameters on right side of OVERLAPS expression"),
                                 parser_errposition(@3)));
                    $$ = (Node *) makeFuncCall(SystemFuncName("overlaps"),
                                               list_concat($1, $3),
                                               @2);
                }
            | a_expr IS TRUE_P                          %prec IS
                {
                    BooleanTest *b = makeNode(BooleanTest);
                    b->arg = (Expr *) $1;
                    b->booltesttype = IS_TRUE;
                    b->location = @2;
                    $$ = (Node *)b;
                }
            | a_expr IS NOT TRUE_P                      %prec IS
                {
                    BooleanTest *b = makeNode(BooleanTest);
                    b->arg = (Expr *) $1;
                    b->booltesttype = IS_NOT_TRUE;
                    b->location = @2;
                    $$ = (Node *)b;
                }
            | a_expr IS FALSE_P                         %prec IS
                {
                    BooleanTest *b = makeNode(BooleanTest);
                    b->arg = (Expr *) $1;
                    b->booltesttype = IS_FALSE;
                    b->location = @2;
                    $$ = (Node *)b;
                }
            | a_expr IS NOT FALSE_P                     %prec IS
                {
                    BooleanTest *b = makeNode(BooleanTest);
                    b->arg = (Expr *) $1;
                    b->booltesttype = IS_NOT_FALSE;
                    b->location = @2;
                    $$ = (Node *)b;
                }
            | a_expr IS UNKNOWN                         %prec IS
                {
                    BooleanTest *b = makeNode(BooleanTest);
                    b->arg = (Expr *) $1;
                    b->booltesttype = IS_UNKNOWN;
                    b->location = @2;
                    $$ = (Node *)b;
                }
            | a_expr IS NOT UNKNOWN                     %prec IS
                {
                    BooleanTest *b = makeNode(BooleanTest);
                    b->arg = (Expr *) $1;
                    b->booltesttype = IS_NOT_UNKNOWN;
                    b->location = @2;
                    $$ = (Node *)b;
                }
            | a_expr IS DISTINCT FROM a_expr            %prec IS
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_DISTINCT, "=", $1, $5, @2);
                }
            | a_expr IS NOT DISTINCT FROM a_expr        %prec IS
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_NOT_DISTINCT, "=", $1, $6, @2);
                }
            | a_expr IS OF '(' type_list ')'            %prec IS
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_OF, "=", $1, (Node *) $5, @2);
                }
            | a_expr IS NOT OF '(' type_list ')'        %prec IS
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_OF, "<>", $1, (Node *) $6, @2);
                }
            | a_expr BETWEEN opt_asymmetric b_expr AND a_expr       %prec BETWEEN
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_BETWEEN,
                                                   "BETWEEN",
                                                   $1,
                                                   (Node *) list_make2($4, $6),
                                                   @2);
                }
            | a_expr NOT_LA BETWEEN opt_asymmetric b_expr AND a_expr %prec NOT_LA
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_NOT_BETWEEN,
                                                   "NOT BETWEEN",
                                                   $1,
                                                   (Node *) list_make2($5, $7),
                                                   @2);
                }
            | a_expr BETWEEN SYMMETRIC b_expr AND a_expr            %prec BETWEEN
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_BETWEEN_SYM,
                                                   "BETWEEN SYMMETRIC",
                                                   $1,
                                                   (Node *) list_make2($4, $6),
                                                   @2);
                }
            | a_expr NOT_LA BETWEEN SYMMETRIC b_expr AND a_expr     %prec NOT_LA
                {
                    $$ = (Node *) makeSimpleA_Expr(AEXPR_NOT_BETWEEN_SYM,
                                                   "NOT BETWEEN SYMMETRIC",
                                                   $1,
                                                   (Node *) list_make2($5, $7),
                                                   @2);
                }
            | a_expr IN_P in_expr
                {
                    /* in_expr returns a SubLink or a list of a_exprs */
                    if (IsA($3, SubLink))
                    {
                        /* generate foo = ANY (subquery) */
                        SubLink *n = (SubLink *) $3;
                        n->subLinkType = ANY_SUBLINK;
                        n->subLinkId = 0;
                        n->testexpr = $1;
                        n->operName = NIL;      /* show it's IN not = ANY */
                        n->location = @2;
                        $$ = (Node *)n;
                    }
                    else
                    {
                        /* generate scalar IN expression */
                        $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2);
                    }
                }
            | a_expr NOT_LA IN_P in_expr                        %prec NOT_LA
                {
                    /* in_expr returns a SubLink or a list of a_exprs */
                    if (IsA($4, SubLink))
                    {
                        /* generate NOT (foo = ANY (subquery)) */
                        /* Make an = ANY node */
                        SubLink *n = (SubLink *) $4;
                        n->subLinkType = ANY_SUBLINK;
                        n->subLinkId = 0;
                        n->testexpr = $1;
                        n->operName = NIL;      /* show it's IN not = ANY */
                        n->location = @2;
                        /* Stick a NOT on top; must have same parse location */
                        $$ = makeNotExpr((Node *) n, @2);
                    }
                    else
                    {
                        /* generate scalar NOT IN expression */
                        $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2);
                    }
                }
            | a_expr subquery_Op sub_type select_with_parens    %prec Op
                {
                    SubLink *n = makeNode(SubLink);
                    n->subLinkType = $3;
                    n->subLinkId = 0;
                    n->testexpr = $1;
                    n->operName = $2;
                    n->subselect = $4;
                    n->location = @2;
                    $$ = (Node *)n;
                }
            | a_expr subquery_Op sub_type '(' a_expr ')'        %prec Op
                {
                    if ($3 == ANY_SUBLINK)
                        $$ = (Node *) makeA_Expr(AEXPR_OP_ANY, $2, $1, $5, @2);
                    else
                        $$ = (Node *) makeA_Expr(AEXPR_OP_ALL, $2, $1, $5, @2);
                }
            | UNIQUE select_with_parens
                {
                    /* Not sure how to get rid of the parentheses
                     * but there are lots of shift/reduce errors without them.
                     *
                     * Should be able to implement this by plopping the entire
                     * select into a node, then transforming the target expressions
                     * from whatever they are into count(*), and testing the
                     * entire result equal to one.
                     * But, will probably implement a separate node in the executor.
                     */
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg("UNIQUE predicate is not yet implemented"),
                             parser_errposition(@1)));
                }
            | a_expr IS DOCUMENT_P                  %prec IS
                {
                    $$ = makeXmlExpr(IS_DOCUMENT, NULL, NIL,
                                     list_make1($1), @2);
                }
            | a_expr IS NOT DOCUMENT_P              %prec IS
                {
                    $$ = makeNotExpr(makeXmlExpr(IS_DOCUMENT, NULL, NIL,
                                                 list_make1($1), @2),
                                     @2);
                }
            | DEFAULT
                {
                    /*
                     * The SQL spec only allows DEFAULT in "contextually typed
                     * expressions", but for us, it's easier to allow it in
                     * any a_expr and then throw error during parse analysis
                     * if it's in an inappropriate context.  This way also
                     * lets us say something smarter than "syntax error".
                     */
                    SetToDefault *n = makeNode(SetToDefault);
                    /* parse analysis will fill in the rest */
                    n->location = @1;
                    $$ = (Node *)n;
                }
        ;
src\backend\parser\analyze.c中做了语义分析。在exec_simple_query函数中,依次获取原始语法解析树(pg_parse_query),对解析树进行语义分析和重写(pg_analyze_and_rewrite),为查询生成执行计划(pg_plan_queries),最后执行命令(PortalStart)。
本文主要介绍如何获取原始语法解析树。
/* * Do raw parsing (only). * * A list of parsetrees (RawStmt nodes) is returned, since there might be * multiple commands in the given string. * * NOTE: for interactive queries, it is important to keep this routine * separate from the analysis & rewrite stages. Analysis and rewriting * cannot be done in an aborted transaction, since they require access to * database tables. So, we rely on the raw parser to determine whether * we've seen a COMMIT or ABORT command; when we are in abort state, other * commands are not processed any further than the raw parse stage. */ //将完整的语句传入该函数 List * pg_parse_query(const char *query_string) { List *raw_parsetree_list; TRACE_POSTGRESQL_QUERY_PARSE_START(query_string); if (log_parser_stats) ResetUsage(); //调用词法解析器解析语句 raw_parsetree_list = raw_parser(query_string); if (log_parser_stats) ShowUsage("PARSER STATISTICS"); #ifdef COPY_PARSE_PLAN_TREES /* Optional debugging check: pass raw parsetrees through copyObject() */ { List *new_list = copyObject(raw_parsetree_list); /* This checks both copyObject() and the equal() routines... */ if (!equal(new_list, raw_parsetree_list)) elog(WARNING, "copyObject() failed to produce an equal raw parse tree"); else raw_parsetree_list = new_list; } #endif /* * Currently, outfuncs/readfuncs support is missing for many raw parse * tree nodes, so we don't try to implement WRITE_READ_PARSE_PLAN_TREES * here. */ TRACE_POSTGRESQL_QUERY_PARSE_DONE(query_string); return raw_parsetree_list; }
/* * raw_parser * Given a query in string form, do lexical and grammatical analysis. * * Returns a list of raw (un-analyzed) parse trees. The immediate elements * of the list are always RawStmt nodes. */ List * raw_parser(const char *str) { core_yyscan_t yyscanner; base_yy_extra_type yyextra; int yyresult; //初始化flex /* initialize the flex scanner */ yyscanner = scanner_init(str, &yyextra.core_yy_extra, &ScanKeywords, ScanKeywordTokens); /* base_yylex() only needs this much initialization */ yyextra.have_lookahead = false; //初始化bison /* initialize the bison parser */ parser_init(&yyextra); //执行解析 /* Parse! */ yyresult = base_yyparse(yyscanner); /* Clean up (release memory) */ scanner_finish(yyscanner); if (yyresult) /* error */ return NIL; return yyextra.parsetree; }
raw_parser最终调用base_yyparse函数完成解析,并返回yyextra.parsetree变量;该变量在gram.y的stmtblock规则中被赋值。
/*
 * The target production for the whole parse.
 *
 * The action stores the value of stmtmulti ($1) into the parsetree field
 * of the structure returned by pg_yyget_extra(yyscanner).
 */
stmtblock:  stmtmulti
            {
                pg_yyget_extra(yyscanner)->parsetree = $1;
            }
        ;