sql执行语句流程解析
整个处理流程在exec_simple_query函数中完成,代码架构如下:
/* * exec_simple_query * * Execute a "simple Query" protocol message. */ static void exec_simple_query(const char *query_string) { ... //原始语法树获取 /* * Do basic parsing of the query or queries (this should be safe even if * we are in aborted transaction state!) */ parsetree_list = pg_parse_query(query_string); ... //循环处理sql语句 /* * Run through the raw parsetree(s) and process each one. */ foreach(parsetree_item, parsetree_list) { ... //对原始语法树进行分析和重写,生成查询语法树 querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0, NULL); //对查询语法树进行优化,生成执行计划 plantree_list = pg_plan_queries(querytree_list, CURSOR_OPT_PARALLEL_OK, NULL); ... //执行语句 /* * Run the portal to completion, and then drop it (and the receiver). */ (void) PortalRun(portal, FETCH_ALL, true, /* always top level */ true, receiver, receiver, completionTag); ... } ... }
使用FLEX和BISON做语法解析,详见https://my.oschina.net/Greedxuji/blog/4290160
sql语句经过词法、语法解析后,将得到一个原始的语法树。查询分析的作用就是对原始语法树进行分析重写,将原始树转换成一颗或者多颗查询语法树。
该部分功能主要在pg_analyze_and_rewrite函数中完成,主要操作步骤为语法分析和优化重写。
代码框架如下:
/* * Given a raw parsetree (gram.y output), and optionally information about * types of parameter symbols ($n), perform parse analysis and rule rewriting. * * A list of Query nodes is returned, since either the analyzer or the * rewriter might expand one query to several. * * NOTE: for reasons mentioned above, this must be separate from raw parsing. */ List * pg_analyze_and_rewrite(RawStmt *parsetree, const char *query_string, Oid *paramTypes, int numParams, QueryEnvironment *queryEnv) { Query *query; List *querytree_list; TRACE_POSTGRESQL_QUERY_REWRITE_START(query_string); /* * (1) Perform parse analysis. */ if (log_parser_stats) ResetUsage(); //原始语法树分析 query = parse_analyze(parsetree, query_string, paramTypes, numParams, queryEnv); if (log_parser_stats) ShowUsage("PARSE ANALYSIS STATISTICS"); //原始语法树优化重写 /* * (2) Rewrite the queries, as necessary */ querytree_list = pg_rewrite_query(query); TRACE_POSTGRESQL_QUERY_REWRITE_DONE(query_string); return querytree_list; }
查询分析是将原始语法树转换为查询语法树。因为元素语法树为树结构,所以遍历树的节点执行相应的处理。
基本调用栈如下,由此可见,对select的 相关处理都已经包含完全了;相应的sql语句按照相应的执行节点执行就可以了。
parse_analyze ->transformTopLevelStmt ->transformOptionalSelectInto ->transformStmt ->transformInsertStmt ->transformDeleteStmt ->transformUpdateStmt ->transformSelectStmt ->transformDeclareCursorStmt ->transformExplainStmt ->transformCreateTableAsStmt ->transformCallStmt
主要函数解析
这里以“ SELECT * FROM A_TBL,B_TBL WHERE xx == xx
”为例。
命令执行时首先调用transformSelectStmt
函数。
SELECT命令包含WITH . FROM . TARGET . WHERE . HAVING . ORDER BY . GROUP BY . DISTINCT
7种信息处理。每个信息处理对应了一个处理函数。具体代码如下:
static Query * transformSelectStmt(ParseState *pstate, SelectStmt *stmt) { Query *qry = makeNode(Query); Node *qual; ListCell *l; qry->commandType = CMD_SELECT; /* process the WITH clause independently of all else */ if (stmt->withClause) { qry->hasRecursive = stmt->withClause->recursive; qry->cteList = transformWithClause(pstate, stmt->withClause); qry->hasModifyingCTE = pstate->p_hasModifyingCTE; } /* Complain if we get called from someplace where INTO is not allowed */ if (stmt->intoClause) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("SELECT ... INTO is not allowed here"), parser_errposition(pstate, exprLocation((Node *) stmt->intoClause)))); /* make FOR UPDATE/FOR SHARE info available to addRangeTableEntry */ pstate->p_locking_clause = stmt->lockingClause; /* make WINDOW info available for window functions, too */ pstate->p_windowdefs = stmt->windowClause; /* process the FROM clause */ transformFromClause(pstate, stmt->fromClause); /* transform targetlist */ qry->targetList = transformTargetList(pstate, stmt->targetList, EXPR_KIND_SELECT_TARGET); /* mark column origins */ markTargetListOrigins(pstate, qry->targetList); /* transform WHERE */ qual = transformWhereClause(pstate, stmt->whereClause, EXPR_KIND_WHERE, "WHERE"); /* initial processing of HAVING clause is much like WHERE clause */ qry->havingQual = transformWhereClause(pstate, stmt->havingClause, EXPR_KIND_HAVING, "HAVING"); /* * Transform sorting/grouping stuff. Do ORDER BY first because both * transformGroupClause and transformDistinctClause need the results. Note * that these functions can also change the targetList, so it's passed to * them by reference. */ qry->sortClause = transformSortClause(pstate, stmt->sortClause, &qry->targetList, EXPR_KIND_ORDER_BY, false /* allow SQL92 rules */ ); qry->groupClause = transformGroupClause(pstate, stmt->groupClause, &qry->groupingSets, &qry->targetList, qry->sortClause, EXPR_KIND_GROUP_BY, false /* allow SQL92 rules */ ); if (stmt->distinctClause == NIL) { qry->distinctClause = NIL; qry->hasDistinctOn = false; } else if (linitial(stmt->distinctClause) == NULL) { /* We had SELECT DISTINCT */ qry->distinctClause = transformDistinctClause(pstate, &qry->targetList, qry->sortClause, false); qry->hasDistinctOn = false; } else { /* We had SELECT DISTINCT ON */ qry->distinctClause = transformDistinctOnClause(pstate, stmt->distinctClause, &qry->targetList, qry->sortClause); qry->hasDistinctOn = true; } /* transform LIMIT */ qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset, EXPR_KIND_OFFSET, "OFFSET"); qry->limitCount = transformLimitClause(pstate, stmt->limitCount, EXPR_KIND_LIMIT, "LIMIT"); /* transform window clauses after we have seen all window functions */ qry->windowClause = transformWindowDefinitions(pstate, pstate->p_windowdefs, &qry->targetList); /* resolve any still-unresolved output columns as being type text */ if (pstate->p_resolve_unknowns) resolveTargetListUnknowns(pstate, qry->targetList); qry->rtable = pstate->p_rtable; qry->jointree = makeFromExpr(pstate->p_joinlist, qual); qry->hasSubLinks = pstate->p_hasSubLinks; qry->hasWindowFuncs = pstate->p_hasWindowFuncs; qry->hasTargetSRFs = pstate->p_hasTargetSRFs; qry->hasAggs = pstate->p_hasAggs; foreach(l, stmt->lockingClause) { transformLockingClause(pstate, qry, (LockingClause *) lfirst(l), false); } assign_query_collations(pstate, qry); /* this must be done after collations, for reliable comparison of exprs */ if (pstate->p_hasAggs || qry->groupClause || qry->groupingSets || qry->havingQual) parseCheckAggregates(pstate, qry); return qry; }
transformFromClause
FROM处理时,遍历fromlist将每一个“基表”传送给transformFromClauseItem进行处理,transformFromClauseItem处理的基表可能是直接处理基表或者查询表,例如:select * from aa,(select * from bb) as BB;
所以在处理时分为一下几个类型进行处理:
在处理完成后,将所有表明添加到pstate->p_namespace中,该值用于后续对select *
中列名的解析,查询出所有的列名;或者判断查询的列名是否存在。
相关代码如下:
/* * transformFromClause - * Process the FROM clause and add items to the query's range table, * joinlist, and namespace. * * Note: we assume that the pstate's p_rtable, p_joinlist, and p_namespace * lists were initialized to NIL when the pstate was created. * We will add onto any entries already present --- this is needed for rule * processing, as well as for UPDATE and DELETE. */ void transformFromClause(ParseState *pstate, List *frmList) { ListCell *fl; /* * The grammar will have produced a list of RangeVars, RangeSubselects, * RangeFunctions, and/or JoinExprs. Transform each one (possibly adding * entries to the rtable), check for duplicate refnames, and then add it * to the joinlist and namespace. * * Note we must process the items left-to-right for proper handling of * LATERAL references. */ foreach(fl, frmList) { Node *n = lfirst(fl); RangeTblEntry *rte; int rtindex; List *namespace; n = transformFromClauseItem(pstate, n, &rte, &rtindex, &namespace); checkNameSpaceConflicts(pstate, pstate->p_namespace, namespace); /* Mark the new namespace items as visible only to LATERAL */ setNamespaceLateralState(namespace, true, true); pstate->p_joinlist = lappend(pstate->p_joinlist, n); pstate->p_namespace = list_concat(pstate->p_namespace, namespace); } /* * We're done parsing the FROM list, so make all namespace items * unconditionally visible. Note that this will also reset lateral_only * for any namespace items that were already present when we were called; * but those should have been that way already. */ setNamespaceLateralState(pstate->p_namespace, false, true); }
static Node * transformFromClauseItem(ParseState *pstate, Node *n, RangeTblEntry **top_rte, int *top_rti, List **namespace) { if (IsA(n, RangeVar)) { ... } else if (IsA(n, RangeSubselect)) { ... } else if (IsA(n, RangeFunction)) { ... } else if (IsA(n, RangeTableFunc)) { ... } else if (IsA(n, RangeTableSample)) { ... } else if (IsA(n, JoinExpr)) { ... } }
transformTargetList
当查询全部列名时,需要将*
转换为全部列名,例如“ SELECT * FROM A_TBL,B_TBL WHERE xx == xx
”。在做列名解析时,在pstate->p_namespace中验证传入表中是否存在该列名,不存在则报错。
在获取时,分为字符串型的列名和句号.
类型的列名。
*
星号,调用ExpandColumnRefStar处理:存在*
则扩展为全部列名(SELECT *, dname FROM emp, dept
)。带表名的*
列名,则需要校验表明列名不超过4个(SELECT emp.*, dname FROM emp, dept
)。将结果存储在qry->targetList中。.
类型调用ExpandIndirectionStar处理:解析表达式,验证是否存在列名,存在则存储在qry->targetList中。对应代码如下:
List * transformTargetList(ParseState *pstate, List *targetlist, ParseExprKind exprKind) { List *p_target = NIL; bool expand_star; ListCell *o_target; /* Shouldn't have any leftover multiassign items at start */ Assert(pstate->p_multiassign_exprs == NIL); /* Expand "something.*" in SELECT and RETURNING, but not UPDATE */ expand_star = (exprKind != EXPR_KIND_UPDATE_SOURCE); foreach(o_target, targetlist) { ResTarget *res = (ResTarget *) lfirst(o_target); /* * Check for "something.*". Depending on the complexity of the * "something", the star could appear as the last field in ColumnRef, * or as the last indirection item in A_Indirection. */ if (expand_star) { if (IsA(res->val, ColumnRef)) { ColumnRef *cref = (ColumnRef *) res->val; if (IsA(llast(cref->fields), A_Star)) { /* It is something.*, expand into multiple items */ p_target = list_concat(p_target, ExpandColumnRefStar(pstate, cref, true)); continue; } } else if (IsA(res->val, A_Indirection)) { A_Indirection *ind = (A_Indirection *) res->val; if (IsA(llast(ind->indirection), A_Star)) { /* It is something.*, expand into multiple items */ p_target = list_concat(p_target, ExpandIndirectionStar(pstate, ind, true, exprKind)); continue; } } } /* * Not "something.*", or we want to treat that as a plain whole-row * variable, so transform as a single expression */ p_target = lappend(p_target, transformTargetEntry(pstate, res->val, NULL, exprKind, res->name, false)); } ... }
transformWhereClause
在该函数中处理where语句,该语句处理时没有特定的函数进行处理,仍然使用transformExpr函数进行处理,当where中只有一个表达式时,transformExpr函数处理T_ColumnRef分支;当where中为多个表达式时,transformExpr函数处理T_BoolExpr分支,在transformBoolExpr函数中再拆分为T_ColumnRef分支处理。
WHERE的最终结果会存储在jointree中qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
。所以后续进行计划树优化时,会对jointree进行优化处理。
代码如下:
Node * transformWhereClause(ParseState *pstate, Node *clause, ParseExprKind exprKind, const char *constructName) { Node *qual; if (clause == NULL) return NULL; qual = transformExpr(pstate, clause, exprKind); qual = coerce_to_boolean(pstate, qual, constructName); return qual; }
transformWhereClause
按照where语句进行处理
/* initial processing of HAVING clause is much like WHERE clause */ qry->havingQual = transformWhereClause(pstate, stmt->havingClause, EXPR_KIND_HAVING, "HAVING");
transformGroupClause
在group by语句进行处理时,需要与order by语句一起处理。处理时需要先进行order by排序,再进行group by分组。
以上两种未作介绍
按照pg_rewrite中定义的规则进行重写。