最近开始啃openGauss的兼容package了,先拿简单点的练练手。
UTL_URL这个包只有两个函数,“ESCAPE"和"UNESCAPE”,其实就是在其他开发语言中使用的URLENCODE和URLDECODE两个函数,主要用于对url字符串的一些特定字符进行处理,以便将url字符串作为数据传输时不引起歧义,还算是经常会用到的功能,目前已经有三方扩展支持这两个函数(https://github.com/okbob/url_encode),但是在openGauss里编译扩展有点不方便,我直接用plsql写一个,更容易安装及使用。
先尝试在oracle里还原这个包里两个函数的逻辑
UTL_URL.ESCAPE ( url IN VARCHAR2 CHARACTER SET ANY_CS, escape_reserved_chars IN BOOLEAN DEFAULT FALSE, url_charset IN VARCHAR2 DEFAULT utl_http.body_charset) RETURN VARCHAR2;
ESCAPE有3个参数,分别是url字符串、是否替换保留字符(默认不替换)、url字符集
用法举例
--只传URL,另外两个参数默认 select utl_url.escape('https://www.darkathena.top/archives/我开博了') from dual; --https://www.darkathena.top/archives/%BF%BF%BF%BF --很明显,这里由于字符集未指定,导致转换出来的结果有误 --对于非ASCII字符以UTF8编码 select utl_url.escape('https://www.darkathena.top/archives/我开博了',url_charset=>'AL32UTF8') from dual; --https://www.darkathena.top/archives/%E6%88%91%E5%BC%80%E5%8D%9A%E4%BA%86 --对于非ASCII字符以GBK进行编码 select utl_url.escape('https://www.darkathena.top/archives/我开博了',url_charset=>'ZHS16GBK') from dual; --https://www.darkathena.top/archives/%CE%D2%BF%AA%B2%A9%C1%CB --对于非ASCII字符以GBK进行编码,并替换所有保留字符 BEGIN DBMS_OUTPUT.put_line( utl_url.escape('https://www.darkathena.top/archives/我开博了',TRUE,'ZHS16GBK') ); END; / --https%3A%2F%2Fwww.darkathena.top%2Farchives%2F%CE%D2%BF%AA%B2%A9%C1%CB
看上去不是很麻烦,只需要逐个字符进行判断,识别到需要进行转换的字符时,获取它的二进制数据,以十六进制字符串表示,并对这个十六进制字符串格式化成"%FF*n"的形式。
代码如下
CREATE OR REPLACE FUNCTION URLENCODE(url IN VARCHAR2 CHARACTER SET ANY_CS, escape_reserved_chars IN BOOLEAN DEFAULT FALSE, url_charset IN VARCHAR2 DEFAULT 'al32utf8') RETURN VARCHAR2 AS L_TMP VARCHAR2(6000); L_BAD VARCHAR2(100) DEFAULT ' >%}\~];?@&<#{|^[`/:=$+''"'; l_reserved_chars VARCHAR2(100) DEFAULT ';/?:@&=+$[]'; L_CHAR CHAR(1 CHAR); BEGIN IF (url IS NULL) THEN RETURN NULL; END IF; if not escape_reserved_chars then L_BAD := translate(L_BAD, l_reserved_chars, ''); end if; FOR I IN 1 .. LENGTH(url) LOOP L_CHAR := SUBSTR(url, I, 1); IF (INSTR(L_BAD, L_CHAR) > 0 or ascii(L_CHAR) > 255) THEN L_TMP := L_TMP || regexp_replace(rawtohex(utl_raw.cast_to_raw(convert(L_CHAR, url_charset))), '(.{2})', '%\1'); ELSE L_TMP := L_TMP || L_CHAR; END IF; END LOOP; RETURN L_TMP; END URLENCODE; /
对于上面的用法举例,使用刚刚创建的URLENCODE这个函数对utl_url.escape进行替换,均可执行,且输出结果正确
UTL_URL.UNESCAPE ( url IN VARCHAR2 CHARACTER SET ANY_CS, url_charset IN VARCHAR2 DEFAULT utl_http.body_charset) RETURN VARCHAR2;
UNESCAPE有两个参数,分别是已经被urldecode的url字符串,和url字符集
用法举例
--以GBK字符集对URL进行还原 select utl_url.unescape('https://www.darkathena.top/archives/%CE%D2%BF%AA%B2%A9%C1%CB','ZHS16GBK') from dual; --https://www.darkathena.top/archives/我开博了 --以UTF8字符集对URL进行还原 select utl_url.unescape('https://www.darkathena.top/archives/%E6%88%91%E5%BC%80%E5%8D%9A%E4%BA%86','AL32UTF8') from dual; --https://www.darkathena.top/archives/我开博了
这个看上去比上面那个要难处理一点,因为我考虑到不同字符集的字节个数是不一样的,但是后来一想,可以先不用考虑字符集,普通字符转成二进制数据,十六进制字符去掉百分号直接作为十六进制形式的二进制数据,把所有的二进制数据拼起来,最后再根据指定的字符集转换成可显示字符串即可
代码如下
CREATE OR REPLACE FUNCTION URLDECODE(url IN VARCHAR2 CHARACTER SET ANY_CS, url_charset IN VARCHAR2 DEFAULT 'AL32UTF8') RETURN VARCHAR2 IS L_RETURN VARCHAR2(2000); BEGIN select CONVERT(UTL_RAW.cast_to_varchar2(LISTAGG(CASE WHEN LENGTH(A) = 3 THEN HEXTORAW(REPLACE(A, '%')) ELSE UTL_RAW.cast_to_raw(A) END) --within group (order by 1) --18c以下取消本行注释 ), url_charset) INTO L_RETURN from (select REGEXP_SUBSTR(url, '(%.{2}|.)', 1, LEVEL) A from DUAL CONNECT BY LENGTH(REGEXP_SUBSTR(url, '(%.{2}|.)', 1, LEVEL)) > 0) A; RETURN L_RETURN; END; /
同样,新建的这个函数URLDECODE可以完全替代utl_url.unescape使用
到此,逻辑已经梳理完毕,接下来就是移植到openGauss了
openGauss的语法及函数和oracle还是有不少差异的,但是既然逻辑已经写出来了,改改也不是什么麻烦事,只是要注意,编码时尽量使用内置函数以提升效率
https://gitee.com/darkathena/opengauss-oracle/blob/main/oracle-package/utl_url.sql
create schema UTL_URL; CREATE OR REPLACE FUNCTION UTL_URL.escape(url IN TEXT, escape_reserved_chars IN BOOL DEFAULT FALSE, url_charset IN TEXT DEFAULT 'UTF8') RETURNS TEXT LANGUAGE plpgsql IMMUTABLE NOT FENCED NOT SHIPPABLE AS $$ declare L_TMP TEXT DEFAULT ''; L_BAD TEXT DEFAULT ' >%}\~];?@&<#{|^[`/:=$+''"'; l_reserved_chars TEXT DEFAULT ';/?:@&=+$[]'; L_CHAR TEXT; BEGIN IF (url IS NULL) THEN RETURN NULL; END IF; if not escape_reserved_chars then L_BAD := translate(L_BAD, l_reserved_chars, ''); end if; FOR I IN 1..LENGTH(url) LOOP L_CHAR := SUBSTR(url, I, 1); IF (INSTR(L_BAD, L_CHAR) > 0 or ascii(L_CHAR) > 255) THEN L_TMP := L_TMP || regexp_replace(upper(REPLACE(convert_TO(L_CHAR, url_charset)::TEXT,'\x','')),'(.{2})', '%\1','g'); ELSE L_TMP := L_TMP || L_CHAR; END IF; END LOOP; RETURN L_TMP; END; $$; / CREATE OR REPLACE FUNCTION UTL_URL.unescape(url IN TEXT, url_charset IN TEXT DEFAULT 'UTF8') RETURNS TEXT LANGUAGE sql IMMUTABLE NOT FENCED NOT SHIPPABLE AS $$ select CONVERT_FROM(string_agg(CASE WHEN LENGTH(A) = 3 THEN decode(REPLACE(A, '%'), 'HEX') ELSE A :: bytea END, '' :: bytea), url_charset) from (select a from (select (regexp_matches(url, '(%..|.)', 'g')) [ 1 ] a ) ) A; $$; /
测试
select utl_url.escape('https://www.darkathena.top/archives/我开博了',TRUE,url_charset=>'GBK') union all select utl_url.escape('https://www.darkathena.top/archives/我开博了',url_charset=>'GBK') union all select utl_url.escape('https://www.darkathena.top/archives/我开博了',url_charset=>'UTF8') union all select utl_url.unescape('https://www.darkathena.top/archives/%CE%D2%BF%AA%B2%A9%C1%CB','GBK') union all select utl_url.unescape('https://www.darkathena.top/archives/%E6%88%91%E5%BC%80%E5%8D%9A%E4%BA%86','UTF8') ;
另外,如果是openGauss2.1.0以上版本,且安装数据库时选择了A兼容模式,那么也可以使用package的方式,不需要新建schema
CREATE OR REPLACE package pg_catalog.UTL_URL as function escape(url IN TEXT, escape_reserved_chars IN BOOL DEFAULT FALSE, url_charset IN TEXT DEFAULT 'UTF8') RETURN TEXT; FUNCTION unescape(url IN TEXT, url_charset IN TEXT DEFAULT 'UTF8') RETURN TEXT; end UTL_URL; / CREATE OR REPLACE package body pg_catalog.UTL_URL as function escape(url IN TEXT, escape_reserved_chars IN BOOL DEFAULT FALSE, url_charset IN TEXT DEFAULT 'UTF8') RETURN TEXT IMMUTABLE NOT FENCED NOT SHIPPABLE AS L_TMP TEXT DEFAULT ''; L_BAD TEXT DEFAULT ' >%}\~];?@&<#{|^[`/:=$+''"'; l_reserved_chars TEXT DEFAULT ';/?:@&=+$[]'; L_CHAR TEXT; BEGIN IF (url IS NULL) THEN RETURN NULL; END IF; if not escape_reserved_chars then L_BAD := translate(L_BAD, l_reserved_chars, ''); end if; FOR I IN 1.. LENGTH(url) LOOP L_CHAR := SUBSTR(url, I, 1); IF (INSTR(L_BAD, L_CHAR) > 0 or ascii(L_CHAR) > 255) THEN L_TMP := L_TMP || regexp_replace(upper(REPLACE(convert_TO(L_CHAR, url_charset)::TEXT,'\x','')),'(.{2})', '%\1','g'); ELSE L_TMP := L_TMP || L_CHAR; END IF; END LOOP; RETURN L_TMP; END; FUNCTION unescape(url IN TEXT, url_charset IN TEXT DEFAULT 'UTF8') RETURN TEXT IMMUTABLE NOT FENCED NOT SHIPPABLE AS l_return text; begin select CONVERT_FROM(string_agg(CASE WHEN LENGTH(A) = 3 THEN decode(REPLACE(A, '%'), 'HEX') ELSE A :: bytea END, '' :: bytea), url_charset) into l_return from (select a from (select (regexp_matches(url, '(%..|.)', 'g')) [ 1 ] a ) ) A; return l_return; end; end UTL_URL; /
- 本文作者: DarkAthena
- 本文链接: https://www.darkathena.top/archives/opengauss-utl-url-pkg
- 版权声明: 本博客所有文章除特别声明外,均采用CC BY-NC-SA 3.0 许可协议。转载请注明出处!