数仓建设过程中,在所难免的需要将大量业务数据导入值hive中,当业务比较复杂导致业务相关表过多时,
建表语句的整理和维护工作增大,手动写hive DDL容易出错且耗时,需要通过一些方法减少工作量。
该方式仅适用于MySQL,其他方法可以类似处理即可
SET SESSION group_concat_max_len = 102400;
SELECT
a.TABLE_NAME ,
b.TABLE_COMMENT ,
concat('DROP TABLE IF EXISTS ','tb_ods_',a.TABLE_NAME,';',CHAR(10 USING utf8),
'CREATE EXTERNAL TABLE IF NOT EXISTS ','tb_ods_',a.TABLE_NAME ,' (',CHAR(10 USING utf8),
group_concat(
concat(a.COLUMN_NAME,' ',
(case when a.data_type='bigint' then 'bigint'
when a.data_type='binary' then 'binary'
when a.data_type='char' then 'string'
when a.data_type='date' then 'string'
when a.data_type='datetime' then 'string'
when a.data_type='decimal' then concat('decimal','(',a.NUMERIC_PRECISION,',',a.NUMERIC_SCALE,')')
when a.data_type='double' then 'double'
when a.data_type='enum' then 'string'
when a.data_type='float' then 'double'
when a.data_type='int' then 'int'
when a.data_type='json' then 'map<string,string>'
when a.data_type='longtext' then 'string'
when a.data_type='mediumtext' then 'string'
when a.data_type='smallint' then 'int'
when a.data_type='text' then 'string'
when a.data_type='time' then 'string'
when a.data_type='timestamp' then 'string'
when a.data_type='tinyint' then 'int'
when a.data_type='varbinary' then 'binary'
when a.data_type='varchar' then 'string'
else '未知类型'
end)," COMMENT '",COLUMN_COMMENT,"'" ),CHAR(10 USING utf8)
order by a.TABLE_NAME,a.ORDINAL_POSITION) ,
") COMMENT '",b.TABLE_COMMENT ,"'",CHAR(10 USING utf8), "PARTITIONED BY (deal_date string COMMENT '数据日期')
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ('serialization.null.format' = 'null');",CHAR(10 USING utf8)) AS ods_ddl
FROM
(
SELECT
TABLE_SCHEMA,
TABLE_NAME,
COLUMN_NAME,
ORDINAL_POSITION,
DATA_TYPE,
NUMERIC_PRECISION,
NUMERIC_SCALE,
COLUMN_COMMENT
FROM
information_schema.COLUMNS
WHERE
TABLE_SCHEMA='${DATABASE_NAME}'
AND TABLE_NAME='${TABLE_NAME}' # 该条件限制单张张表的转换,可以注释掉,注释后为整个库的转换
) AS a
LEFT JOIN
information_schema.TABLES AS b
ON
a.TABLE_NAME=b.TABLE_NAME
AND a.TABLE_SCHEMA=b.TABLE_SCHEMA
where b.TABLE_TYPE='BASE TABLE'
GROUP BY
a.TABLE_NAME,
b.TABLE_COMMENT
;
直接将该语句在MySQL查询工具执行即可,执行前需要替换几个参数
${DATABASE_NAME}指定需要转换的MySQL数据库名,将该参数替换为自己需要转换的表所在的数据库即可;
${TABLE_NAME}指定需要转换的MySQL表名,将该参数替换为自己需要转换的表即可,如果想转换整个数据库的建表语句,直接将该条件注释掉。
SET SESSION group_concat_max_len = 102400;是设置group_concat的最大拼接长度,默认值为1024,当表字段过多时,可能导致拼接的DDL语句不全;SET SESSION仅在当前窗口生效,不影响其他窗口,如需设置全局生效,执行SET GLOBAL group_concat_max_len = 102400;即可。
SETSESSION group_concat_max_len = 102400;
SELECT
a.TABLE_NAME ,
b.TABLE_COMMENT ,
concat('DROP TABLE IF EXISTS ','tb_ods_',a.TABLE_NAME,';',CHAR(10USING utf8),
'CREATE EXTERNAL TABLE IF NOT EXISTS ','tb_ods_',a.TABLE_NAME ,' (',CHAR(10USING utf8),
group_concat(
concat(a.COLUMN_NAME,' ',
(casewhen a.data_type='bigint'then'bigint'
when a.data_type='binary'then'binary'
when a.data_type='char'then'string'
when a.data_type='date'then'string'
when a.data_type='datetime'then'string'
when a.data_type='decimal'thenconcat('decimal','(',a.NUMERIC_PRECISION,',',a.NUMERIC_SCALE,')')
when a.data_type='double'then'double'
when a.data_type='enum'then'string'
when a.data_type='float'then'double'
when a.data_type='int'then'int'
when a.data_type='json'then'map<string,string>'
when a.data_type='longtext'then'string'
when a.data_type='mediumtext'then'string'
when a.data_type='smallint'then'int'
when a.data_type='text'then'string'
when a.data_type='time'then'string'
when a.data_type='timestamp'then'string'
when a.data_type='tinyint'then'int'
when a.data_type='varbinary'then'binary'
when a.data_type='varchar'then'string'
else'未知类型'
end)," COMMENT '",COLUMN_COMMENT,"'" ),CHAR(10USING utf8)
orderby a.TABLE_NAME,a.ORDINAL_POSITION) ,
") COMMENT '",b.TABLE_COMMENT ,"'",CHAR(10USING utf8), "PARTITIONED BY (deal_date string COMMENT '数据日期')
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ('serialization.null.format' = 'null');",CHAR(10USING utf8)) AS ods_ddl
FROM
(
SELECT
TABLE_SCHEMA,
TABLE_NAME,
COLUMN_NAME,
ORDINAL_POSITION,
DATA_TYPE,
NUMERIC_PRECISION,
NUMERIC_SCALE,
COLUMN_COMMENT
FROM
information_schema.COLUMNS
WHERE
TABLE_SCHEMA='test_sql'
AND TABLE_NAME='amazon_fba_order_report'# 该条件限制单张张表的转换,可以注释掉,注释后为整个库的转换
) AS a
LEFTJOIN
information_schema.TABLES AS b
ON
a.TABLE_NAME=b.TABLE_NAME
AND a.TABLE_SCHEMA=b.TABLE_SCHEMA
where b.TABLE_TYPE='BASE TABLE'
GROUPBY
a.TABLE_NAME,
b.TABLE_COMMENT
;