直接上干货,HiveSQL高级进阶技巧,重要性不言而喻。掌握这10个技巧,你的SQL水平将有一个质的提升,达到一个较高的层次!
1.删除:
insert overwrite tmp select * from tmp where id != '666'; 复制代码
2.更新:
直接上干货,HiveSQL高级进阶技巧,重要性不言而喻。掌握这10个技巧,你的SQL水平将有一个质的提升,达到一个较高的层次!
insert overwrite tmp select id,label, if(id = '1' and label = 'grade','25',value) as value from tmp where id != '666'; 复制代码
3.行转列:
-- Step03:最后将info的内容切分 select id,split(info,':')[0] as label,split(info,':')[1] as value from ( -- Step01:先将数据拼接成“heit:180,weit:60,age:26” select id,concat('heit',':',height,',','weit',':',weight,',','age',':',age) as value from tmp ) as tmp -- Step02:然后在借用explode函数将数据膨胀至多行 lateral view explode(split(value,',')) mytable as info; 复制代码
4.列转行1:
select tmp1.id as id,tmp1.value as height,tmp2.value as weight,tmp3.value as age from (select id,label,value from tmp2 where label = 'heit') as tmp1 join on tmp1.id = tmp2.id (select id,label,value from tmp2 where label = 'weit') as tmp2 join on tmp1.id = tmp2.id (select id,label,value from tmp2 where label = 'age') as tmp3 on tmp1.id = tmp3.id; 复制代码
5.列转行2:
select id,tmpmap['height'] as height,tmpmap['weight'] as weight,tmpmap['age'] as age from ( select id, str_to_map(concat_ws(',',collect_set(concat(label,':',value))),',',':') as tmpmap from tmp2 group by id ) as tmp1; 复制代码
6.分析函数1:
select id,label,value, lead(value,1,0)over(partition by id order by label) as lead, lag(value,1,999)over(partition by id order by label) as lag, first_value(value)over(partition by id order by label) as first_value, last_value(value)over(partition by id order by label) as last_value from tmp; 复制代码
7.分析函数2:
select id,label,value, row_number()over(partition by id order by value) as row_number, rank()over(partition by id order by value) as rank, dense_rank()over(partition by id order by value) as dense_rank from tmp; 复制代码
8.多维分析1:
select col1,col2,col3,count(1), Grouping__ID from tmp group by col1,col2,col3 grouping sets(col1,col2,col3,(col1,col2),(col1,col3),(col2,col3),()) 复制代码
9.多维分析2:
select col1,col2,col3,count(1), Grouping__ID from tmp group by col1,col2,col3 with cube; 复制代码
10.数据倾斜groupby:
select label,sum(cnt) as all from ( select rd,label,sum(1) as cnt from ( select id,round(rand(),2) as rd,value from tmp1 ) as tmp group by rd,label ) as tmp group by label; 复制代码
11.数据倾斜join:
select label,sum(value) as all from ( select rd,label,sum(value) as cnt from ( select tmp1.rd as rd,tmp1.label as label,tmp1.value*tmp2.value as value from ( select id,round(rand(),1) as rd,label,value from tmp1 ) as tmp1 join ( select id,rd,label,value from tmp2 lateral view explode(split('0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9',',')) mytable as rd ) as tmp2 on tmp1.rd = tmp2.rd and tmp1.label = tmp2.label ) as tmp1 group by rd,label ) as tmp1 group by label;
关键词:大数据培训