Hive only needs to be installed on a single node.
tar -zxvf hive-0.9.0.tar.gz -C /cloud/
Configure the HIVE_HOME environment variable.

Install MySQL (remove any pre-installed mysql-libs first):
rpm -qa | grep mysql
rpm -e mysql-libs-5.1.66-2.el6_3.i686 --nodeps
rpm -ivh MySQL-server-5.1.73-1.glibc23.i386.rpm
rpm -ivh MySQL-client-5.1.73-1.glibc23.i386.rpm

Set the MySQL root password:
/usr/bin/mysql_secure_installation
(Note: delete the anonymous users and allow remote connections.)

Log in to MySQL:
mysql -u root -p
cp hive-default.xml.template hive-site.xml

Edit hive-site.xml (delete all existing content, keeping only the <property> entries), then add the following:
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://weekend01:3306/hive?createDatabaseIfNotExist=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
  <description>Driver class name for a JDBC metastore</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
  <description>username to use against metastore database</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>root</value>
  <description>password to use against metastore database</description>
</property>
The JDBC connection URL may need additional parameters:
createDatabaseIfNotExist=true
nullCatalogMeansCurrent=true
<configuration>
  <!-- WARNING!!! This file is auto generated for documentation purposes ONLY! -->
  <!-- WARNING!!! Any changes you make to this file will be ignored by Hive. -->
  <!-- WARNING!!! You must make your changes in hive-site.xml instead. -->
  <!-- Hive Execution Parameters -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.50.56:3306/hive?createDatabaseIfNotExist=true&amp;nullCatalogMeansCurrent=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
  </property>
</configuration>
(Note: inside XML the & joining the URL parameters must be written as &amp;.)
If you hit a permissions error, grant access in MySQL (run on the machine where MySQL is installed):
mysql -uroot -p
# *.* means all tables in all databases; % means any IP address or host may connect
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123' WITH GRANT OPTION;
FLUSH PRIVILEGES;
Sample data:
001001  iphone13     64   5000
001002  iphone13pro  128  6000
001003  s21          64   4000
create table trade_detail(id bigint, account string, income double, expenses double, time string)
row format delimited fields terminated by '\t';

Create a partitioned table (partitioned by):
create table td_part(id bigint, account string, income double, expenses double, time string)
partitioned by (logdate string)
row format delimited fields terminated by '\t';

Create an external table (external):
create external table td_ext(id bigint, account string, income double, expenses double, time string)
row format delimited fields terminated by '\t'
location '/td_ext';
Difference between a regular table and a partitioned table: when large volumes of data keep arriving, use a partitioned table.
create table book (id bigint, name string)
partitioned by (pubdate string)
row format delimited fields terminated by '\t';
load data local inpath './book.txt' overwrite into table book partition (pubdate='2010-08-22');
load data local inpath '/root/data.am' into table beauties partition (nation="USA");
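Filtering on the partition column lets Hive read only that partition's directory instead of scanning the whole table:
select * from book where pubdate='2010-08-22';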
select nation, avg(size) as avg_size from beauties group by nation order by avg_size;
Key metastore tables in the MySQL hive database:
dbs            -- one row per Hive database
tbls           -- one row per table
columns_v2     -- column names and types
partition_keys -- partition columns of each table
sds            -- storage descriptors (HDFS location, input/output format)

Default warehouse location on HDFS:
hdfs://master:9000/user/hive/warehouse
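These tables can be queried directly in MySQL; for example, a quick way to list every Hive table with its database and type (assuming the metastore database is named hive, as configured above):
use hive;
select d.NAME as db_name, t.TBL_NAME, t.TBL_TYPE
from TBLS t join DBS d on t.DB_ID = d.DB_ID;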
drop table t_phone_1;
set hive.cli.print.header=true;
update t_phone set capity=128 where id=1001;
(Note: this fails on a plain Hive table; UPDATE and DELETE are only supported on transactional tables.)
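For UPDATE to actually work, the table must be transactional; a minimal sketch, assuming ACID is enabled on the cluster (hive.txn.manager set to DbTxnManager) and using an illustrative table name:
-- ACID tables must be bucketed (before Hive 3), stored as ORC, and flagged transactional
create table t_phone_acid(id bigint, name string, capity int)
clustered by (id) into 2 buckets
stored as orc
tblproperties('transactional'='true');
update t_phone_acid set capity=128 where id=1001;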
CREATE TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
STORED AS SEQUENCEFILE;   -- or: STORED AS TEXTFILE
create table tab_ip_seq(id int,name string,ip string,country string)
row format delimited
fields terminated by ','
stored as sequencefile;
insert overwrite table tab_ip_seq select * from tab_ext;
The essence of and differences between the three storage formats textfile, sequencefile, and rcfile: TEXTFILE stores rows as plain text; SEQUENCEFILE is Hadoop's binary key-value container (splittable and compressible); RCFILE stores data in columnar row groups, so queries that touch only a few columns read less data. See the RCFILE sketch after the TEXTFILE example below.
create table tab_ip(id int,name string,ip string,country string)
row format delimited
fields terminated by ','
stored as textfile;
load data local inpath '/home/hadoop/ip.txt' into table tab_ext;
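For the third format, a minimal RCFILE sketch (the table name is illustrative); note that RCFILE and SEQUENCEFILE tables are populated with INSERT ... SELECT, because LOAD DATA copies files verbatim and does not convert plain text:
create table tab_ip_rc(id int,name string,ip string,country string)
row format delimited
fields terminated by ','
stored as rcfile;
insert overwrite table tab_ip_rc select * from tab_ext;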
CREATE EXTERNAL TABLE tab_ip_ext(id int, name string,
ip STRING,
country STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/external/hive';
CREATE TABLE tab_ip_ctas
AS
SELECT id new_id, name new_name, ip new_ip, country new_country
FROM tab_ip_ext
SORT BY new_id;
create table tab_ip_like like tab_ip;
insert overwrite table tab_ip_like
select * from tab_ip;
create table tab_ip_cluster(id int,name string,ip string,country string)
clustered by(id) into 3 buckets;
load data local inpath '/home/hadoop/ip.txt' overwrite into table tab_ip_cluster;
(Note: LOAD DATA only copies files into place and does not actually bucket the rows; to get real buckets, enforce bucketing and insert through a query:)
set hive.enforce.bucketing=true;
insert into table tab_ip_cluster select * from tab_ip;
select * from tab_ip_cluster tablesample(bucket 2 out of 3 on id);
create table tab_ip_part(id int,name string,ip string,country string)
partitioned by (part_flag string)
row format delimited fields terminated by ',';
load data local inpath '/home/hadoop/ip.txt' overwrite into table tab_ip_part
partition(part_flag='part1');
load data local inpath '/home/hadoop/ip_part2.txt' overwrite into table tab_ip_part
partition(part_flag='part2');
select * from tab_ip_part;
select * from tab_ip_part where part_flag='part2';
select count(*) from tab_ip_part where part_flag='part2';
alter table tab_ip change id id_alter string;
ALTER TABLE tab_cts ADD PARTITION (partCol = 'dt') location '/external/hive/dt';
show partitions tab_ip_part;
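Partitions can also be added or dropped explicitly without loading data (part3 is illustrative):
alter table tab_ip_part add partition (part_flag='part3');
alter table tab_ip_part drop partition (part_flag='part3');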
insert overwrite local directory '/home/hadoop/hivetemp/test.txt' select * from tab_ip_part where part_flag='part1';
insert overwrite directory '/hiveout.txt' select * from tab_ip_part where part_flag='part1';
create table tab_array(a array<string>,b array<string>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
Sample data:
tobenbrone,laihama,woshishui 13866987898,13287654321
abc,iloveyou,itcast 13866987898,13287654321
select a[0] from tab_array;
select * from tab_array where array_contains(b,'word');
insert into table tab_array select array(0),array(name,ip) from tab_ext t;
create table tab_map(name string,info map<string,string>)
row format delimited
fields terminated by '\t'
collection items terminated by ';'
map keys terminated by ':';
Sample data:
fengjie age:18;size:36A;addr:usa
furong age:28;size:39C;addr:beijing;weight:180KG
load data local inpath '/home/hadoop/hivetemp/tab_map.txt' overwrite into table tab_map;
insert into table tab_map select name,map('name',name,'ip',ip) from tab_ext;
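Individual map values are read back with bracket syntax:
select name, info['age'] as age, info['addr'] as addr from tab_map;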
create table tab_struct(name string,info struct<age:int,tel:string,addr:string>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
load data local inpath '/home/hadoop/hivetemp/tab_st.txt' overwrite into table tab_struct;
insert into table tab_struct select name,named_struct('age',id,'tel',name,'addr',country) from tab_ext;
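Struct fields are read back with dot syntax:
select name, info.age, info.tel, info.addr from tab_struct;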
hive -S -e 'select country,count(*) from tab_ext group by country' > /home/hadoop/hivetemp/e.txt
This execution mode makes it possible to drive batches of HQL statements from scripting languages (bash, Python).
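For example, the statements can live in a script file run with hive -f (the file name is illustrative):
-- report.hql, run with: hive -f report.hql
set hive.cli.print.header=true;
select country, count(*) as cnt from tab_ext group by country;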
select * from tab_ext sort by id desc limit 5;
select a.ip,b.book from tab_ext a join tab_ip_book b on(a.name=b.name);
select if(id=1,'first','no-first'),name from tab_ext;
hive> add jar /home/hadoop/myudf.jar;
hive> CREATE TEMPORARY FUNCTION my_lower AS 'org.dht.Lower';
select my_lower(name) from tab_ext;
add jar /home/hadoop/hibevearea.jar;
CREATE TEMPORARY FUNCTION getarea AS 'cn.itcast.bigdata.PhoneNbrToArea';
select getarea(phonenum) area, col1, col2 from t_connect_big;
package cn.itcast.bigdata;

import java.util.HashMap;

import org.apache.hadoop.hive.ql.exec.UDF;

public class PhoneNbrToArea extends UDF {

    private static HashMap<String, String> areaMap = new HashMap<>();
    static {
        areaMap.put("1388", "beijing");
        areaMap.put("1399", "tianjin");
        areaMap.put("1366", "nanjing");
    }

    // Must be declared public, or Hive cannot invoke it
    public String evaluate(String pnb) {
        String result = areaMap.get(pnb.substring(0, 4)) == null
                ? (pnb + " huoxing")
                : (pnb + " " + areaMap.get(pnb.substring(0, 4)));
        return result;
    }
}
<property>
  <name>hive.metastore.schema.verification</name>
  <value>true</value>
</property>
(When true, Hive checks that the schema version recorded in the metastore matches the Hive jars and refuses to start on a mismatch; set it to false to skip the check during initial setup.)