练习X
Map代码
Reduce代码
Util代码
驱动代码
2.把预处理之后的数据进行入库到hive中
数据入库效果【截图】
数据入库命令【命令】
-- Create the working database and switch to it, so that the tables below are
-- created inside `video` (later steps query them as video.<table>).
create database if not exists video;
use video;

-- Raw video records stored as text, fields separated by ":".
create table if not exists video_ori(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string)
row format delimited
fields terminated by ":"
stored as textfile;

-- Raw user records stored as text, fields separated by ",".
create table if not exists video_user_ori(
    uploader string,
    videos   string,
    friends  string)
row format delimited
fields terminated by ","
stored as textfile;

-- ORC copy of video_ori with the same ten string columns.
-- NOTE(review): the delimiter clause is kept from the original; it presumably
-- has no effect on ORC storage — confirm before removing it.
create table if not exists video_orc(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string)
row format delimited
fields terminated by ":"
stored as ORC;

-- ORC copy of video_user_ori with the same three string columns.
create table if not exists video_user_orc(
    uploader string,
    videos   string,
    friends  string)
row format delimited
fields terminated by ","
stored as ORC;
- 分别导入预处理之后的视频数据到原始表video_ori和导入原始用户表的数据到video_user_ori中
-- Load the local files into the raw tables. `overwrite` replaces any rows
-- already in video_ori; the user file is appended to video_user_ori.
hive (video)> load data local inpath '/opt/part-r-00000' overwrite into table video_ori;
hive (video)> load data local inpath '/opt/user.txt' into table video_user_ori;
- 从原始表查询数据并插入对应的ORC表中
-- Copy every row of each raw text table into its ORC counterpart.
hive (video)> insert into table video_orc select * from video_ori;
hive (video)> insert into table video_user_orc select * from video_user_ori;
对入库之后的数据进行hivesql查询操作
3.1从视频表中统计出视频评分为5分的视频信息,把查询结果保存到/export/rate.txt
# Run the query non-interactively and redirect its standard output to 5.txt (relative path).
# NOTE(review): the task text names /export/rate.txt as the destination, while a later
# step loads /opt/5.txt — confirm which path is intended.
# NOTE(review): `rate` is declared as string, so `rate=5` relies on Hive's implicit conversion.
[root@node01 opt]# hive -e "select * from video.video_orc where rate=5 " > 5.txt
- 从视频表中统计出评论数大于100条的视频信息,把查询结果保存到/export/comments.txt
# Run the query non-interactively and redirect its standard output to /opt/100.txt.
# NOTE(review): the task text names /export/comments.txt as the destination, while a
# later step loads /opt/100.txt — confirm which path is intended.
# NOTE(review): `comments` is declared as string, so `comments >100` relies on Hive's implicit conversion.
hive -e "select * from video.video_orc where comments >100 " > /opt/100.txt
4、把hive分析出的数据保存到hbase中
4.1创建hive对应的数据库外部表
请写出创建rate外部表的语句:
-- External, tab-delimited text table `rate` with ten string columns.
create external table rate(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string
)
row format delimited
fields terminated by "\t"
stored as textfile;
创建comments外部表的语句
-- External, tab-delimited text table `comments` with ten string columns.
create external table comments(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string
)
row format delimited
fields terminated by "\t"
stored as textfile;
加载第3步的结果数据到外部表中
-- Append the two exported result files from the local filesystem to the external tables.
load data local inpath '/opt/5.txt'
into table rate;
load data local inpath '/opt/100.txt'
into table comments;
- 创建hive管理表与HBase进行映射
Hive中的rate、comments两个表分别对应hbase中的hbase_rate、hbase_comments两个表
- 创建hbase_rate表并进行映射:
- 创建hbase_comments表并进行映射:
-- Hive table backed by the HBase table `hbase_rate`.
-- The mapping starts with `:key`, which binds the first Hive column (videoId)
-- to the HBase row key explicitly instead of relying on the implicit default;
-- the remaining nine columns map to qualifiers in column family `cf`.
create table video.hbase_rate(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping" = ":key,cf:uploader,cf:age,cf:category,cf:length,cf:views,cf:rate,cf:ratings,cf:comments,cf:relatedId")
tblproperties("hbase.table.name" = "hbase_rate");

-- Hive table backed by the HBase table `hbase_comments`, with the same
-- column layout and the same explicit row-key mapping.
create table video.hbase_comments(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping" = ":key,cf:uploader,cf:age,cf:category,cf:length,cf:views,cf:rate,cf:ratings,cf:comments,cf:relatedId")
tblproperties("hbase.table.name" = "hbase_comments");
- 请写出通过insert overwrite select,插入hbase_rate表的语句
请写出通过insert overwrite select,插入hbase_comments表的语句
-- Populate each HBase-backed table from the external table of the matching name.
insert into table hbase_rate
select * from rate;
insert into table hbase_comments
select * from comments;
- 通过hbaseapi进行查询操作
5.1请使用hbaseapi对hbase_rate表,按照startRowKey=1和endRowKey=100进行扫描,并查询出结果。
5.2请使用hbaseapi对hbase_comments表,只查询comments列的值。
代码结果【截图】
代码
代码结果【截图】
代码
练习X
Map代码
Reduce代码
Util代码
驱动代码
2.把预处理之后的数据进行入库到hive中
数据入库效果【截图】
数据入库命令【命令】
-- Create the working database and switch to it, so that the tables below are
-- created inside `video` (later steps query them as video.<table>).
create database if not exists video;
use video;

-- Raw video records stored as text, fields separated by ":".
create table if not exists video_ori(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string)
row format delimited
fields terminated by ":"
stored as textfile;

-- Raw user records stored as text, fields separated by ",".
create table if not exists video_user_ori(
    uploader string,
    videos   string,
    friends  string)
row format delimited
fields terminated by ","
stored as textfile;

-- ORC copy of video_ori with the same ten string columns.
-- NOTE(review): the delimiter clause is kept from the original; it presumably
-- has no effect on ORC storage — confirm before removing it.
create table if not exists video_orc(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string)
row format delimited
fields terminated by ":"
stored as ORC;

-- ORC copy of video_user_ori with the same three string columns.
create table if not exists video_user_orc(
    uploader string,
    videos   string,
    friends  string)
row format delimited
fields terminated by ","
stored as ORC;
- 分别导入预处理之后的视频数据到原始表video_ori和导入原始用户表的数据到video_user_ori中
-- Load the local files into the raw tables. `overwrite` replaces any rows
-- already in video_ori; the user file is appended to video_user_ori.
hive (video)> load data local inpath '/opt/part-r-00000' overwrite into table video_ori;
hive (video)> load data local inpath '/opt/user.txt' into table video_user_ori;
- 从原始表查询数据并插入对应的ORC表中
-- Copy every row of each raw text table into its ORC counterpart.
hive (video)> insert into table video_orc select * from video_ori;
hive (video)> insert into table video_user_orc select * from video_user_ori;
对入库之后的数据进行hivesql查询操作
3.1从视频表中统计出视频评分为5分的视频信息,把查询结果保存到/export/rate.txt
# Run the query non-interactively and redirect its standard output to 5.txt (relative path).
# NOTE(review): the task text names /export/rate.txt as the destination, while a later
# step loads /opt/5.txt — confirm which path is intended.
# NOTE(review): `rate` is declared as string, so `rate=5` relies on Hive's implicit conversion.
[root@node01 opt]# hive -e "select * from video.video_orc where rate=5 " > 5.txt
- 从视频表中统计出评论数大于100条的视频信息,把查询结果保存到/export/comments.txt
# Run the query non-interactively and redirect its standard output to /opt/100.txt.
# NOTE(review): the task text names /export/comments.txt as the destination, while a
# later step loads /opt/100.txt — confirm which path is intended.
# NOTE(review): `comments` is declared as string, so `comments >100` relies on Hive's implicit conversion.
hive -e "select * from video.video_orc where comments >100 " > /opt/100.txt
4、把hive分析出的数据保存到hbase中
4.1创建hive对应的数据库外部表
请写出创建rate外部表的语句:
-- External, tab-delimited text table `rate` with ten string columns.
create external table rate(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string
)
row format delimited
fields terminated by "\t"
stored as textfile;
创建comments外部表的语句
-- External, tab-delimited text table `comments` with ten string columns.
create external table comments(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string
)
row format delimited
fields terminated by "\t"
stored as textfile;
加载第3步的结果数据到外部表中
-- Append the two exported result files from the local filesystem to the external tables.
load data local inpath '/opt/5.txt'
into table rate;
load data local inpath '/opt/100.txt'
into table comments;
- 创建hive管理表与HBase进行映射
Hive中的rate、comments两个表分别对应hbase中的hbase_rate、hbase_comments两个表
- 创建hbase_rate表并进行映射:
- 创建hbase_comments表并进行映射:
-- Hive table backed by the HBase table `hbase_rate`.
-- The mapping starts with `:key`, which binds the first Hive column (videoId)
-- to the HBase row key explicitly instead of relying on the implicit default;
-- the remaining nine columns map to qualifiers in column family `cf`.
create table video.hbase_rate(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping" = ":key,cf:uploader,cf:age,cf:category,cf:length,cf:views,cf:rate,cf:ratings,cf:comments,cf:relatedId")
tblproperties("hbase.table.name" = "hbase_rate");

-- Hive table backed by the HBase table `hbase_comments`, with the same
-- column layout and the same explicit row-key mapping.
create table video.hbase_comments(
    videoId   string,
    uploader  string,
    age       string,
    category  string,
    length    string,
    views     string,
    rate      string,
    ratings   string,
    comments  string,
    relatedId string)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping" = ":key,cf:uploader,cf:age,cf:category,cf:length,cf:views,cf:rate,cf:ratings,cf:comments,cf:relatedId")
tblproperties("hbase.table.name" = "hbase_comments");
- 请写出通过insert overwrite select,插入hbase_rate表的语句
请写出通过insert overwrite select,插入hbase_comments表的语句
-- Populate each HBase-backed table from the external table of the matching name.
insert into table hbase_rate
select * from rate;
insert into table hbase_comments
select * from comments;
- 通过hbaseapi进行查询操作
5.1请使用hbaseapi对hbase_rate表,按照startRowKey=1和endRowKey=100进行扫描,并查询出结果。
5.2请使用hbaseapi对hbase_comments表,只查询comments列的值。
代码结果【截图】
代码
代码结果【截图】
代码
发布评论