1.数据上传并执行ETL
把谷粒影音的两张表上传到hdfs集群上。

把上小节的maven项目打包后上传服务器后执行ETL。
yarn jar /root/tools/sust-hive-project.jar com.sust.hive.server.util.ETLDriver /myvideo/video/2008/0222 /myvideo_out

常见错误一:如果执行ETL计算任务报以下异常:

解决方案一:
调整内存参数,在hadoop的yarn-site.xml文件中更改配置文件:
<property>
<name>mapreduce.map.memory.mb</name>
<value>1536</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx1024M</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>3072</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx2560M</value>
</property>
解决方案二:
增大hadoop的虚拟内存大小,我通过增加linux的虚拟内存,依然报错,通过在配置文件中更改这个参数后,任务成功跑起来了,猜测hadoop应该是有自己的一套配置,没有直接用宿主机的linux,需要单独配置 在hadoop中更改yarn-site.xml中添加。
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>15.5</value>
</property>
常见错误二:执行yarn任务,如果报以下错误。
Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster
解决方案一:
在Hadoop安装目录下找到mapred-site.xml文件($HADOOP_HOME/etc/hadoop/mapred-site.xml),添加以下配置。
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
解决方案二:
执行下面命令:
hadoop classpath

在Hadoop安装目录下找到yarn-site.xml文件($HADOOP_HOME/etc/hadoop/yarn-site.xml),把刚才得到的信息配置进去,如下:
<configuration>
<property>
<name>yarn.application.classpath</name>
<value>复制的Hadoop classpath信息</value>
</property>
</configuration>
重启Hadoop集群即可。
2.创建数据分析表
使用beeline连接hive后。
create database myvideo;
#创建视频表
create table gulivideo_ori(
videoId string,
uploader string,
age int,
category array<string>,
length int,
views int,
rate float,
ratings int,
comments int,
relatedId array<string>)
row format delimited
fields terminated by "\t"
collection items terminated by "&"
stored as textfile;
create table gulivideo_orc(
videoId string,
uploader string,
age int,
category array<string>,
length int,
views int,
rate float,
ratings int,
comments int,
relatedId array<string>)
row format delimited fields terminated by "\t"
collection items terminated by "&"
stored as orc;
#创建用户表
create table gulivideo_user_ori(
uploader string,
videos int,
friends int)
row format delimited
fields terminated by "\t"
stored as textfile;
create table gulivideo_user_orc(
uploader string,
videos int,
friends int)
row format delimited
fields terminated by "\t"
stored as orc;
0: jdbc:hive2://hadoop-master:10000> show tables;
INFO : Compiling command(queryId=root_20230704220146_fd8b179c-e9ed-4be4-8de2-e71619b7ea81): show tables
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Semantic Analysis Completed (retrial = false)
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:tab_name, type:string, comment:from deserializer)], properties:null)
INFO : Completed compiling command(queryId=root_20230704220146_fd8b179c-e9ed-4be4-8de2-e71619b7ea81); Time taken: 0.018 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing command(queryId=root_20230704220146_fd8b179c-e9ed-4be4-8de2-e71619b7ea81): show tables
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing command(queryId=root_20230704220146_fd8b179c-e9ed-4be4-8de2-e71619b7ea81); Time taken: 0.024 seconds
INFO : OK
INFO : Concurrency mode is disabled, not creating a lock manager
+---------------------+
| tab_name |
+---------------------+
| gulivideo_orc |
| gulivideo_ori |
| gulivideo_user_orc |
| gulivideo_user_ori |
+---------------------+
4 rows selected (0.161 seconds)
导入ETL后的数据。
load data inpath "/myvideo_out" into table gulivideo_ori;
load data inpath "/myvideo/user/2008/0903" into table gulivideo_user_ori;
向ORC表插入数据
insert into table gulivideo_orc select * from gulivideo_ori;
insert into table gulivideo_user_orc select * from gulivideo_user_ori;
3.数据分析
统计视频观看数Top10
select
distinct (videoId),
views
from
gulivideo_orc
order by views desc
limit 10;
-----------------------------------------------
+--------------+-----------+
| videoid | views |
+--------------+-----------+
| dMH0bHeiRNg | 42513417 |
| 0XxI-hvPRRA | 20282464 |
| 1dmVU08zVpA | 16087899 |
| RB-wUgnyGv0 | 15712924 |
| QjA5faZF1A8 | 15256922 |
| -_CSo1gOd48 | 13199833 |
| 49IDp76kjPw | 11970018 |
| tYnn51C3X_w | 11823701 |
| pv5zWaTEVkI | 11672017 |
| D2kJZOfq7zk | 11184051 |
+--------------+-----------+
统计视频类别热度Top10
select
category_name,
count(*) category_count
from
(select
videoId,
category_name
from
gulivideo_orc
lateral view explode(category) tmp_category as category_name)t1
group by
category_name
order by
category_count desc
limit 10;
---------------------------------------
+----------------+-----------------+
| category_name | category_count |
+----------------+-----------------+
| Music | 179049 |
| Entertainment | 127674 |
| Comedy | 87818 |
| Film | 73293 |
| Animation | 73293 |
| Sports | 67329 |
| Gadgets | 59817 |
| Games | 59817 |
| Blogs | 48890 |
| People | 48890 |
+----------------+-----------------+
统计视频观看数Top20所属类别以及类别包含的Top20的视频个数.
select
category_name,
count(*) category_count
from
(select
category_name
from
(select
videoId,
views,
category
from
gulivideo_orc
order by
views desc
limit 20)t1
lateral view explode(category) tmp_category as category_name)t2
group by
category_name
order by
category_count desc;
-------------------------------------------------
+----------------+-----------------+
| category_name | category_count |
+----------------+-----------------+
| Comedy | 6 |
| Entertainment | 6 |
| Music | 5 |
| Blogs | 2 |
| People | 2 |
| UNA | 1 |
+----------------+-----------------+
统计视频观看数Top50所关联视频的所属类别Rank
select
category_name,
count(*) category_count
from
(select
explode(category) category_name
from
(select
category
from
(select
related_id
from
(select
relatedId,
views
from
gulivideo_orc
order by
views desc
limit 50)t1
lateral view explode(relatedId) tmp_relatedId as related_id
group by
related_id)t2
join gulivideo_orc orc
on t2.related_id=orc.videoId)t3)t4
group by
category_name
order by
category_count desc;
-----------------------------------------
+----------------+-----------------+
| category_name | category_count |
+----------------+-----------------+
| Comedy | 232 |
| Entertainment | 216 |
| Music | 195 |
| People | 51 |
| Blogs | 51 |
| Film | 47 |
| Animation | 47 |
| Politics | 22 |
| News | 22 |
| Gadgets | 20 |
| Games | 20 |
| Sports | 19 |
| DIY | 14 |
| Howto | 14 |
| UNA | 13 |
| Travel | 12 |
| Places | 12 |
| Animals | 11 |
| Pets | 11 |
| Autos | 4 |
| Vehicles | 4 |
+----------------+-----------------+
统计每个类别中的视频热度Top10,以Music为例
思路:
最终代码:
#创建类别表。
create table gulivideo_category(
videoId string,
uploader string,
age int,
categoryId string,
length int,
views int,
rate float,
ratings int,
comments int,
relatedId array<string>)
row format delimited
fields terminated by "\t"
collection items terminated by "&"
stored as orc;
#向category展开的表中插入数据。
insert into table gulivideo_category
select
videoId,
uploader,
age,
categoryId,
length,
views,
rate,
ratings,
comments,
relatedId
from
gulivideo_orc lateral view explode(category) catetory as categoryId;
#统计对应类别(Music)中的视频热度。
select
videoId,
views
from
gulivideo_category
where
categoryId = "Music"
order by
views desc
limit 10;
-----------------------------------------
+--------------+-----------+
| videoid | views |
+--------------+-----------+
| QjA5faZF1A8 | 15256922 |
| tYnn51C3X_w | 11823701 |
| pv5zWaTEVkI | 11672017 |
| 8bbTtPL1jRs | 9579911 |
| UMf40daefsI | 7533070 |
| -xEzGIuY7kw | 6946033 |
| d6C0bNDqf3Y | 6935578 |
| HSoVKUVOnfQ | 6193057 |
| 3URfWTEPmtE | 5581171 |
| thtmaZnxk_0 | 5142238 |
+--------------+-----------+
统计每个类别中视频流量Top10
select
videoId,
ratings
from
gulivideo_category
where
categoryId = "Music"
order by
ratings desc
limit 10;
-----------------------------------
+--------------+----------+
| videoid | ratings |
+--------------+----------+
| QjA5faZF1A8 | 120506 |
| pv5zWaTEVkI | 42386 |
| UMf40daefsI | 31886 |
| tYnn51C3X_w | 29479 |
| 59ZX5qdIEB0 | 21481 |
| FLn45-7Pn2Y | 21249 |
| -xEzGIuY7kw | 20828 |
| HSoVKUVOnfQ | 19803 |
| ARHyRI9_NB4 | 19243 |
| gg5_mlQOsUQ | 19190 |
+--------------+----------+
统计上传视频最多的用户Top10以及他们上传的视频
select
t1.uploader,
video.videoId,
video.views
from
(select
uploader,
videos
from
gulivideo_user_orc
order by
videos desc
limit 10)t1
join
gulivideo_orc video
on
t1.uploader=video.uploader
order by
views desc
limit 20;
-----------------------------------------------
+----------------+----------------+--------------+
| t1.uploader | video.videoid | video.views |
+----------------+----------------+--------------+
| expertvillage | -IxHBW0YpZw | 39059 |
| expertvillage | BU-fT5XI_8I | 29975 |
| expertvillage | ADOcaBYbMl0 | 26270 |
| expertvillage | yAqsULIDJFE | 25511 |
| expertvillage | vcm-t0TJXNg | 25366 |
| expertvillage | 0KYGFawp14c | 24659 |
| expertvillage | j4DpuPvMLF4 | 22593 |
| expertvillage | Msu4lZb2oeQ | 18822 |
| expertvillage | ZHZVj44rpjE | 16304 |
| expertvillage | foATQY3wovI | 13576 |
| expertvillage | -UnQ8rcBOQs | 13450 |
| expertvillage | crtNd46CDks | 11639 |
| expertvillage | D1leA0JKHhE | 11553 |
| expertvillage | NJu2oG1Wm98 | 11452 |
| expertvillage | CapbXdyv4j4 | 10915 |
| expertvillage | epr5erraEp4 | 10817 |
| expertvillage | IyQoDgaLM7U | 10597 |
| expertvillage | tbZibBnusLQ | 10402 |
| expertvillage | _GnCHodc7mk | 9422 |
| expertvillage | hvEYlSlRitU | 7123 |
+----------------+----------------+--------------+
统计每个类别视频观看数Top10
select
categoryId,
videoId,
views
from
(select
categoryId,
videoId,
views,
rank() over(partition by categoryId order by views desc) rk
from
gulivideo_category)t1
where rk<=10;
--------------------------------------------
+----------------+--------------+-----------+
| categoryid | videoid | views |
+----------------+--------------+-----------+
| Comedy | dMH0bHeiRNg | 42513417 |
| Comedy | 0XxI-hvPRRA | 20282464 |
| Comedy | 49IDp76kjPw | 11970018 |
| Comedy | 5P6UU6m3cqk | 10107491 |
| Comedy | _BuRwH59oAo | 9566609 |
| Comedy | MNxwAU_xAMk | 7066676 |
| Comedy | pYak2F1hUYA | 6322117 |
| Comedy | h0zAlXr1UOs | 5826923 |
| Comedy | C8rjr4jmWd0 | 5587299 |
| Comedy | R4cQ3BoHFas | 5508079 |
| UNA | aRNzWyD7C9o | 8825788 |
| UNA | jtExxsiLgPM | 5320895 |
| UNA | PxNNR4symuE | 4033376 |
| UNA | 8cjTSvvoddc | 3486368 |
| UNA | LIhbap3FlGc | 2849832 |
| UNA | lCSTULqmmYE | 2179562 |
| UNA | UyTxWvp8upM | 2106933 |
| UNA | y6oXEWowirI | 1666084 |
| UNA | _x2-AmY8FI8 | 1403113 |
| UNA | ICoDFooBXpU | 1376215 |
| Autos | RjrEQaG5jPM | 2803140 |
| Autos | cv157ZIInUk | 2773979 |
| Autos | Gyg9U1YaVk8 | 1832224 |
| Autos | 6GNB7xT3rNE | 1412497 |
| Autos | tth9krDtxII | 1347317 |
| Autos | 46LQd9dXFRU | 1262173 |
| Autos | pdiuDXwgrjQ | 1013697 |
| Autos | kY_cDpENQLE | 956665 |
| Autos | YtxfbxGz1u4 | 942604 |
| Autos | aCamHfJwSGU | 847442 |
| Blogs | -_CSo1gOd48 | 13199833 |
| Blogs | D2kJZOfq7zk | 11184051 |
| Blogs | pa_7P5AbUww | 5705136 |
| Blogs | f4B-r8KJhlE | 4937616 |
| Blogs | LB84A3zcmVo | 4866739 |
| Blogs | tXNquTYnyg0 | 3613323 |
| Blogs | EYppbbbSxjc | 2896562 |
| Blogs | LH7vrLlDZ6U | 2615359 |
| Blogs | bTV85fQhj0E | 2192656 |
| Blogs | eVFF98kNg8Q | 1813803 |
| DIY | hut3VRL5XRE | 2684989 |
| DIY | YYTpb-QXV0k | 2492153 |
| DIY | Pf3z935R37E | 2096661 |
| DIY | Yd99gyE4jCk | 1918946 |
| DIY | koQFjKwVFB0 | 1757071 |
| DIY | f5Fg6KFcOsU | 1751817 |
| DIY | STQ3nhXuuEM | 1713974 |
| DIY | FtKuBKIaVvs | 1520774 |
| DIY | M0ODskdEPnQ | 1503351 |
| DIY | uFwCk4UPtlM | 1500110 |
| Travel | bNF_P281Uu4 | 5231539 |
| Travel | s5ipz_0uC_U | 1198840 |
| Travel | 6jJW7aSNCzU | 1143287 |
| Travel | dVRUBIyRAYk | 1000309 |
| Travel | lqbt6X4ZgEI | 921593 |
| Travel | RIH1I1doUI4 | 879577 |
| Travel | AlPqL7IUT6M | 845180 |
| Travel | _5QUdvUhCZc | 819974 |
| Travel | m9A_vxIOB-I | 677876 |
| Travel | CL6f3Cyh85w | 611786 |
| Animation | sdUUx5FdySs | 5840839 |
| Animation | 6B26asyGKDo | 5147533 |
| Animation | H20dhY01Xjk | 3772116 |
| Animation | 55YYaJIrmzo | 3356163 |
| Animation | JzqumbhfxRo | 3230774 |
| Animation | eAhfZUZiwSE | 3114215 |
| Animation | h7svw0m-wO0 | 2866490 |
| Animation | tAq3hWBlalU | 2830024 |
| Animation | AJzU3NjDikY | 2569611 |
| Animation | ElrldD02if0 | 2337238 |
| Music | QjA5faZF1A8 | 15256922 |
| Music | tYnn51C3X_w | 11823701 |
| Music | pv5zWaTEVkI | 11672017 |
| Music | 8bbTtPL1jRs | 9579911 |
| Music | UMf40daefsI | 7533070 |
| Music | -xEzGIuY7kw | 6946033 |
| Music | d6C0bNDqf3Y | 6935578 |
| Music | HSoVKUVOnfQ | 6193057 |
| Music | 3URfWTEPmtE | 5581171 |
| Music | thtmaZnxk_0 | 5142238 |
| Gadgets | pFlcqWQVVuU | 3651600 |
| Gadgets | bcu8ZdJ2dQo | 2617568 |
| Gadgets | -G7h626wJwM | 2565170 |
| Gadgets | oMaTZFCLbq0 | 2554620 |
| Gadgets | GxSdKF5Fd38 | 2468395 |
| Gadgets | z1lj87UyvfY | 2373875 |
| Gadgets | KhCmfX_PQ7E | 1967929 |
| Gadgets | 2SVMFCZgvNM | 1813794 |
| Gadgets | gPutYwiiE0o | 1633482 |
| Gadgets | 7wt5FiZQrgM | 1399531 |
| Places | bNF_P281Uu4 | 5231539 |
| Places | s5ipz_0uC_U | 1198840 |
| Places | 6jJW7aSNCzU | 1143287 |
| Places | dVRUBIyRAYk | 1000309 |
| Places | lqbt6X4ZgEI | 921593 |
| Places | RIH1I1doUI4 | 879577 |
| Places | AlPqL7IUT6M | 845180 |
| Places | _5QUdvUhCZc | 819974 |
| Places | m9A_vxIOB-I | 677876 |
| Places | CL6f3Cyh85w | 611786 |
+----------------+--------------+-----------+
| categoryid | videoid | views |
+----------------+--------------+-----------+
| Sports | Ugrlzm7fySE | 2867888 |
| Sports | q8t7iSGAKik | 2735003 |
| Sports | 7vL19q8yL54 | 2527713 |
| Sports | g3dXfFZ6SH0 | 2295871 |
| Sports | P-bWsOK-h98 | 2268107 |
| Sports | HD8f_Qgwc50 | 2165475 |
| Sports | qjWQNwv-GJ4 | 2132591 |
| Sports | eN0V-rJQSHE | 2124653 |
| Sports | fM38G1450Ew | 2052778 |
| Sports | 3PGzrfE8rJg | 2013466 |
| Vehicles | RjrEQaG5jPM | 2803140 |
| Vehicles | cv157ZIInUk | 2773979 |
| Vehicles | Gyg9U1YaVk8 | 1832224 |
| Vehicles | 6GNB7xT3rNE | 1412497 |
| Vehicles | tth9krDtxII | 1347317 |
| Vehicles | 46LQd9dXFRU | 1262173 |
| Vehicles | pdiuDXwgrjQ | 1013697 |
| Vehicles | kY_cDpENQLE | 956665 |
| Vehicles | YtxfbxGz1u4 | 942604 |
| Vehicles | aCamHfJwSGU | 847442 |
| Animals | 2GWPOPSXGYI | 3660009 |
| Animals | xmsV9R8FsDA | 3164582 |
| Animals | 12PsUW-8ge4 | 3133523 |
| Animals | OeNggIGSKH8 | 2457750 |
| Animals | WofFb_eOxxA | 2075728 |
| Animals | AgEmZ39EtFk | 1999469 |
| Animals | a-gW3RbJd8U | 1836870 |
| Animals | 8CL2hetqpfg | 1646808 |
| Animals | QmroaYVD_so | 1645984 |
| Animals | Sg9x5mUjbH8 | 1527238 |
| Film | sdUUx5FdySs | 5840839 |
| Film | 6B26asyGKDo | 5147533 |
| Film | H20dhY01Xjk | 3772116 |
| Film | 55YYaJIrmzo | 3356163 |
| Film | JzqumbhfxRo | 3230774 |
| Film | eAhfZUZiwSE | 3114215 |
| Film | h7svw0m-wO0 | 2866490 |
| Film | tAq3hWBlalU | 2830024 |
| Film | AJzU3NjDikY | 2569611 |
| Film | ElrldD02if0 | 2337238 |
| People | -_CSo1gOd48 | 13199833 |
| People | D2kJZOfq7zk | 11184051 |
| People | pa_7P5AbUww | 5705136 |
| People | f4B-r8KJhlE | 4937616 |
| People | LB84A3zcmVo | 4866739 |
| People | tXNquTYnyg0 | 3613323 |
| People | EYppbbbSxjc | 2896562 |
| People | LH7vrLlDZ6U | 2615359 |
| People | bTV85fQhj0E | 2192656 |
| People | eVFF98kNg8Q | 1813803 |
| Politics | hr23tpWX8lM | 4706030 |
| Politics | YgW7or1TuFk | 2899397 |
| Politics | nda_OSWeyn8 | 2817078 |
| Politics | 7SV2sfoPAY8 | 2803520 |
| Politics | HBa9wdOANHw | 2348709 |
| Politics | xDh_pvv1tUM | 2335060 |
| Politics | p_YMigZmUuk | 2326680 |
| Politics | QCVxQ_3Ejkg | 2318782 |
| Politics | a9WB_PXjTBo | 2310583 |
| Politics | qSM_3fyiaxM | 2291369 |
| Games | pFlcqWQVVuU | 3651600 |
| Games | bcu8ZdJ2dQo | 2617568 |
| Games | -G7h626wJwM | 2565170 |
| Games | oMaTZFCLbq0 | 2554620 |
| Games | GxSdKF5Fd38 | 2468395 |
| Games | z1lj87UyvfY | 2373875 |
| Games | KhCmfX_PQ7E | 1967929 |
| Games | 2SVMFCZgvNM | 1813794 |
| Games | gPutYwiiE0o | 1633482 |
| Games | 7wt5FiZQrgM | 1399531 |
| Howto | hut3VRL5XRE | 2684989 |
| Howto | YYTpb-QXV0k | 2492153 |
| Howto | Pf3z935R37E | 2096661 |
| Howto | Yd99gyE4jCk | 1918946 |
| Howto | koQFjKwVFB0 | 1757071 |
| Howto | f5Fg6KFcOsU | 1751817 |
| Howto | STQ3nhXuuEM | 1713974 |
| Howto | FtKuBKIaVvs | 1520774 |
| Howto | M0ODskdEPnQ | 1503351 |
| Howto | uFwCk4UPtlM | 1500110 |
| Pets | 2GWPOPSXGYI | 3660009 |
| Pets | xmsV9R8FsDA | 3164582 |
| Pets | 12PsUW-8ge4 | 3133523 |
| Pets | OeNggIGSKH8 | 2457750 |
| Pets | WofFb_eOxxA | 2075728 |
| Pets | AgEmZ39EtFk | 1999469 |
| Pets | a-gW3RbJd8U | 1836870 |
| Pets | 8CL2hetqpfg | 1646808 |
| Pets | QmroaYVD_so | 1645984 |
| Pets | Sg9x5mUjbH8 | 1527238 |
| Entertainment | 1dmVU08zVpA | 16087899 |
| Entertainment | RB-wUgnyGv0 | 15712924 |
| Entertainment | vr3x_RRJdd4 | 10786529 |
| Entertainment | lsO6D1rwrKc | 10334975 |
| Entertainment | ixsZy2425eY | 7456875 |
| Entertainment | RUCZJVJ_M8o | 6952767 |
| Entertainment | tFXLbXyXy6M | 5810013 |
| Entertainment | 7uwCEnDgd5o | 5280504 |
| Entertainment | 2KrdBUFeFtY | 4676195 |
| Entertainment | vD4OnHCRd_4 | 4230610 |
+----------------+--------------+-----------+
| categoryid | videoid | views |
+----------------+--------------+-----------+
| News | hr23tpWX8lM | 4706030 |
| News | YgW7or1TuFk | 2899397 |
| News | nda_OSWeyn8 | 2817078 |
| News | 7SV2sfoPAY8 | 2803520 |
| News | HBa9wdOANHw | 2348709 |
| News | xDh_pvv1tUM | 2335060 |
| News | p_YMigZmUuk | 2326680 |
| News | QCVxQ_3Ejkg | 2318782 |
| News | a9WB_PXjTBo | 2310583 |
| News | qSM_3fyiaxM | 2291369 |
+----------------+--------------+-----------+