# Hive面试题
# 求TopN
日志如下
1,huangxiaoming,45,a-c-d-f
2,huangzitao,36,b-c-d-e
3,huanglei,41,c-d-e
4,liushishi,22,a-d-e
5,liudehua,39,e-f-d
6,liuyifei,35,a-d-e
# 需求:
求出每种爱好中,年龄最大的两个人(爱好,年龄,姓名) 注意思考一个问题:如果某个爱好中的第二大年龄有多个相同的怎么办?
create database if not exists exercise;
use exercise;
drop table if exists exercise5;
create table exercise5(id int, name string, age int, favors string) row format delimited fields terminated by ",";
load data local inpath "/home/hadoop/exercise5.txt" into table exercise5;
# 分析步骤
需要把这种数据:
6,liuyifei,35,a-d-e
变成:
6,liuyifei,35,a
6,liuyifei,35,d
6,liuyifei,35,e
借助于虚拟视图技术: leteral view
select a.id as id, a.name as name, a.age as age, favor_view.favor
from exercise5 a
LATERAL VIEW explode(split(a.favors, "-")) favor_view as favor;
求出每种爱好的最大的年龄的最终的SQL:
select aa.favor, max(aa.age) as maxage
from
(
select a.id as id, a.name as name, a.age as age, favor_view.favor
from exercise5 a
LATERAL VIEW explode(split(a.favors, "-")) favor_view as favor
) aa
group by aa.favor;
结果:
a 45
b 36
c 45
d 45
e 41
f 45
# 扩展
1、你如何帮我把这个年龄的姓名拿出来呢? 2、如果要去每种爱好中的前2名呢?
select c.favor, c.name, c.age from
(
select aa.id, aa.name, aa.age, aa.favor,
row_number() over (distribute by aa.favor sort by aa.age desc) as index
from
(
select a.id as id, a.name as name, a.age as age, favor_view.favor
from exercise5 a
LATERAL VIEW explode(split(a.favors, "-")) favor_view as favor
) aa
) c
where c.index <= 2;
每种爱好中,年龄最大的前2个人:
a huangbo 45
a liuyifei 35
b xuzheng 36
c huangbo 45
c huanglei 41
d huangbo 45
d huanglei 41
e huanglei 41
e liudehua 39
f huangbo 45
f liudehua 39
# Hive 的 sort by 和 order by 的区别
# hive 如何调优
# Hive UADF 聚合函数
# Hive UDF 自定义函数 他们作用的数据范围是什么?
Hive自定义函数(UDF、UDAF) - CSDN博客 (opens new window)
# hive 动态分区是怎么回事?
hive动态分区插入实验 - CSDN博客 (opens new window) Hive动态分区 - 简书 (opens new window) [一起学Hive]之六-Hive的动态分区 – lxw的大数据田地 (opens new window)
# Hive:insert into table 与 insert overwrite table 区别
Hive:insert into table 与 insert overwrite table 区别 - cctext - 博客园 (opens new window)