建表、insert数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
create table tmp_login ( user_id int (11) , login_date datetime ); insert into tmp_login values (2, '2020-05-29 11:12:12' ); insert into tmp_login values (2, '2020-05-29 15:12:12' ); insert into tmp_login values (2, '2020-05-30 11:12:12' ); insert into tmp_login values (2, '2020-05-31 11:12:12' ); insert into tmp_login values (2, '2020-06-01 11:12:12' ); insert into tmp_login values (2, '2020-06-02 11:12:12' ); insert into tmp_login values (2, '2020-06-03 11:12:12' ); insert into tmp_login values (2, '2020-06-04 11:12:12' ); insert into tmp_login values (2, '2020-06-05 11:12:12' ); insert into tmp_login values (2, '2020-06-06 11:12:12' ); insert into tmp_login values (2, '2020-06-07 11:12:12' ); insert into tmp_login values (7, '2020-06-01 11:12:12' ); insert into tmp_login values (7, '2020-06-02 11:12:12' ); insert into tmp_login values (7, '2020-06-03 11:12:12' ); insert into tmp_login values (7, '2020-06-05 11:12:12' ); insert into tmp_login values (7, '2020-06-06 11:12:12' ); insert into tmp_login values (7, '2020-06-07 11:12:12' ); insert into tmp_login values (7, '2020-06-08 11:12:12' ); |
方法一 row_number()
1.查询所有用户的每日登录记录
1
2
|
select distinct user_id, date (login_date) as days from tmp_login; |
2.row_number()计算登录时间排序
1
2
3
|
select user_id, days, row_number() over(partition by user_id order by days) as rn from ( select distinct user_id, date (login_date) as days from tmp_login) t1; |
3.用登录时间 - row_number(),如果得到的日期相同,则认为是连续登录日期
1
2
3
4
5
6
|
select *, date_sub(days, interval rn day ) as results from ( select user_id, days, row_number() over(partition by user_id order by days) as rn from ( select distinct user_id, date (login_date) as days from tmp_login) t1 ) t2; |
4. 按user_id、results分组就可得出连续登录天数
1
2
3
4
5
6
7
8
9
|
select user_id, count (*) as num_days from ( select *, date_sub(days, interval rn day ) as results from ( select user_id, days, row_number() over(partition by user_id order by days) as rn from ( select distinct user_id, date (login_date) as days from tmp_login) t1 ) t2) t3 group by user_id , results; |
直接用日期减去row_number(),不用date_sub的话,遇到登录日期跨月时会计算错误,
方法二lead() 或 lag()
这种情况适合的场景是,需要查找连续登录超过n天的用户,n为确定值
如果n为4,即计算连续登录超过4天的用户
1
2
3
4
5
6
7
|
-- lead计算连续登录 select distinct user_id from ( select user_id, days, datediff(lead(days, 3, '1970-01-01' ) over(partition by user_id order by days), days) as results from ( select distinct user_id, date (login_date) as days from tmp_login) t1) t2 where results = 3; |
连续登录4天,则日期差应该为3。
以上为个人经验,希望能给大家一个参考,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/BurningSilence/article/details/121558764