时间:2023-03-16 00:24:02 | 来源:电子商务
时间:2023-03-16 00:24:02 来源:电子商务
字段 | 说明 |
---|---|
user_id | 整数型,用户ID |
item_id | 整数型,商品ID |
category_id | 整数型,商品品类ID |
behavior_type | 字符型,行为类型('pv','buy','cart','fav') |
timestamp | 整数型,时间戳 |
行为类型 | 说明 |
---|---|
pv | page view,浏览 |
buy | 购买 |
cart | 加入购物车 |
fav | 收藏 |
select *from userbehaviorgroup by user_id,item_id,behavior_type,timestamphaving count(*) > 1;
查找出来所有字段均为Null值,说明该数据集中没有重复值。select *from userbehaviorwhere user_id is NULL or category_id is NULLor item_id is NULL or behavior_type is NULLor timestamp is NULL;
查找出来所有字段均为Null值,说明该数据集中没有空值。-- 给表添加一列叫dates,用于记录行为发生的日期,如'2017-11-25'为2017年11月25日alter table userbehavior add datesvarchar(255);-- 给新列dates赋值update userbehaviorset dates =from_unixtime(timestamp,'%Y-%m-%d');-- 给表添加一列叫hours,用于记录发生的小时时间,如'21'为晚上9点alter table userbehavior add hoursvarchar(255);-- 给新列hours赋值-- 特别注意,此处的时间格式中,"小时“部分的字符需要用大写的H,才能转为24小时制update userbehaviorset hours = hour(from_unixtime(timestamp,'%H:%i:%s'));-- 给表添加一列叫datetime,用于记录行为发生的日期和时间,如'2017-11-25 21:30:00'为2017年11月25日晚上9点30分alter table userbehavior add datetimevarchar(255);-- 给新列datetime赋值update userbehaviorset datetime =from_unixtime(timestamp,'%Y-%m-%d %H:%i:%s');-- 给表添加一列叫weekday,用于记录行为发生在星期几,如'1'为星期一alter table userbehavior add weekdayvarchar(255);/*-mysql中,可以通过函数WEEKDAY()将日期转化为星期几,但是索引是从0开始,即0表示星期一、1表示星期二.....-为了使该字段更符合阅读习惯,可以将其进行加1操作,结果即为1对应星期一,2对应星期二...*/update userbehaviorset weekday = WEEKDAY(datetime) + 1;
接着,我们将超出分析范围的时间范围剔除掉:-- 将超出指定时间范围的数据剔除掉DELETEfrom userbehaviorwhere dates < '2017-11-25'or dates > '2017-12-03';
经过处理后,数据集如下(以下给出部分数据):select count(distinct user_id) as 会员数from userbehavior;
即在2017年11月25日至2017年12月3日之间,总共记录了983名会员的行为数据。A(Awareness)认知:广告曝光、页面浏览人群AIPL营销模型可以将会员行为与相应的运营周期联系起来。
I(Interested)兴趣:关注互动、浏览店铺主页、浏览产品详情页、收藏加购
P(Purchase)购买:支付
L(Loyalty)忠诚:复购、评论、分享
create view behavior ASselect user_id,datetime,dates,hours,max(case behavior_type when 'pv' then 1 else 0 end) as view,max(case behavior_type when 'fav' then 1 else 0 end) as favor,max(case behavior_type when 'cart' then 1 else 0 end) as cart,max(case behavior_type when 'buy' then 1 else 0 end) as buyfrom userbehaviorgroup by user_id,datetime;
创建出的视图如下所示(给出部分数据):select sum(view) as 'A',sum(favor)+sum(cart) as 'I',sum(buy) as 'P'from behavior;
计算L的值:select user_id,datetime,buy,dense_rank() over (partition by user_id order by datetime) as n_consumefrom behaviorwhere buy = 1order by user_id,datetime;
第二步,剔除掉用户第一次购买的记录并计算复购次数select sum(buy) as Lfrom(select user_id,datetime,buy,dense_rank() over (partition by user_id order by datetime asc) as n_consumefrom behaviorwhere buy = 1) as twhere n_consume > 1;
3、总结#第一步,查询出每个小时会员的浏览次数select hour(datetime) as '小时',count(*) as 'A-浏览'from behaviorwhere view = 1group by hour(datetime)order by hour(datetime) ASC;
#第二步,查询出每个小时会员进行收藏与加入购物车行为的次数select hour(datetime) as '小时',count(*) as 'I-兴趣'from behaviorwhere favor = 1 or cart = 1group by hour(datetime)order by hour(datetime) asc;
#第三步,根据前两步计算出A→I转化率的均值select a.`小时`,avg(`A-浏览`) as `A-浏览 均值`,avg(`I-兴趣`) as `I-兴趣 均值`,concat(round(avg(`I-兴趣`)/avg(`A-浏览`),3)*100,'%') as 'A→I转化率'from(select hour(datetime) as '小时',count(*) as 'A-浏览'from behaviorwhere view = 1group by hour(datetime)) as aleft join(select hour(datetime) as '小时',count(*) as 'I-兴趣'from behaviorwhere favor = 1 or cart = 1group by hour(datetime)) as ion a.`小时` = i.`小时`group by a.`小时`order by a.`小时`;
可以看到,在查询结果中,有很多不同的值,根据对比分析方法,我们需要【比较基准】才知道这些转化率哪些高,哪些低,才可进一步判断。这里,以常见的“平均值”作为对比的基准,代码实现如下(即上一步去掉分组的步骤):select a.`小时`,avg(`A-浏览`) as `A-浏览 均值`,avg(`I-兴趣`) as `I-兴趣 均值`,concat(round(avg(`I-兴趣`)/avg(`A-浏览`),3)*100,'%') as 'A→I转化率'from(select hour(datetime) as '小时',count(*) as 'A-浏览'from behaviorwhere view = 1group by hour(datetime)) as aleft join(select hour(datetime) as '小时',count(*) as 'I-兴趣'from behaviorwhere favor = 1 or cart = 1group by hour(datetime)) as ion a.`小时` = i.`小时`order by a.`小时`;
每个时段的浏览人数都不同,选择浏览值大于平均值3733的时段来观察,在这些时段中,我们选择转化率大于平均值9.1%的时段。select count(distinct item_id) as 'A'from userbehaviorwhere behavior_type = 'pv';
结果如图所示,共有60592款商品被有效浏览。select count(distinct item_id) as 'I'from userbehaviorwhere behavior_type in ('favor','cart');
接下来,结合【假设检验分析方法】,针对有效浏览与产生兴趣的商品数差异较大的情况,提出以下的假设。#第一步,查询出被浏览最多的前100款商品,按降序排列select item_id,count(*) as 'A'from userbehaviorwhere behavior_type = 'pv'group by item_idorder by A DESClimit 100;
查询结果如下(给出部分数据预览):#第二步,查询出用户最感兴趣的前100款商品,按降序排列select item_id,count(*) as 'I'from userbehaviorwhere behavior_type in ('favor','cart')group by item_idorder by I DESClimit 100;
查询结果如下(给出部分数据预览):#第三步,计算交叉数量select count(*) as 'AI交叉商品数'from(select item_id,count(*) as 'A'from userbehaviorwhere behavior_type = 'pv'group by item_idorder by A DESClimit 100) as ACinner JOIN(select item_id,count(*) as 'I'from userbehaviorwhere behavior_type in ('favor','cart')group by item_idorder by I DESClimit 100) as ICon AC.item_id = IC.item_id;
结果显示,仅有5款商品既是高浏览量,也是高收藏/加购的,活动:天猫双12年终品牌盛典
预热时间:2017年12月7日00:00:00-2017年12月11日23:59:59;
上线时间:2017年12月12日00:00:00-2017年12月12日23:59:59。
#创建一个视图,记录所有用户的购买记录CREATE view consumeas select `user_id`,`dates`,dense_rank() OVER (PARTITION BY `user_id` ORDER BY `dates`) AS `consume_num` from `behavior` where `buy` = 1group by `user_id`,`dates` order by `user_id`,`dates`;
结果如下(给出部分数据预览):-- 计算出每个用户每次消费的回购周期select a.user_id,a.dates,a.consume_num,datediff(a.dates,b.dates) as '回购周期(天)'FROM(select * from consume) as ainner join(select * from consume) as bon a.user_id = b.user_idand b.consume_num = a.consume_num - 1;
结果如下(给出部分数据预览):-- 将每人次的回购周期进行平均,取得最终的平均回购周期select avg(datediff(a.dates,b.dates)) as '平均回购周期(天)'from(select * from consume) as ainner join(select * from consume) as bwhere a.user_id = b.user_idand b.consume_num = a.consume_num - 1;
结论:根据计算结果,预计在用户购买行为发生后的2-3天内对其进行触达会有较好的效果。#创建视图,统计每个客户的购买率与其他数据create view user_featureASselectuser_id,sum(view) as '浏览',sum(favor) as '收藏',sum(cart) as '加购',sum(buy) as '购买',(sum(cart) + sum(favor))/sum(view) as '加购率',sum(buy)/(sum(view) + sum(cart) + sum(favor)) as '购买率',dense_rank() over (order by sum(buy)/(sum(view) + sum(cart) + sum(favor)) desc) as '购买率排序'from behaviorgroup by user_id;
创建完的视图如下所示(给出部分数据预览):select *from user_featureorder by 购买率 desc;
计算完每个客户的购买率之后,接着挖掘高购买率客户与低购买率客户的特征。select count(distinct 购买率排序) as 总名次from user_feature;
根据387 * 20% ≈ 77,高购买率客户可以被定义为前77名的客户,而根据387 - 77 = 310,低购买率客户可以被定义为310名之后的客户。select avg(浏览) as '平均浏览数',avg(收藏 + 加购) as '平均兴趣数',avg(加购率) as '平均加购率'from user_featurewhere '购买率排序' <= 77;
select count(distinct category_id) as '购买品类集中度' from userbehaviorwhere behavior_type ='buy'and user_id in (select distinct user_id from user_featurewhere `购买率排序` <= 77);
第三步,查询出低购买率客户的特征select avg(`浏览`) as '平均浏览数',avg(`收藏`+`加购`) as '平均兴趣数',avg(`加购率`) as '平均加购率'from user_featurewhere `购买率排序` > 310 and `购买率` > 0;
select count(distinct category_id) as '购买品类集中度' from userbehaviorwhere behavior_type ='buy'and user_id in (select distinct user_id from user_featurewhere `购买率排序` > 310);
总结:#第一步,计算每位用户最近一次购买行为的间隔天数select user_id,datediff('2017-12-03',dates) as 'R'from consume as awhere consume_num = (select max(consume_num)from consume as bwhere a.user_id = b.user_id);
查询结果如下(给出部分数据):select user_id,(sum(favor) + sum(cart)) as 'F',sum(buy) as 'M'from behaviorgroup by user_id;
查询结果如下(给出部分数据):create view RFMasselect r.user_id,R,F,Mfrom(select user_id,datediff('2017-12-03',dates) as 'R'from consume as awhere consume_num = (select max(consume_num)from consume as bwhere a.user_id = b.user_id)) as rleft join (select user_id,(sum(favor) + sum(cart)) as 'F',sum(buy) as 'M'from behaviorgroup by user_id) as fmon r.user_id = fm.user_id;
查询结果如下(给出部分数据):#第四步,计算出RFM的阈值(即比较基准)select avg(R),avg(F),avg(M)from RFM;
第五步,根据RFM的比较基准以及RFM的定义,将每位可以划分到不同的用户价值组中create view user_RFM asselect user_id, case when R < 2.4501 and F > 9.3115 and M > 2.8018 then '重要价值用户' when R < 2.4501 and F > 9.3115 and M < 2.8018 then '一般价值用户' when R < 2.4501 and F < 9.3115 and M > 2.8018 then '重要发展用户' when R < 2.4501 and F < 9.3115 and M < 2.8018 then '一般发展用户' when R > 2.4501 and F > 9.3115 and M > 2.8018 then '重要唤回用户' when R > 2.4501 and F > 9.3115 and M < 2.8018 then '一般唤回用户' when R > 2.4501 and F < 9.3115 and M > 2.8018 then '重要挽留用户' when R > 2.4501 and F < 9.3115 and M < 2.8018 then '一般挽留用户' end as '用户价值族群'from RFM;
结果如下所示(给出部分数据预览):select `用户价值族群`, count(user_id) '用户数', concat(round(count(user_id)/983*100,3),'%') as '用户数占比'from user_RFMgroup by `用户价值族群`order by `用户价值族群`;
结果如下:关键词:数据,分析,用户,平台