时间:2023-03-15 22:54:01 | 来源:电子商务
时间:2023-03-15 22:54:01 来源:电子商务
SELECT DISTINCT user_id, item_id, category_id, behavior, timestampsFROM userbehavior;
查询结果中没有发现重复值SELECT count( userid ), count( itemid ), count( categoryid ), count( behavior ), count( timestamps ) FROM userbehavior WHERE userid IS NULL OR itemid IS NULL OR categoryid IS NULL OR behavior IS NULL OR timestamps IS NULL;
查询结果中没有发现重复值# 将timestamps列转换为日期格式UPDATE userbehavior SET timestamps=FROM_UNIXTIME(timestamps,'%Y-%m-%d %H:%i:%s');# 新建date列,从timestamp列截取日期ALTER TABLE userbehavior add date varchar(10);UPDATE userbehavior SET date=SUBSTRING(timestamps,1,10);# 新建hour列,从timestamp列截取小时ALTER TABLE userbehavior add hours varchar(10);UPDATE userbehavior SET hours=SUBSTRING(timestamps,12,2);
6.异常值处理# 删除指定日期之外的异常值DELETE FROM userbehavior WHERE date > '2017-12-03' or date < '2017-11-25';# 查询日期列最小值和最大值检查是否处理异常值SELECT min(date),max(date) FROM userbehavior;
# 统计数据集中所有行为数据,总用户数,商品数,商品类别数,用户行为数SELECT count( * ) AS 行为数, count( DISTINCT userid ) AS 用户数, count( DISTINCT itemid ) AS 商品数, count( DISTINCT categoryid ) AS 商品类别数, count( DISTINCT behavior ) AS 用户行为数 FROM userbehavior
由上可知,数据集中总共有99956条,一共包含983位用户,64440个商品,3128个类别,4种用户行为。SELECT behavior, count(*)FROM userbehaviorGROUP BY behavior;
从上图可以看出,从浏览到有购买意向只有9%的转化率。只有少量用户是直接购买而未通过收藏和加入购物车,从页面浏览到收藏/加购的转化率偏低,该阶段的转化率应重点提升。SELECT behavior, count(distinct userid) AS '用户数'FROM `userbehavior`GROUP BY behavior
如图所示,约有68%的付费用户,用户付费转化率相当高。select sum(case when pv>0 then 1 else 0 end) as 'pv', sum(case when pv>0 and buy>0 and cart=0 and fav=0 then 1 else 0 end) as 'pv-buy', sum(case when pv>0 and (cart>0 or fav>0) then 1 else 0 end) as 'pv-cart/fav', sum(case when pv>0 and (cart>0 or fav>0) and buy>0 then 1 else 0 end) as 'pv-cart/fav-buy', sum(case when pv>0 and (cart>0 or fav>0) and buy=0 then 1 else 0 end) as 'pv-cart/fav-lost', sum(case when pv>0 and cart=0 and fav=0 and buy=0 then 1 else 0 end) as 'pv-lost' from userbehavior;
结果如下:# 创建浏览量TOP10商品种类视图create view pv_top10_category asselect categoryid,count(*) as 浏览量top10from userbehaviorWHERE behavior='pv'GROUP BY categoryidORDER BY count(*) desclimit 10; # 创建购买量TOP10商品种类视图create view buy_top10_category asselect categoryid,count(*) as 购买量top10from userbehaviorWHERE behavior='buy'GROUP BY categoryidORDER BY count(*) desclimit 10;
根据categoryid对比分析浏览量TOP10和购买量TOP10重合部分商品种类# 浏览量TOP10商品种类SELECT * FROM pv_top10_category as aleft join buy_top10_category as bUSING(categoryid); # 购买量TOP10商品种类SELECT * FROM buy_top10_category as aleft join pv_top10_category as bUSING(categoryid);
结果如下表所示:# 创建浏览量TOP10商品视图create view pv_top10_item asselect itemid,count(*) as 浏览量top10from userbehaviorWHERE behavior='pv'GROUP BY itemidORDER BY count(*) desclimit 10; # 创建购买量TOP10商品视图create view buy_top10_item asselect itemid,count(*) as 购买量top10from userbehaviorWHERE behavior='buy'GROUP BY itemidORDER BY count(*) desclimit 10;
根据itemid对比分析浏览量TOP10和购买量TOP10重合部分商品# 浏览量TOP10商品SELECT * FROM pv_top10_item as aleft join buy_top10_item as bUSING(itemid);# 购买量TOP10商品SELECT * FROM buy_top10_item as aleft join pv_top10_item as bUSING(itemid);
结果如下表所示:# 整体商品类目与对应的商品数量SELECT count( DISTINCT categoryid ) AS 商品类目数量, count( DISTINCT itemid ) AS 商品数量 FROM userbehavior;
结果如下表所示:# 每个商品的购买次数SELECT itemid,count(*) AS 购买次数 FROM userbehavior WHERE behavior = 'buy' GROUP BY itemid ORDER BY count(*) DESC;
结果如下表所示下:# 购买次数占比SELECT a.购买次数, count( a.itemid ) AS 商品数, concat( round( count( a.itemid ) / ( SELECT count( DISTINCT itemid ) FROM userbehavior WHERE behavior = 'buy' ) * 100, 2 ), '%' ) AS 占比 FROM ( SELECT itemid, count( * ) AS 购买次数 FROM userbehavior WHERE behavior = 'buy' GROUP BY itemid ) AS a GROUP BYa. 购买次数;
结果如下表所示:select concat(round(sum(pv)/select count(*) from userbehavior)*100,2),'%')as 跳出率from(select userid,sum(case behavior when 'pv' then 1 else 0 end ) as pv sum(case behavior when 'fav' then 1 else 0 end ) as 'cart'sum(case behavior when 'cart' then 1 else 0 end ) as 'cart' sum(case behavior when 'buy' then 1 else 0 end ) as 'buy'from userbehaviorgroupby userid) as awhere 'pv'>0 and fav=0 and cart=0 and buy=0
结果如下表:复购用户数select count(*) as 用户复购数 from (select userid,count(*) as 用户复购数 from userbehavior where behavior='buy'group by useridhaving count(distinct date)>=2) as a 总购买用户数select count(distinct userid) from userbehavior where behavior='buy'
得出结论:用户复购率=复购用户数/总购买用户数=365/671=54.40%。用户的复购率超过50%,用户具有较高的忠诚度。# 复购用户购买次数SELECT sum(购买次数) as 复购用户购买次数FROM # 查找复购用户(SELECT userid,count(*) as 购买次数 FROM `userbehavior`WHERE behavior='buy'GROUP BY useridHAVING count(DISTINCT date)>=2) as a; # 总购买次数SELECT count(*) as 总购买次数 FROM userbehaviorWHERE behavior='buy';
得出结论:用户的复购率超过了50%,同时复购用户购买次数占比达到了79.40%,说明平台用户粘性很高,且购买积极性也高。# 计算每日留存率select a.date, count(distinct a.userid) as 日新增用户数,concat(round(100*count(distinct if(datediff(b.date,a.date)=1, b.userid, null))/count(distinct a.userid),2),'%') as 次日留存率,concat(round(100*count(distinct if(datediff(b.date,a.date)=2, b.userid, null))/count(distinct a.userid),2),'%') as 二日留存率,concat(round(100*count(distinct if(datediff(b.date,a.date)=3, b.userid, null))/count(distinct a.userid),2),'%') as 三日留存率,concat(round(100*count(distinct if(datediff(b.date,a.date)=4, b.userid, null))/count(distinct a.userid),2),'%') as 四日留存率,concat(round(100*count(distinct if(datediff(b.date,a.date)=5, b.userid, null))/count(distinct a.userid),2),'%') as 五日留存率,concat(round(100*count(distinct if(datediff(b.date,a.date)=6, b.userid, null))/count(distinct a.userid),2),'%') as 六日留存率,concat(round(100*count(distinct if(datediff(b.date,a.date)=7, b.userid, null))/count(distinct a.userid),2),'%') as 七日留存率,concat(round(100*count(distinct if(datediff(b.date,a.date)=8, b.userid, null))/count(distinct a.userid),2),'%') as 八日留存率from #统计每日新增的用户名单 (select d1.* from lading as d1 left join lading as d2 on d1.userid=d2.userid and d1.date>d2.date where d2.date is null) as a left joinlading as b on a.userid=b.useridgroup by a.date;
得出结论:每日的留存率都处在较高的水平,这说明用户的忠诚度较高。SELECT date as 日期,sum(case when behavior='pv' then 1 else 0 end) as 点击量,count(DISTINCT userid) as 用户数,(sum(case when behavior='pv' then 1 else 0 end)/count(DISTINCT userid)) as 重复访问量FROM userbehaviorGROUP BY date
结果如下表所示:SELECT date as '日期',count(behavior) as 用户行为总量,sum(case when behavior='pv' then 1 else 0 end) as 点击量,sum(case when behavior='fav' then 1 else 0 end) as 收藏数,sum(case when behavior='cart' then 1 else 0 end) as 加购数,sum(case when behavior='buy' then 1 else 0 end) as 购买数FROM userbehaviorGROUP BY date;
结果如下表所示:SELECT hours as '时段',count(behavior) as 用户行为总量,sum(case when behavior='pv' then 1 else 0 end) as 点击量,sum(case when behavior='fav' then 1 else 0 end) as 收藏数,sum(case when behavior='cart' then 1 else 0 end) as 加购数,sum(case when behavior='buy' then 1 else 0 end) as 购买数FROM userbehaviorGROUP BY hoursORDER BY hours;
结果如下表所示:SELECT DATE_FORMAT(date,'%W') as '星期',count(behavior) as 用户行为总量,sum(case when behavior='pv' then 1 else 0 end) as 点击量,sum(case when behavior='fav' then 1 else 0 end) as 收藏数,sum(case when behavior='cart' then 1 else 0 end) as 加购数,sum(case when behavior='buy' then 1 else 0 end) as 购买数FROM userbehaviorGROUP BY DATE_FORMAT(date,'%W')ORDER BY WEEKDAY(date);
结果如下表所示:# 最近一次消费时间间隔R和消费频率FSELECT userid,DATEDIFF('2017-12-04',max(date)) AS R,count(*) as FFROM userbehavior WHERE behavior = 'buy' GROUP BY userid;
结果如下表所示# 对最近一次消费时间间隔(R)和消费频率(F)进行评分,并创建视图rfm create view rfm asSELECT a.*,(casewhen R>20 then 1 when R between 10 and 20 then 2when R between 5 and 10 then 3when R between 3 and 5 then 4when R<=3 then 5else 0 end) as Rscore,(casewhen F<=2 then 1 when F between 2 and 6 then 2when F between 6 and 8 then 3when F between 10 and 20 then 4when F>20 then 5else 0 end) as FscoreFROM(SELECT userid,DATEDIFF('2017-12-04',max(date)) AS R,count(*) as FFROM userbehavior WHERE behavior = 'buy' GROUP BY userid) as a;
结果如下表所示:# 计算打分平均值SELECT avg(Rscore) as avg_R, avg(Fscore) as avg_FFROM rfm;
结果如下表所示:# 用户分类select userid,Rscore,Fscore, (case when Rscore>4.158 then '高' else '低' end) as 'R值高低', (case when Fscore>1.5484 then '高' else '低' end) as 'F值高低' from rfm;
结果如下表所示:#对用户进行标记select userid,R值高低,F值高低,(case when R值高低='高' and F值高低='高' then '重要价值用户' when R值高低='低' and F值高低='高' then '重要保持用户' when R值高低='高' and F值高低='低' then '重要发展用户' when R值高低='低' and F值高低='低' then '一般价值用户' else 0 end) as '用户分类' from (select userid,Rscore,Fscore, (case when Rscore>4.158 then '高' else '低' end) as 'R值高低', (case when Fscore>1.5484 then '高' else '低' end) as 'F值高低' from rfm) as a;
结果如下表所示:# 对用户分类统计select 用户分类,count(*) as 用户数FROM(select userid,R值高低,F值高低,(case when R值高低='高' and F值高低='高' then '重要价值用户' when R值高低='低' and F值高低='高' then '重要保持用户' when R值高低='高' and F值高低='低' then '重要发展用户' when R值高低='低' and F值高低='低' then '一般价值用户' else 0 end) as '用户分类' from (select userid,Rscore,Fscore, (case when Rscore>4.158 then '高' else '低' end) as 'R值高低', (case when Fscore>1.5484 then '高' else '低' end) as 'F值高低' from rfm) as a) as bGROUP BY 用户分类;
结果如下表所示:SELECT product_buytimes, COUNT(*) AS product_type_countFROM(SELECT COUNT(user_id) AS product_buytimesFROM UserWHERE behavior_type = 'buy'GROUP BY item_id) AS product_buypoolGROUP BY product_buytimesORDER BY product_buytimes ASC;
本次分析的商品共有64440中,用户购买的商品共有16743种,购买数量非常集中的商品比较少;在本次统计的数据中,只购买一次的商品有56154种,占用户购买商品数的79.26%,说明商品售卖主要依靠长尾商品的累积效应,而非爆款商品的带动。SELECT categoryid , COUNT(*) AS cat_countFROM userWHERE behavior = 'buy'GROUP BY category_idORDER BY cat_count DESC;
如图所示,商品类目销售情况中有较为明显的集中趋势,可根据畅销类目优化商品展示、加强商品捆绑,进而提高销量。关键词:数据,分析,用户