时间:2023-03-15 23:34:01 | 来源:电子商务
时间:2023-03-15 23:34:01 来源:电子商务
一、背景目标SELECT * FROM UserBehavior_newGROUP BY user_id, item_id, cat_id, behavior, timestampsHAVING COUNT(*) > 1;
2.2 检查缺失值:未发现缺失值。SELECT COUNT(user_id), COUNT(item_id), COUNT(cat_id), COUNT(behavior), COUNT(timestamps)FROM UserBehavior_newWHERE user_id IS NULLOR item_id IS NULLOR cat_id IS NULLOR behavior IS NULLOR timestamps IS NULL;
2.3 预处理:-- 1.将时间戳转化为日期格式ALTER TABLE UserBehavior_new ADD COLUMN datetime VARCHAR(256);UPDATE UserBehavior_new SET datetime = FROM_UNIXTIME(timestamps);ALTER TABLE UserBehavior_new ADD COLUMN date VARCHAR(256);UPDATE UserBehavior_new SET date = FROM_UNIXTIME(timestamps, '%Y-%m-%d');ALTER TABLE UserBehavior_new ADD COLUMN time VARCHAR(256);UPDATE UserBehavior_new SET time = FROM_UNIXTIME(timestamps, '%H-%i-%s');
-- 2.确保所有用户行为发生在2017/11/25-2017/12/03期间SELECT date FROM UserBehavior_new WHERE date < '2017-11-25' or date > '2017-12-03';-- 删除异常数据DELETE FROM UserBehavior_new WHERE date < '2017-11-25' or date > '2017-12-03';
三、数据分析-- 1.页面浏览量(PV):895636SELECT COUNT(behavior) AS PV FROM UserBehavior_new WHERE behavior = 'pv';-- 2.页面访客数(UV):9739SELECT COUNT(DISTINCT user_id) AS UV FROM UserBehavior_new;-- 3.平均页面访客数 = PV/UV :92-- 4.以日期为基准对各指标变化情况的分析CREATE VIEW UserBehavior_date AS SELECT date, SUM(CASE WHEN behavior = 'pv' THEN 1 ELSE 0 END) AS '浏览量',SUM(CASE WHEN behavior = 'cart' THEN 1 ELSE 0 END) AS '加购量',SUM(CASE WHEN behavior = 'fav' THEN 1 ELSE 0 END) AS '收藏量',SUM(CASE WHEN behavior = 'buy' THEN 1 ELSE 0 END) AS '购买量'FROM UserBehavior_newGROUP BY dateORDER BY date;
-- 5.以时间为基准对各指标变化情况的分析CREATE VIEW UserBehavior_time AS SELECT time,SUM(CASE WHEN behavior = 'PV' THEN 1 ELSE 0 END) AS '浏览量',SUM(CASE WHEN behavior = 'cart' THEN 1 ELSE 0 END) AS '加购量',SUM(CASE WHEN behavior = 'fav' THEN 1 ELSE 0 END) AS '收藏量',SUM(CASE WHEN behavior = 'buy' THEN 1 ELSE 0 END) AS '购买量'FROM UserBehavior_newGROUP BY timeORDER BY time;
-- 6.用户整体行为SELECT SUM(总量) AS 总量, SUM(浏览量) AS 浏览量, SUM(加购量) AS 加购量,SUM(收藏量) AS 收藏量, SUM(购买量) AS 购买量 FROM UserBehavior_view;SELECT CONCAT(ROUND(SUM(浏览量)/SUM(总量)*100,2),'%') AS 浏览转化率,CONCAT(ROUND((SUM(加购量)+SUM(收藏量))/SUM(总量)*100,2),'%') AS '加购/收藏转化率',CONCAT(ROUND(SUM(购买量)/SUM(总量)*100,2),'%') AS 购买转化率FROM UserBehavior_view;
-- 7.用户各环节行为路径SELECT SUM(CASE WHEN 浏览量 > 0 THEN 1 ELSE 0 END) AS '浏览量',SUM(CASE WHEN 浏览量 > 0 AND 购买量 > 0 AND 加购量 = 0 AND 收藏量 = 0 THEN 1 ELSE 0 END) AS '浏览→购买',SUM(CASE WHEN 浏览量 > 0 AND (加购量 > 0 OR 收藏量 > 0) THEN 1 ELSE 0 END) AS '浏览→加购/收藏',SUM(CASE WHEN 浏览量 > 0 AND 购买量 = 0 AND 加购量 = 0 AND 收藏量 = 0 THEN 1 ELSE 0 END) AS '浏览→流失',SUM(CASE WHEN 浏览量 > 0 AND (加购量 > 0 OR 收藏量 > 0) AND 购买量 > 0 THEN 1 ELSE 0 END) AS '浏览→加购/收藏→购买',SUM(CASE WHEN 浏览量 > 0 AND (加购量 > 0 OR 收藏量 > 0) AND 购买量 = 0 THEN 1 ELSE 0 END) AS '浏览→加购/收藏→流失'FROM UserBehavior_view;
-- 根据cat_id创建视图CREATE VIEW pv_top10_cat ASSELECT cat_id, COUNT(*) AS 浏览量TOP10 FROM UserBehavior_newWHERE behavior = 'pv'GROUP BY cat_idORDER BY COUNT(*) DESCLIMIT 10;CREATE VIEW buy_top10_cat ASSELECT cat_id, COUNT(*) AS 购买量TOP10 FROM UserBehavior_newWHERE behavior = 'buy'GROUP BY cat_idORDER BY COUNT(*) DESCLIMIT 10;-- 根据item_id创建视图CREATE VIEW pv_top10_item ASSELECT item_id, COUNT(*) AS 浏览量TOP10 FROM UserBehavior_newWHERE behavior = 'pv'GROUP BY item_idORDER BY COUNT(*) DESCLIMIT 10;CREATE VIEW buy_top10_item ASSELECT item_id, COUNT(*) AS 购买量TOP10 FROM UserBehavior_newWHERE behavior = 'buy'GROUP BY item_idORDER BY COUNT(*) DESCLIMIT 10;
SELECT COUNT(DISTINCT cat_id) AS 商品类目数量,COUNT(DISTINCT item_id) AS 商品数量FROM UserBehavior_new;
SELECT a.购买次数, COUNT(a.item_id) AS 商品数,CONCAT(ROUND(COUNT(a.item_id) / (SELECT COUNT(DISTINCT item_id) FROM UserBehavior_new WHERE behavior = 'buy') *100, 2), '%') AS 占比FROM (SELECT item_id, COUNT(*) AS 购买次数 FROM UserBehavior_new WHERE behavior = 'buy' GROUP BY item_id) AS aGROUP BY a.购买次数ORDER BY a.购买次数;
-- 1.总订单量:20359-- 人均购买量 = 总订单量/总用户数 : 2.09SELECT COUNT(behavior) FROM UserBehavior_newWHERE behavior = 'buy';-- 2.复购率CREATE VIEW buy_f_view(user_id, frequency) ASSELECT user_id, COUNT(behavior) AS frequency FROM UserBehavior_newWHERE behavior = 'buy'GROUP BY user_idORDER BY frequency;
-- 1.计算R、F值,并按价值打分CREATE VIEW rf AS SELECT a.*,(CASE WHEN R > 8 THEN 1WHEN R BETWEEN 7 AND 8 THEN 2WHEN R BETWEEN 5 AND 6 THEN 3WHEN R BETWEEN 3 AND 4 THEN 4WHEN R < 3 THEN 5 ELSE 0 END) AS R_scores,(CASE WHEN F < 2 THEN 1WHEN F BETWEEN 2 AND 4 THEN 2WHEN F BETWEEN 5 AND 10 THEN 3WHEN F BETWEEN 11 AND 20 THEN 4WHEN F > 20 THEN 5 ELSE 0 END) AS F_scoresFROM (SELECT user_id, DATEDIFF('2017-12-03', MAX(date)) AS R, COUNT(*) AS FFROM UserBehavior_newWHERE behavior = 'buy'GROUP BY user_id) AS a;
-- 2.计算打分的平均值SELECT AVG(R_SCORES) AS avg_r, AVG(F_scores) AS avg_f FROM rf;
-- 3.按照R值和F值的平均分对用户进行分类,并统计SELECT 用户分类, COUNT(*) AS 用户数 FROM(SELECT user_id, R值, F值,(CASE WHEN R值='高' AND F值='高' THEN '重要价值客户'WHEN R值='高' AND F值='低' THEN '重要发展客户'WHEN R值='低' AND F值='高' THEN '重要保持展客户'WHEN R值='低' AND F值='低' THEN '一般价值客户' ELSE 0 END) AS 用户分类FROM (SELECT user_id, R_scores, F_scores,(CASE WHEN R_scores > 4.2625 THEN '高' ELSE '低' END) AS R值,(CASE WHEN F_scores > 1.8710 THEN '高' ELSE '低' END) AS F值FROM rf) AS a) AS bGROUP BY 用户分类ORDER BY 用户数 DESC;
关键词:数据,分析,报告,用户