时间:2023-03-26 07:30:02 | 来源:电子商务
时间:2023-03-26 07:30:02 来源:电子商务
CREATE TABLE 表3销售情况(用户ID VARCHAR(255) NOT NULL,购买行为编号 VARCHAR(255) NOT NULL,品类ID VARCHAR(255) NOT NULL,产品类目 VARCHAR(255) NOT NULL,商品属性 LONGTEXT NOT NULL,购买数量 DECIMAL(65) NOT NULL,购买时间 date NOT NULL,出生日期 date NOT NULL,性别 VARCHAR(255) NOT NULL,PRIMARY KEY(用户ID))ENGINE=INNODB;
2)表1、表2通过“用户ID”字段相互联结,查询结果保留两表间共同的数据记录,最后把查询数据存储在新表中。INSERT INTO `表3销售情况`SELECT `表1购买商品`.*, `表2婴儿信息`.出生日期,`表2婴儿信息`.性别FROM `表1购买商品`,`表2婴儿信息`WHERE `表1购买商品`.用户ID = `表2婴儿信息`.用户ID;
3.5 一致化处理UPDATE `表3销售情况` SET `性别` = REPLACE(`性别`,'0','男');UPDATE `表3销售情况` SET `性别` = REPLACE(`性别`,'1','女');UPDATE `表3销售情况` SET `性别` = REPLACE(`性别`,'2','未知');
3.6 异常值处理CREATE TABLE age(用户ID VARCHAR(255) NOT NULL,age DECIMAL(65) NOT NULL,PRIMARY KEY(用户ID))ENGINE=INNODB;INSERT INTO age SELECT `表3销售情况`.`用户ID`,ROUND(DATEDIFF(`购买时间`,`出生日期`)/365,2)FROM `表3销售情况`;
3)完善表3“年龄”空缺列update `表3销售总体情况`,age set `表3销售总体情况`.`年龄` = age.age where `表3销售总体情况`.`用户ID` = age.`用户ID`;
4)删除异常值DELETE FROM `表3销售情况`WHERE `表3销售情况`.`用户ID`='89520261';
UPDATE `表3销售情况`SET `表3销售情况`.`年龄分组` = CASE WHEN `年龄`<=0 THEN '未出生' WHEN `年龄`>0 and `年龄`<=1 THEN '0-1岁' WHEN `年龄`>1 and `年龄`<=2 THEN '1-2岁' WHEN `年龄`>2 and `年龄`<=3 THEN '2-3岁' WHEN `年龄`>3 and `年龄`<=4 THEN '3-4岁' WHEN `年龄`>4 and `年龄`<=5 THEN '4-5岁' WHEN `年龄`>5 and `年龄`<=6 THEN '5-6岁' WHEN `年龄`>6 and `年龄`<=7 THEN '6-7岁' WHEN `年龄`>7 and `年龄`<=8 THEN '7-8岁' WHEN `年龄`>8 and `年龄`<=9 THEN '8-9岁' WHEN `年龄`>9 and `年龄`<=10 THEN '9-10岁' ELSE '10岁以上'END ;
2)整体分布情况SELECT `性别`,SUM(CASE WHEN `年龄分组`='未出生' THEN 1 ELSE 0 END) AS '未出生',SUM(CASE WHEN `年龄分组`='0-1岁' THEN 1 ELSE 0 END) AS '0-1岁',SUM(CASE WHEN `年龄分组`='1-2岁' THEN 1 ELSE 0 END) AS '1-2岁',SUM(CASE WHEN `年龄分组`='2-3岁' THEN 1 ELSE 0 END) AS '2-3岁',SUM(CASE WHEN `年龄分组`='3-4岁' THEN 1 ELSE 0 END) AS '3-4岁',SUM(CASE WHEN `年龄分组`='5-6岁' THEN 1 ELSE 0 END) AS '5-6岁',SUM(CASE WHEN `年龄分组`='6-7岁' THEN 1 ELSE 0 END) AS '6-7岁',SUM(CASE WHEN `年龄分组`='7-8岁' THEN 1 ELSE 0 END) AS '7-8岁',SUM(CASE WHEN `年龄分组`='8-9岁' THEN 1 ELSE 0 END) AS '8-9岁',SUM(CASE WHEN `年龄分组`='9-10岁' THEN 1 ELSE 0 END) AS '9-10岁',SUM(CASE WHEN `年龄分组`='10岁以上' THEN 1 ELSE 0 END) AS '10岁以上',CONCAT(ROUND(COUNT(*)/952*100,2),'%') AS '占比'FROM `表3销售情况`GROUP BY `性别`ORDER BY COUNT(*) DESC;
从以上可以看出,该电商平台在销售母婴产品方面主要面向的消费人群是0-1岁婴儿,其次是1-2岁,第三是未出生的。在性别占比方面,男性与女性的比例相差不大,但男性儿童占比51.26%略高于女性儿童46.01%,说明有超过一半比例的消费者在该平台购买男性儿童使用的母婴产品。SELECT `产品大类`,SUM(CASE WHEN `性别`='男' THEN 1 ELSE 0 END) AS '男',SUM(CASE WHEN `性别`='女' THEN 1 ELSE 0 END) AS '女',SUM(CASE WHEN `性别`='未知' THEN 1 ELSE 0 END) AS '未知'FROM `表3销售情况`GROUP BY `产品大类`ORDER BY SUM(CASE WHEN `性别`='男' THEN 1 ELSE 0 END) DESC;
从查询结果可以看出,消费者主要偏好购买50008168大类产品,50008168和50014815以男婴儿居多,剩余的大类男女占比旗鼓相当,无较大差距。SELECT `产品大类`,SUM(`购买数量`) AS `购买数量`,COUNT(`产品中类`) AS '竞品数目',CONCAT(ROUND(COUNT(`产品中类`)/SUM(`购买数量`)*100,1),'%') AS '竞品饱和度'FROM `表1购买商品`GROUP BY `产品大类`ORDER BY SUM(`购买数量`) DESC;
该电商平台销售母婴用品有六大类别。其中ID28大类销量最高,但竞品数目相对缺乏,饱和度低;与之情况相同的ID50014815,销量次于ID28,说明这两大类产品有潜在市场空间,可在产品布局上适当增加对应类目其他竞品,以达到销售最大化。ID50008168大类销量位居第三,而竞品饱和度相对较高,该类目下竞品数量充足,消费者可供选择产品的余地更多,这也说明了以上消费者为什么主要偏好购买50008168了。而剩余的三大类产品,销量越低竞品饱和度则越高,说明商家在经营策略上有待考量。SELECT `产品中类`,SUM(CASE WHEN `性别`='男' THEN `购买数量` ELSE 0 END) AS '男',SUM(CASE WHEN `性别`='女' THEN `购买数量` ELSE 0 END) AS '女',SUM(CASE WHEN `性别`='未知' THEN `购买数量` ELSE 0 END) AS '未知',SUM(CASE WHEN `性别`='男' THEN `购买数量` ELSE 0 END)+SUM(CASE WHEN `性别`='女' THEN `购买数量` ELSE 0 END) +SUM(CASE WHEN `性别`='未知' THEN `购买数量` ELSE 0 END) AS '总计'FROM `表3销售情况`WHERE `产品大类`='50008168'GROUP BY `产品中类`ORDER BY SUM(CASE WHEN `性别`='男' THEN `购买数量` ELSE 0 END)+SUM(CASE WHEN `性别`='女' THEN `购买数量` ELSE 0 END) +SUM(CASE WHEN `性别`='未知' THEN `购买数量` ELSE 0 END) DESC;
以上结果可知,总体来说该大类下的TOP15每一竞品对应的男女购买人数相差不大,说明产品能满足大部分用户需求。SELECT `产品中类`,SUM(CASE WHEN `性别`='男' THEN `购买数量` ELSE 0 END) AS '男',SUM(CASE WHEN `性别`='女' THEN `购买数量` ELSE 0 END) AS '女',SUM(CASE WHEN `性别`='未知' THEN `购买数量` ELSE 0 END) AS '未知',SUM(CASE WHEN `性别`='男' THEN `购买数量` ELSE 0 END)+SUM(CASE WHEN `性别`='女' THEN `购买数量` ELSE 0 END) +SUM(CASE WHEN `性别`='未知' THEN `购买数量` ELSE 0 END) AS '总计'FROM `表3销售情况`WHERE `产品大类`='28'GROUP BY `产品中类`ORDER BY SUM(CASE WHEN `性别`='男' THEN `购买数量` ELSE 0 END)+SUM(CASE WHEN `性别`='女' THEN `购买数量` ELSE 0 END) +SUM(CASE WHEN `性别`='未知' THEN `购买数量` ELSE 0 END) DESC;
从查询结果发现如下问题:在大类ID28下,销量较为可观的TOP3产品分别是50013187、50011993、250822,消费群体以男婴儿为主,用户产品可选择的余地较少。SELECT `产品中类`,SUM(CASE WHEN `性别`='男' THEN `购买数量` ELSE 0 END) AS '男',SUM(CASE WHEN `性别`='女' THEN `购买数量` ELSE 0 END) AS '女',SUM(CASE WHEN `性别`='未知' THEN `购买数量` ELSE 0 END) AS '未知',SUM(CASE WHEN `性别`='男' THEN `购买数量` ELSE 0 END)+SUM(CASE WHEN `性别`='女' THEN `购买数量` ELSE 0 END) +SUM(CASE WHEN `性别`='未知' THEN `购买数量` ELSE 0 END) AS '总计'FROM `表3销售情况`WHERE `产品大类`='50014815'GROUP BY `产品中类`ORDER BY SUM(CASE WHEN `性别`='男' THEN `购买数量` ELSE 0 END)+SUM(CASE WHEN `性别`='女' THEN `购买数量` ELSE 0 END) +SUM(CASE WHEN `性别`='未知' THEN `购买数量` ELSE 0 END) DESC;
从以上图表可以知道,在同一类别所有产品中,只有50018831最为畅销,其余商品销量低迷,说明消费者选择单一,暂且说明该商品能满足消费者的需求和爱好。SELECT DATE_FORMAT(`购买时间`,'20%y') AS 'yr',SUM(CASE WHEN DATE_FORMAT(`购买时间`,'%m-%d')>='01-01' AND DATE_FORMAT(`购买时间`,'%m-%d')<='03-31' THEN `购买数量` ELSE 0 END) AS '第一季度',SUM(CASE WHEN DATE_FORMAT(`购买时间`,'%m-%d')>='04-01' AND DATE_FORMAT(`购买时间`,'%m-%d')<='06-30' THEN `购买数量` ELSE 0 END) AS '第二季度',SUM(CASE WHEN DATE_FORMAT(`购买时间`,'%m-%d')>='07-01' AND DATE_FORMAT(`购买时间`,'%m-%d')<='09-30' THEN `购买数量` ELSE 0 END) AS '第三季度',SUM(CASE WHEN DATE_FORMAT(`购买时间`,'%m-%d')>='10-01' AND DATE_FORMAT(`购买时间`,'%m-%d')<='12-31' THEN `购买数量` ELSE 0 END) AS '第四季度',(SUM(CASE WHEN DATE_FORMAT(`购买时间`,'%m-%d')>='01-01' AND DATE_FORMAT(`购买时间`,'%m-%d')<='03-31' THEN `购买数量` ELSE 0 END)+SUM(CASE WHEN DATE_FORMAT(`购买时间`,'%m-%d')>='04-01' AND DATE_FORMAT(`购买时间`,'%m-%d')<='06-30' THEN `购买数量` ELSE 0 END)+SUM(CASE WHEN DATE_FORMAT(`购买时间`,'%m-%d')>='07-01' AND DATE_FORMAT(`购买时间`,'%m-%d')<='09-30' THEN `购买数量` ELSE 0 END)+SUM(CASE WHEN DATE_FORMAT(`购买时间`,'%m-%d')>='10-01' AND DATE_FORMAT(`购买时间`,'%m-%d')<='12-31' THEN `购买数量` ELSE 0 END)) AS '总计'FROM `表1购买商品`GROUP BY DATE_FORMAT(`购买时间`,'20%y')ORDER BY DATE_FORMAT(`购买时间`,'20%y');
SELECT SUM(`表1购买商品`.`购买数量`)/COUNT(`表3销售情况`.`用户ID`) AS '连带率'FROM `表1购买商品` JOIN `表3销售情况` ON `表1购买商品`.`用户ID`=`表3销售情况`.`用户ID`
从查询结果得出顾客连带率为1.61,说明交易中顾客平均买走2件商品。因为这里没有可对比的对象,所以无法判断连带率高低。若在此基础上提高连带率,我们可以从商品搭配、联合推荐角度出发,提高商品间的关联度。Sub test()Set d = CreateObject("scripting.dictionary")For i = 2 To Cells(Rows.Count, 3).End(xlUp).Row If Cells(i, Columns.Count).End(xlToLeft).Column - 3 = 0 Then d(Rng) = d(Rng) + 1 Else arr = Range(Cells(i, 3), Cells(i, Cells(i, Columns.Count).End(xlToLeft).Column)) For Each Rng In arr d(Rng) = d(Rng) + 1 Next End IfNext[A1].Resize(d.Count) = Application.Transpose(d.Keys)[B1].Resize(d.Count) = Application.Transpose(d.Items)End Sub
结论:排名前三产品属性分别是1628665:3233938、1628665:29790、1628665:3233939,但数据集并没有对这属性有详细说明,所以在产品布局、上新、品类拓展等方面依据主要商品属性规划。关键词:数据,分析,用品