【大数据_数仓架构-06_统计每天、每个省份、每个城市、鲜花品类、玫瑰花 下了多少笔订单,多少交易额,从ODS层一直建设到ADS层】
● 端到端建设:每日各省市鲜花-玫瑰订单数与交易额
业务需求拆解:
- 统计周期:每天
- 维度:省份、城市、品类(鲜花)、子品类(玫瑰花)
- 指标:下单笔数、交易额
- 粒度:日 × 省 × 市 × 类目 × 商品(玫瑰)
一、整体链路图
源系统 ODS DIM/DWD DWS ADS
───────── ───────────── ──────────────── ────────────── ──────────────
订单库 ─┐
├─→ ods_trade_order_di
├─→ ods_trade_order_item_di ─→ dwd_trade_order_pay_di
商品库 ─┘ (打宽:含类目、品牌、地区)
│
商品库 ──→ ods_item_info_df ──┐ │
类目库 ──→ ods_item_cate_df ├→ dim_item_df ─┘
│ ▼
地区库 ──→ ods_region_df ─────→ dim_region_df dws_cate_region_trade_1d ──→ ads_flower_rose_geo_1d
(类目+地区+日 汇总) (玫瑰花地域看板)
用户库 ──→ ods_member_user_df ─→ dim_member_user_df
二、ODS 层(贴源层)
按业务系统组织,原样落地。
- ods_trade_order_di(订单主表 - 增量)
CREATE TABLE ods_trade_order_di (
order_id BIGINT COMMENT ‘订单ID’,
user_id BIGINT COMMENT ‘用户ID’,
shop_id BIGINT COMMENT ‘店铺ID’,
order_status STRING COMMENT ‘订单状态’,
pay_status STRING COMMENT ‘支付状态:1已支付 0未支付’,
order_amount DECIMAL(16,2) COMMENT ‘订单金额’,
pay_amount DECIMAL(16,2) COMMENT ‘实付金额’,
receiver_province_code STRING COMMENT ‘收货省编码’,
receiver_city_code STRING COMMENT ‘收货市编码’,
order_create_time TIMESTAMP COMMENT ‘下单时间’,
pay_time TIMESTAMP COMMENT ‘支付时间’,
is_test INT COMMENT ‘是否测试单’,
etl_time TIMESTAMP
) PARTITIONED BY (dt STRING)
STORED AS ORC;
- ods_trade_order_item_di(订单商品明细 - 增量)
CREATE TABLE ods_trade_order_item_di (
order_item_id BIGINT,
order_id BIGINT,
item_id BIGINT COMMENT ‘商品ID’,
sku_id BIGINT,
item_title STRING,
buy_qty INT COMMENT ‘购买数量’,
item_price DECIMAL(16,2),
item_amount DECIMAL(16,2) COMMENT ‘商品行金额’,
etl_time TIMESTAMP
) PARTITIONED BY (dt STRING);
- ods_item_info_df(商品维 - 全量)
CREATE TABLE ods_item_info_df (
item_id BIGINT,
item_title STRING,
cate_id BIGINT COMMENT ‘叶子类目ID’,
brand_id BIGINT,
shop_id BIGINT,
item_status STRING,
etl_time TIMESTAMP
) PARTITIONED BY (dt STRING);
- ods_item_cate_df(类目维 - 全量)
CREATE TABLE ods_item_cate_df (
cate_id BIGINT,
cate_name STRING COMMENT ‘类目名:玫瑰花/百合/…’,
parent_cate_id BIGINT,
cate_level INT COMMENT ‘类目层级’,
cate_level1_id BIGINT COMMENT ‘一级类目ID’,
cate_level1_name STRING COMMENT ‘一级类目名:鲜花/绿植/…’,
cate_level2_id BIGINT,
cate_level2_name STRING,
is_leaf INT,
etl_time TIMESTAMP
) PARTITIONED BY (dt STRING);
- ods_region_df(地区维 - 全量)
CREATE TABLE ods_region_df (
region_code STRING COMMENT ‘地区编码’,
region_name STRING,
region_level INT COMMENT ‘1省 2市 3区’,
parent_code STRING,
province_code STRING,
province_name STRING,
city_code STRING,
city_name STRING,
etl_time TIMESTAMP
) PARTITIONED BY (dt STRING);
三、DIM 维度层
- dim_item_df(商品维表,打通类目层级)
CREATE TABLE dim_item_df (
item_id BIGINT,
item_title STRING,
shop_id BIGINT,
brand_id BIGINT,
– 类目层级展平
cate_id BIGINT COMMENT ‘叶子类目’,
cate_name STRING COMMENT ‘叶子类目名(如:玫瑰花)’,
cate_level1_id BIGINT,
cate_level1_name STRING COMMENT ‘一级类目(如:鲜花)’,
cate_level2_id BIGINT,
cate_level2_name STRING,
item_status STRING
) PARTITIONED BY (dt STRING);
– 加工逻辑
INSERT OVERWRITE TABLE dim_item_df PARTITION (dt=‘bizdate′)SELECTi.itemid,i.itemtitle,i.shopid,i.brandid,i.cateid,c.catename,c.catelevel1id,c.catelevel1name,c.catelevel2id,c.catelevel2name,i.itemstatusFROModsiteminfodfiLEFTJOINodsitemcatedfcONi.cateid=c.cateidANDc.dt=′{bizdate}') SELECT i.item_id, i.item_title, i.shop_id, i.brand_id, i.cate_id, c.cate_name, c.cate_level1_id, c.cate_level1_name, c.cate_level2_id, c.cate_level2_name, i.item_status FROM ods_item_info_df i LEFT JOIN ods_item_cate_df c ON i.cate_id = c.cate_id AND c.dt = 'bizdate′)SELECTi.itemid,i.itemtitle,i.shopid,i.brandid,i.cateid,c.catename,c.catelevel1id,c.catelevel1name,c.catelevel2id,c.catelevel2name,i.itemstatusFROModsiteminfodfiLEFTJOINodsitemcatedfcONi.cateid=c.cateidANDc.dt=′{bizdate}’
WHERE i.dt = ‘${bizdate}’;
- dim_region_df(地区维表)
CREATE TABLE dim_region_df (
city_code STRING,
city_name STRING,
province_code STRING,
province_name STRING,
region_zone STRING COMMENT ‘华北/华东/华南…’
) PARTITIONED BY (dt STRING);
四、DWD 明细数据层
dwd_trade_order_pay_di(支付明细事实表,宽表)
粒度:一行 = 一个订单中的一个商品(order_id + item_id)
CREATE TABLE dwd_trade_order_pay_di (
– 主键
order_id BIGINT,
order_item_id BIGINT,
-- 用户、店铺
user_id BIGINT,
shop_id BIGINT,
-- 商品 + 类目(维度退化)
item_id BIGINT,
item_title STRING,
brand_id BIGINT,
cate_id BIGINT COMMENT '叶子类目ID',
cate_name STRING COMMENT '叶子类目名(玫瑰花/百合/...)',
cate_level1_id BIGINT COMMENT '一级类目ID',
cate_level1_name STRING COMMENT '一级类目名(鲜花/绿植/...)',
-- 收货地区(维度退化)
province_code STRING,
province_name STRING,
city_code STRING,
city_name STRING,
-- 度量
buy_qty INT,
item_price DECIMAL(16,2),
item_amount DECIMAL(16,2) COMMENT '该商品行的实付金额',
-- 时间
pay_time TIMESTAMP,
-- 标识
is_valid_order INT COMMENT '1有效 0无效(测试单/退款单等)'
) PARTITIONED BY (dt STRING)
STORED AS ORC;
– 加工逻辑
INSERT OVERWRITE TABLE dwd_trade_order_pay_di PARTITION (dt=‘${bizdate}’)
SELECT
o.order_id,
oi.order_item_id,
o.user_id,
o.shop_id,
oi.item_id,
oi.item_title,
i.brand_id,
i.cate_id,
i.cate_name,
i.cate_level1_id,
i.cate_level1_name,
o.receiver_province_code AS province_code,
r1.province_name,
o.receiver_city_code AS city_code,
r1.city_name,
oi.buy_qty,
oi.item_price,
oi.item_amount,
o.pay_time,
CASE WHEN o.is_test = 0
AND o.pay_status = '1'
AND o.order_amount > 0
THEN 1 ELSE 0 END AS is_valid_order
FROM ods_trade_order_di o
JOIN ods_trade_order_item_di oi
ON o.order_id = oi.order_id AND oi.dt = ‘bizdate′LEFTJOINdimitemdfiONoi.itemid=i.itemidANDi.dt=′{bizdate}' LEFT JOIN dim_item_df i ON oi.item_id = i.item_id AND i.dt = 'bizdate′LEFTJOINdimitemdfiONoi.itemid=i.itemidANDi.dt=′{bizdate}’
LEFT JOIN dim_region_df r1
ON o.receiver_city_code = r1.city_code AND r1.dt = ‘bizdate′WHEREo.dt=′{bizdate}' WHERE o.dt = 'bizdate′WHEREo.dt=′{bizdate}’
AND o.pay_status = ‘1’; – 只取已支付订单
关键点:
- 把类目层级和地区维度冗余进事实表 → 下游不用再 join
- is_valid_order 在 DWD 落实有效订单口径
五、DWS 汇总数据层
dws_cate_region_trade_1d(类目-地区交易日汇总宽表)
粒度:dt × 省 × 市 × 一级类目 × 叶子类目
通用性:这张表能服务"鲜花-玫瑰看板",也能服务"绿植-多肉"、"鲜花-百合"等任何"类目+地区"分析需求。
CREATE TABLE dws_cate_region_trade_1d (
province_code STRING,
province_name STRING,
city_code STRING,
city_name STRING,
cate_level1_id BIGINT,
cate_level1_name STRING,
cate_id BIGINT COMMENT '叶子类目ID',
cate_name STRING COMMENT '叶子类目名',
-- 原子指标
pay_order_cnt BIGINT COMMENT '支付订单数(去重)',
pay_item_cnt BIGINT COMMENT '支付商品行数',
pay_qty BIGINT COMMENT '支付件数',
pay_amount DECIMAL(18,2) COMMENT '支付金额',
pay_buyer_cnt BIGINT COMMENT '支付买家数(去重)'
) PARTITIONED BY (dt STRING);
– 加工逻辑
INSERT OVERWRITE TABLE dws_cate_region_trade_1d PARTITION (dt=‘${bizdate}’)
SELECT
province_code,
MAX(province_name) AS province_name,
city_code,
MAX(city_name) AS city_name,
cate_level1_id,
MAX(cate_level1_name) AS cate_level1_name,
cate_id,
MAX(cate_name) AS cate_name,
COUNT(DISTINCT order_id) AS pay_order_cnt,
COUNT(order_item_id) AS pay_item_cnt,
SUM(buy_qty) AS pay_qty,
SUM(item_amount) AS pay_amount,
COUNT(DISTINCT user_id) AS pay_buyer_cnt
FROM dwd_trade_order_pay_di
WHERE dt = ‘${bizdate}’
AND is_valid_order = 1
GROUP BY
province_code, city_code,
cate_level1_id, cate_id;
为什么这样设计 DWS:
- 不写死"鲜花"或"玫瑰",保持通用性
- 任何类目维度的地域分析,都能从这一张表查
- 同一张 DWS 可以喂养:玫瑰看板、百合看板、鲜花大盘、绿植大盘…
六、ADS 应用数据层
ads_flower_rose_geo_1d(鲜花-玫瑰地域订单看板)
专属应用:服务运营团队的"玫瑰花地域销售看板"。
CREATE TABLE ads_flower_rose_geo_1d (
dt STRING,
province_code STRING,
province_name STRING,
city_code STRING,
city_name STRING,
-- 核心指标
rose_order_cnt BIGINT COMMENT '玫瑰花下单笔数',
rose_pay_amount DECIMAL(18,2) COMMENT '玫瑰花交易额',
rose_pay_qty BIGINT COMMENT '玫瑰花销售件数',
rose_buyer_cnt BIGINT COMMENT '玫瑰花买家数',
rose_avg_order_value DECIMAL(16,2) COMMENT '玫瑰花客单价',
-- 业务派生指标(带业务规则)
city_rank_in_province INT COMMENT '该城市玫瑰销售在省内排名',
city_rank_nationwide INT COMMENT '该城市玫瑰销售全国排名',
is_top10_city INT COMMENT '是否全国TOP10城市',
-- 占比类指标
rose_amount_share_in_flower DECIMAL(8,4) COMMENT '玫瑰销售额占该城市鲜花的比例',
-- 同环比
rose_pay_amount_yoy DECIMAL(8,4) COMMENT '同比',
rose_pay_amount_dod DECIMAL(8,4) COMMENT '环比'
);
– 加工逻辑
INSERT OVERWRITE TABLE ads_flower_rose_geo_1d
SELECT
‘${bizdate}’ AS dt,
t.province_code,
t.province_name,
t.city_code,
t.city_name,
t.pay_order_cnt AS rose_order_cnt,
t.pay_amount AS rose_pay_amount,
t.pay_qty AS rose_pay_qty,
t.pay_buyer_cnt AS rose_buyer_cnt,
ROUND(t.pay_amount / NULLIF(t.pay_order_cnt, 0), 2) AS rose_avg_order_value,
-- 业务规则:排名
ROW_NUMBER() OVER (
PARTITION BY t.province_code
ORDER BY t.pay_amount DESC
) AS city_rank_in_province,
ROW_NUMBER() OVER (
ORDER BY t.pay_amount DESC
) AS city_rank_nationwide,
-- 业务规则:TOP10 标记
CASE WHEN ROW_NUMBER() OVER (ORDER BY t.pay_amount DESC) <= 10
THEN 1 ELSE 0 END AS is_top10_city,
-- 占比:玫瑰额 / 整个鲜花一级类目额
ROUND(t.pay_amount / NULLIF(f.flower_total_amount, 0), 4)
AS rose_amount_share_in_flower,
-- 同比:与去年同期对比
ROUND((t.pay_amount - y.pay_amount) / NULLIF(y.pay_amount, 0), 4)
AS rose_pay_amount_yoy,
-- 环比:与昨日对比
ROUND((t.pay_amount - d.pay_amount) / NULLIF(d.pay_amount, 0), 4)
AS rose_pay_amount_dod
FROM (
– 当日玫瑰花数据
SELECT * FROM dws_cate_region_trade_1d
WHERE dt = ‘${bizdate}’
AND cate_name = ‘玫瑰花’
AND cate_level1_name = ‘鲜花’
) t
– 关联:同城市同日的鲜花一级类目总额(算占比)
LEFT JOIN (
SELECT province_code, city_code,
SUM(pay_amount) AS flower_total_amount
FROM dws_cate_region_trade_1d
WHERE dt = ‘${bizdate}’
AND cate_level1_name = ‘鲜花’
GROUP BY province_code, city_code
) f ON t.city_code = f.city_code
– 关联:去年同日玫瑰花数据
LEFT JOIN (
SELECT city_code, pay_amount FROM dws_cate_region_trade_1d
WHERE dt = date_sub(‘${bizdate}’, 365)
AND cate_name = ‘玫瑰花’
) y ON t.city_code = y.city_code
– 关联:昨日玫瑰花数据
LEFT JOIN (
SELECT city_code, pay_amount FROM dws_cate_region_trade_1d
WHERE dt = date_sub(‘${bizdate}’, 1)
AND cate_name = ‘玫瑰花’
) d ON t.city_code = d.city_code;
ADS 的特征体现:
- 写死了业务条件:cate_name = ‘玫瑰花’、cate_level1_name = ‘鲜花’
- 包含业务规则:排名、TOP10 判定、占比、同环比
- 直接面向业务方:运营拿到这张表就能直接做看板,不用再算
七、最终消费 SQL(业务方使用)
– 看板查询:每天每省每市玫瑰花下单数与交易额
SELECT
dt,
province_name,
city_name,
rose_order_cnt AS 下单笔数,
rose_pay_amount AS 交易额,
city_rank_nationwide AS 全国排名,
rose_pay_amount_yoy AS 同比
FROM ads_flower_rose_geo_1d
WHERE dt = ‘2026-06-11’
ORDER BY rose_pay_amount DESC;
八、分层职责回顾对照
┌─────┬────────────────────────────────────────────────────────────────────────────────────────────────────┬───────────────────────────────────────────────────────────┐
│ 层 │ 表 │ 干了什么 │
├─────┼────────────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────────────────┤
│ ODS │ ods_trade_order_di / ods_trade_order_item_di / ods_item_info_df / ods_item_cate_df / ods_region_df │ 各源系统原样落地 │
├─────┼────────────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────────────────┤
│ DIM │ dim_item_df / dim_region_df │ 把类目层级、省市层级打平成一致性维表 │
├─────┼────────────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────────────────┤
│ DWD │ dwd_trade_order_pay_di │ 订单+商品+类目+地区打宽,过滤无效订单,统一有效订单口径 │
├─────┼────────────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────────────────┤
│ DWS │ dws_cate_region_trade_1d │ 按"日×地区×类目"通用聚合,不写死业务条件 │
├─────┼────────────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────────────────┤
│ ADS │ ads_flower_rose_geo_1d │ 锁定"鲜花-玫瑰",加排名/同环比/占比等业务规则,直接喂看板 │
└─────┴────────────────────────────────────────────────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────┘
九、关键设计取舍
- 为什么 DWS 不直接做"玫瑰花"汇总?
❌ 反例:建一张 dws_rose_region_1d 只算玫瑰花
- 如果要看"百合",又得建一张
- 100 个叶子类目 → 100 张 DWS 表
✅ 正解:dws_cate_region_trade_1d 保留 cate_id 维度
- 任何类目都能从这一张表分析
- ADS 层再 WHERE cate_name = ‘xxx’ 过滤
- 为什么 DWD 要做维度退化?
如果 DWD 只有 cate_id,DWS 每次都要 join dim_item_df 拿 cate_name —— 浪费算力且容易因维表更新导致历史数据漂移。冗余进 DWD,一次到位。
- 同环比为什么放 ADS 不放 DWS?
同环比是应用计算,不同业务方对"同比"定义不一样(去年同日?同比周?同比月?)。DWS 只保留绝对值,ADS 按业务定义算同环比。
一句话总结
▎ 这条链路核心思想:ODS 贴源 → DWD 打宽统一口径 → DWS 通用聚合 → ADS 业务定制。
▎ "玫瑰花"这三个字直到 ADS 层才出现 —— 越往上越通用,越往下越业务,这才是分层数仓的精髓。
更多推荐



所有评论(0)