iBrokeTheCode committed on
Commit
a312419
·
1 Parent(s): f0142a5

chore: Add SQL scripts for revenue and delivery analysis

Browse files
sql/.gitkeep DELETED
File without changes
sql/delivery_date_difference.sql ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Calculates, for each customer state, the average difference in days between the estimated delivery date and the actual delivery date for all orders that have been delivered.
2
+ --
3
+ -- Explanation step by step:
4
+ -- 1. Select the customer state and the average difference in days between the estimated delivery date and the actual delivery date for all orders that have been delivered.
5
+ -- 2. Join the olist_orders table with the olist_customers table on the customer_id column.
6
+ -- 3. Filter the results to only include orders that have been delivered and have an actual delivery date.
7
+ -- 4. Group the results by the customer state.
8
+ -- 5. Order the results by the average difference in days between the estimated delivery date and the actual delivery date.
9
+ SELECT
10
+ oc.customer_state AS State,
11
+ CAST(
12
+ AVG(
13
+ julianday (
14
+ STRFTIME ('%Y-%m-%d', oo.order_estimated_delivery_date) -- drop the time of day so only calendar dates are compared
15
+ ) - julianday (
16
+ STRFTIME ('%Y-%m-%d', oo.order_delivered_customer_date)
17
+ ) -- estimated minus actual: positive when the order arrived before the estimate
18
+ ) AS INTEGER -- the per-state average is cast to an integer, discarding the fractional part
19
+ ) AS Delivery_Difference
20
+ FROM
21
+ olist_orders oo
22
+ JOIN olist_customers oc ON oo.customer_id = oc.customer_id
23
+ WHERE
24
+ oo.order_status = 'delivered'
25
+ AND oo.order_delivered_customer_date IS NOT NULL
26
+ GROUP BY
27
+ oc.customer_state
28
+ ORDER BY
29
+ Delivery_Difference ASC;
sql/get_freight_value_weight_relationship.sql ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Calculates the total freight value and the total weight of the products in each order where the order status is 'delivered'.
2
+ --
3
+ -- Explanation step by step:
4
+ -- 1. Select the order ID, the total freight value, and the total weight of the products in each order where the order status is 'delivered'.
5
+ -- 2. Join the olist_orders table with the olist_order_items table on the order_id column.
6
+ -- 3. Join the olist_order_items table with the olist_products table on the product_id column.
7
+ -- 4. Filter the results to only include orders where the order status is 'delivered'.
8
+ -- 5. Group the results by the order ID.
9
+ -- 6. Order the results by the order ID.
10
+ SELECT
11
+ ooi.order_id,
12
+ SUM(ooi.freight_value) AS freight_value, -- summed over every joined order-item row of the order
13
+ SUM(op.product_weight_g) AS product_weight_g -- a product weight is added once per order-item row that references it
14
+ FROM
15
+ olist_orders o
16
+ JOIN olist_order_items ooi ON o.order_id = ooi.order_id
17
+ JOIN olist_products op ON ooi.product_id = op.product_id -- inner join: items without a matching product row are left out
18
+ WHERE
19
+ o.order_status = 'delivered'
20
+ GROUP BY
21
+ ooi.order_id
22
+ ORDER BY
23
+ ooi.order_id
sql/global_amount_order_status.sql ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Calculates the amount of orders for each order status.
2
+ --
3
+ -- Explanation step by step:
4
+ -- 1. Select the order status and the amount of orders for each order status.
5
+ -- 2. Read the rows from the olist_orders table (no join is needed).
6
+ -- 3. Group the results by the order status.
7
+ SELECT
8
+ oo.order_status,
9
+ COUNT(oo.order_status) AS Ammount -- counts the rows of each group whose order_status is not NULL
10
+ FROM
11
+ olist_orders oo
12
+ GROUP BY
13
+ oo.order_status;
sql/orders_per_day_and_holidays_2017.sql ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Calculates the number of orders per day and whether each day is a holiday.
2
+ --
3
+ -- Explanation step by step:
4
+ -- 1. Select the number of orders per day and whether each day is a holiday.
5
+ -- 2. Join the olist_orders table with the public_holidays table on the date column.
6
+ -- 3. Filter the results to only include orders from 2017.
7
+ -- 4. Group the results by the date.
8
+ -- 5. Order the results by the date.
9
+ SELECT
10
+ COUNT(o.order_id) AS order_count,
11
+ CAST(
12
+ STRFTIME ('%s', DATE(o.order_purchase_timestamp)) AS INTEGER
13
+ ) * 1000 AS date, -- Unix time of the purchase date in seconds (%s), scaled to milliseconds
14
+ CASE
15
+ WHEN DATE(h.date) IS NOT NULL THEN 'true'
16
+ ELSE 'false'
17
+ END AS holiday -- returned as text, not as a boolean
18
+ FROM
19
+ olist_orders o
20
+ LEFT JOIN public_holidays h ON DATE(o.order_purchase_timestamp) = DATE(h.date) -- LEFT JOIN keeps days without a matching holiday row (h.date is then NULL)
21
+ WHERE
22
+ STRFTIME ('%Y', o.order_purchase_timestamp) = '2017'
23
+ GROUP BY
24
+ DATE(o.order_purchase_timestamp) -- NOTE(review): h.date is neither grouped nor aggregated; if public_holidays can hold several rows for one date, order_count would be multiplied - confirm
25
+ ORDER BY
26
+ DATE(o.order_purchase_timestamp);
sql/real_vs_estimated_delivered_time.sql ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Calculate the average real and estimated delivery time, in days, for each month of 2016, 2017 and 2018
2
+ --
3
+ -- It will have different columns:
4
+ -- 1. month_no, with the month numbers going from 01 to 12
5
+ -- 2. month, with the 3 first letters of each month (e.g. Jan, Feb)
6
+ -- 3. Year2016_real_time, with the average delivery time per month of 2016 (NaN if it doesn't exist)
7
+ -- 4. Year2017_real_time, with the average delivery time per month of 2017 (NaN if it doesn't exist)
8
+ -- 5. Year2018_real_time, with the average delivery time per month of 2018 (NaN if it doesn't exist)
9
+ -- 6. Year2016_estimated_time, with the average estimated delivery time per month of 2016 (NaN if it doesn't exist)
10
+ -- 7. Year2017_estimated_time, with the average estimated delivery time per month of 2017 (NaN if it doesn't exist)
11
+ -- 8. Year2018_estimated_time, with the average estimated delivery time per month of 2018 (NaN if it doesn't exist).
12
+ --
13
+ -- Explanation step by step:
14
+ -- 1. Calculate the real and estimated delivery time for each order
15
+ -- 2. Group the data by month
16
+ -- 3. Calculate the average real and estimated delivery time for each month
17
+ WITH
18
+ base AS ( -- one row per delivered order, with its purchase month and year and both durations
19
+ SELECT
20
+ STRFTIME ('%m', oo.order_purchase_timestamp) AS month_no,
21
+ STRFTIME ('%Y', oo.order_purchase_timestamp) AS year,
22
+ julianday (oo.order_delivered_customer_date) - julianday (oo.order_purchase_timestamp) AS real_time, -- fractional days from purchase to actual delivery
23
+ julianday (oo.order_estimated_delivery_date) - julianday (oo.order_purchase_timestamp) AS estimated_time -- fractional days from purchase to estimated delivery
24
+ FROM
25
+ olist_orders oo
26
+ WHERE
27
+ oo.order_status = 'delivered'
28
+ AND oo.order_delivered_customer_date IS NOT NULL
29
+ ),
30
+ pivot AS ( -- one row per month, with one averaged column per year and measure
31
+ SELECT
32
+ b.month_no,
33
+ AVG(
34
+ CASE
35
+ WHEN year = '2016' THEN b.real_time
36
+ END -- no ELSE branch: rows of other years become NULL and AVG ignores them
37
+ ) AS Year2016_real_time,
38
+ AVG(
39
+ CASE
40
+ WHEN year = '2017' THEN b.real_time
41
+ END
42
+ ) AS Year2017_real_time,
43
+ AVG(
44
+ CASE
45
+ WHEN year = '2018' THEN b.real_time
46
+ END
47
+ ) AS Year2018_real_time,
48
+ AVG(
49
+ CASE
50
+ WHEN year = '2016' THEN b.estimated_time
51
+ END
52
+ ) AS Year2016_estimated_time,
53
+ AVG(
54
+ CASE
55
+ WHEN year = '2017' THEN b.estimated_time
56
+ END
57
+ ) AS Year2017_estimated_time,
58
+ AVG(
59
+ CASE
60
+ WHEN year = '2018' THEN b.estimated_time
61
+ END
62
+ ) AS Year2018_estimated_time
63
+ FROM
64
+ base b
65
+ GROUP BY
66
+ month_no -- a month with no orders in a given year gets NULL in that column
67
+ )
68
+ SELECT
69
+ p.month_no,
70
+ CASE p.month_no -- map the two-digit month number to its three-letter name
71
+ WHEN '01' THEN 'Jan'
72
+ WHEN '02' THEN 'Feb'
73
+ WHEN '03' THEN 'Mar'
74
+ WHEN '04' THEN 'Apr'
75
+ WHEN '05' THEN 'May'
76
+ WHEN '06' THEN 'Jun'
77
+ WHEN '07' THEN 'Jul'
78
+ WHEN '08' THEN 'Aug'
79
+ WHEN '09' THEN 'Sep'
80
+ WHEN '10' THEN 'Oct'
81
+ WHEN '11' THEN 'Nov'
82
+ WHEN '12' THEN 'Dec'
83
+ END AS month,
84
+ p.Year2016_real_time,
85
+ p.Year2017_real_time,
86
+ p.Year2018_real_time,
87
+ p.Year2016_estimated_time,
88
+ p.Year2017_estimated_time,
89
+ p.Year2018_estimated_time
90
+ FROM
91
+ pivot p
92
+ ORDER BY
93
+ p.month_no;
sql/revenue_by_month_year.sql ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Calculates revenue by month and year
2
+ --
3
+ -- It will have different columns:
4
+ -- 1. month_no, with the month numbers going from 01 to 12
5
+ -- 2. month, with the 3 first letters of each month (e.g. Jan, Feb)
6
+ -- 3. Year2016, with the revenue per month of 2016 (0.00 if it doesn't exist)
7
+ -- 4. Year2017, with the revenue per month of 2017 (0.00 if it doesn't exist)
8
+ -- 5. Year2018, with the revenue per month of 2018 (0.00 if it doesn't exist)
9
+ --
10
+ -- Explanation step by step:
11
+ -- 1. Take the minimum payment value of each order as the revenue of that order
12
+ -- 2. Group the data by delivery month and year
13
+ -- 3. Sum the revenue for each month and pivot the years into separate columns
14
+ WITH
15
+ month_names AS ( -- fixed list of the twelve months, so every month appears in the output
16
+ SELECT
17
+ '01' AS month_no,
18
+ 'Jan' AS month
19
+ UNION ALL
20
+ SELECT
21
+ '02',
22
+ 'Feb'
23
+ UNION ALL
24
+ SELECT
25
+ '03',
26
+ 'Mar'
27
+ UNION ALL
28
+ SELECT
29
+ '04',
30
+ 'Apr'
31
+ UNION ALL
32
+ SELECT
33
+ '05',
34
+ 'May'
35
+ UNION ALL
36
+ SELECT
37
+ '06',
38
+ 'Jun'
39
+ UNION ALL
40
+ SELECT
41
+ '07',
42
+ 'Jul'
43
+ UNION ALL
44
+ SELECT
45
+ '08',
46
+ 'Aug'
47
+ UNION ALL
48
+ SELECT
49
+ '09',
50
+ 'Sep'
51
+ UNION ALL
52
+ SELECT
53
+ '10',
54
+ 'Oct'
55
+ UNION ALL
56
+ SELECT
57
+ '11',
58
+ 'Nov'
59
+ UNION ALL
60
+ SELECT
61
+ '12',
62
+ 'Dec'
63
+ ),
64
+ -- Get the minimum payment per order
65
+ min_payments AS (
66
+ SELECT
67
+ oop.order_id,
68
+ MIN(oop.payment_value) AS min_payment -- NOTE(review): an order with several payment rows contributes only its smallest one - confirm this is the intended revenue definition
69
+ FROM
70
+ olist_order_payments oop
71
+ GROUP BY
72
+ oop.order_id
73
+ ),
74
+ -- Calculate revenue grouped by year and month
75
+ revenue AS (
76
+ SELECT
77
+ strftime ('%m', oo.order_delivered_customer_date) AS month_no, -- month and year come from the delivery date, not the purchase date
78
+ strftime ('%Y', oo.order_delivered_customer_date) AS year,
79
+ SUM(mp.min_payment) AS total_revenue
80
+ FROM
81
+ olist_orders oo
82
+ JOIN min_payments mp ON oo.order_id = mp.order_id
83
+ WHERE
84
+ oo.order_status = 'delivered'
85
+ AND oo.order_delivered_customer_date IS NOT NULL
86
+ AND strftime ('%Y', oo.order_delivered_customer_date) IN ('2016', '2017', '2018')
87
+ GROUP BY
88
+ month_no,
89
+ year
90
+ )
91
+ -- Final Select
92
+ SELECT
93
+ mn.month_no,
94
+ mn.month,
95
+ COALESCE(
96
+ MAX( -- revenue holds at most one row per month and year, so MAX only picks that single total
97
+ CASE
98
+ WHEN r.year = '2016' THEN r.total_revenue
99
+ END
100
+ ),
101
+ 0.0 -- months without revenue in that year are reported as 0.0 instead of NULL
102
+ ) AS Year2016,
103
+ COALESCE(
104
+ MAX(
105
+ CASE
106
+ WHEN r.year = '2017' THEN r.total_revenue
107
+ END
108
+ ),
109
+ 0.0
110
+ ) AS Year2017,
111
+ COALESCE(
112
+ MAX(
113
+ CASE
114
+ WHEN r.year = '2018' THEN r.total_revenue
115
+ END
116
+ ),
117
+ 0.0
118
+ ) AS Year2018
119
+ FROM
120
+ month_names mn
121
+ LEFT JOIN revenue r ON mn.month_no = r.month_no -- LEFT JOIN keeps months that have no revenue rows at all
122
+ GROUP BY
123
+ mn.month_no,
124
+ mn.month
125
+ ORDER BY
126
+ mn.month_no;
sql/revenue_per_state.sql ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Calculate the revenue per state
2
+ --
3
+ -- It will have different columns:
4
+ -- 1. customer_state, with the state of the customer
5
+ -- 2. Revenue, with the revenue per state
6
+ --
7
+ -- Explanation step by step:
8
+ -- 1. Join each delivered order with its customer and its payments
9
+ -- 2. Group the data by state
10
+ -- 3. Sum the payment values for each state
11
+ -- 4. Order the data by revenue, highest first
12
+ -- 5. Limit the data to the top 10
13
+ SELECT
14
+ oc.customer_state AS customer_state,
15
+ SUM(oop.payment_value) AS Revenue -- every payment row of every delivered order in the state is added up
16
+ FROM
17
+ olist_orders oo
18
+ JOIN olist_customers oc ON oo.customer_id = oc.customer_id
19
+ JOIN olist_order_payments oop ON oop.order_id = oo.order_id
20
+ WHERE
21
+ oo.order_status = 'delivered'
22
+ AND oo.order_delivered_customer_date IS NOT NULL
23
+ GROUP BY
24
+ oc.customer_state
25
+ ORDER BY
26
+ Revenue DESC
27
+ LIMIT
28
+ 10;
sql/top_10_least_revenue_categories.sql ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Calculates the 10 categories with the least revenue
2
+ --
3
+ -- It will have different columns:
4
+ -- 1. Category, with the category name
5
+ -- 2. Num_order, with the number of orders
6
+ -- 3. Revenue, with the revenue
7
+ --
8
+ -- Explanation step by step:
9
+ -- 1. Join each delivered order with its items, products, translated category names and payments
10
+ -- 2. Group the data by category
11
+ -- 3. Sum the payment values for each category
12
+ -- 4. Order the data by revenue, lowest first
13
+ -- 5. Limit the data to the 10 categories with the lowest revenue
14
+ SELECT
15
+ pcnt.product_category_name_english AS Category,
16
+ COUNT(DISTINCT oo.order_id) AS Num_order, -- DISTINCT because the joins repeat an order once per item and payment row
17
+ SUM(p.payment_value) AS Revenue -- NOTE(review): items and payments are both joined on order_id, so each payment_value is repeated once per item row of the order before being summed - confirm this is intended
18
+ FROM
19
+ olist_orders oo
20
+ JOIN olist_order_items ooi ON oo.order_id = ooi.order_id
21
+ JOIN olist_products op ON ooi.product_id = op.product_id
22
+ JOIN product_category_name_translation pcnt ON op.product_category_name = pcnt.product_category_name -- inner join: categories without a translation row are left out
23
+ JOIN olist_order_payments p ON oo.order_id = p.order_id
24
+ WHERE
25
+ oo.order_status = 'delivered'
26
+ AND oo.order_delivered_customer_date IS NOT NULL
27
+ AND op.product_category_name IS NOT NULL
28
+ GROUP BY
29
+ Category
30
+ ORDER BY
31
+ Revenue ASC
32
+ LIMIT
33
+ 10;
sql/top_10_revenue_categories.sql ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Calculates the top 10 revenue categories
2
+ --
3
+ -- It will have different columns:
4
+ -- 1. Category, with the category name
5
+ -- 2. Num_order, with the number of orders
6
+ -- 3. Revenue, with the revenue
7
+ --
8
+ -- Explanation step by step:
9
+ -- 1. Join each delivered order with its items, products, translated category names and payments
10
+ -- 2. Group the data by category
11
+ -- 3. Sum the payment values for each category
12
+ -- 4. Order the data by revenue, highest first
13
+ -- 5. Limit the data to the top 10
14
+ SELECT
15
+ pcnt.product_category_name_english AS Category,
16
+ COUNT(DISTINCT oo.order_id) AS Num_order, -- DISTINCT because the joins repeat an order once per item and payment row
17
+ SUM(p.payment_value) AS Revenue -- NOTE(review): items and payments are both joined on order_id, so each payment_value is repeated once per item row of the order before being summed - confirm this is intended
18
+ FROM
19
+ olist_orders oo
20
+ JOIN olist_order_items ooi ON oo.order_id = ooi.order_id
21
+ JOIN olist_products op ON ooi.product_id = op.product_id
22
+ JOIN product_category_name_translation pcnt ON op.product_category_name = pcnt.product_category_name -- inner join: categories without a translation row are left out
23
+ JOIN olist_order_payments p ON oo.order_id = p.order_id
24
+ WHERE
25
+ oo.order_status = 'delivered'
26
+ AND oo.order_delivered_customer_date IS NOT NULL
27
+ AND op.product_category_name IS NOT NULL
28
+ GROUP BY
29
+ Category
30
+ ORDER BY
31
+ Revenue DESC
32
+ LIMIT
33
+ 10;