Aghiless committed on
Commit
83ecde8
·
verified ·
1 Parent(s): 6f33c97

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +161 -97
src/streamlit_app.py CHANGED
@@ -1,97 +1,161 @@
1
- Installing packages into ‘/usr/local/lib/R/site-library’
2
- (as ‘lib’ is unspecified)
3
-
4
- also installing the dependency ‘vroom’
5
-
6
-
7
- Warning message in install.packages(c("readr", "dplyr", "ggplot2", "corrplot")):
8
- “installation of package ‘ggplot2’ had non-zero exit status”
9
-
10
- Attaching package: ‘dplyr’
11
-
12
-
13
- The following objects are masked from ‘package:stats’:
14
-
15
- filter, lag
16
-
17
-
18
- The following objects are masked from ‘package:base’:
19
-
20
- intersect, setdiff, setequal, union
21
-
22
-
23
- corrplot 0.95 loaded
24
-
25
- Rows: 52820 Columns: 5
26
- ── Column specification ────────────────────────────────────────────────────────
27
- Delimiter: ","
28
- dbl (5): price, surface, rooms, price_m2, neighborhood_score
29
-
30
- ℹ Use `spec()` to retrieve the full column specification for this data.
31
- Specify the column types or set `show_col_types = FALSE` to quiet this message.
32
- [1] "Dataset loaded successfully"
33
- A tibble: 6 × 5
34
- price surface rooms price_m2 neighborhood_score
35
- <dbl> <dbl> <dbl> <dbl> <dbl>
36
- 121000 69 2 1753.623 6.200
37
- 246000 49 2 5020.408 5.015
38
- 318050 115 6 2765.652 6.350
39
- 163000 42 2 3880.952 6.350
40
- 150000 93 5 1612.903 6.350
41
- 153000 46 2 3326.087 5.935
42
- spc_tbl_ [52,820 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
43
- $ price : num [1:52820] 121000 246000 318050 163000 150000 ...
44
- $ surface : num [1:52820] 69 49 115 42 93 46 32 83 70 25 ...
45
- $ rooms : num [1:52820] 2 2 6 2 5 2 2 4 3 1 ...
46
- $ price_m2 : num [1:52820] 1754 5020 2766 3881 1613 ...
47
- $ neighborhood_score: num [1:52820] 6.2 5.02 6.35 6.35 6.35 ...
48
- - attr(*, "spec")=
49
- .. cols(
50
- .. price = col_double(),
51
- .. surface = col_double(),
52
- .. rooms = col_double(),
53
- .. price_m2 = col_double(),
54
- .. neighborhood_score = col_double()
55
- .. )
56
- - attr(*, "problems")=<externalptr>
57
- price surface rooms price_m2
58
- Min. : 1 Min. : 1.00 Min. : 0.000 Min. : 0
59
- 1st Qu.: 185000 1st Qu.: 40.00 1st Qu.: 2.000 1st Qu.: 3193
60
- Median : 290000 Median : 62.00 Median : 3.000 Median : 4800
61
- Mean : 405173 Mean : 69.16 Mean : 3.116 Mean : 7332
62
- 3rd Qu.: 464598 3rd Qu.: 88.00 3rd Qu.: 4.000 3rd Qu.: 8341
63
- Max. :2990000 Max. :482.00 Max. :24.000 Max. :666667
64
- neighborhood_score
65
- Min. : 3.885
66
- 1st Qu.: 6.727
67
- Median : 7.780
68
- Mean : 7.870
69
- 3rd Qu.: 9.290
70
- Max. :20.504
71
-
72
-
73
-
74
-
75
-
76
- Call:
77
- lm(formula = price ~ surface + rooms + neighborhood_score, data = train)
78
-
79
- Residuals:
80
- Min 1Q Median 3Q Max
81
- -1815140 -145637 -68531 35243 2767231
82
-
83
- Coefficients:
84
- Estimate Std. Error t value Pr(>|t|)
85
- (Intercept) -467418.10 10358.66 -45.12 <2e-16 ***
86
- surface 4932.35 76.81 64.21 <2e-16 ***
87
- rooms -29954.53 2059.06 -14.55 <2e-16 ***
88
- neighborhood_score 79383.79 1120.02 70.88 <2e-16 ***
89
- ---
90
- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
91
-
92
- Residual standard error: 341000 on 42252 degrees of freedom
93
- Multiple R-squared: 0.2284, Adjusted R-squared: 0.2283
94
- F-statistic: 4169 on 3 and 42252 DF, p-value: < 2.2e-16
95
- [1] "RMSE: 339999.604607665"
96
-
97
- [1] "Model results saved successfully"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+
5
+ # ---------------------------------------------------
6
+ # PAGE CONFIG
7
+ # ---------------------------------------------------
8
+
9
# Browser-tab title and icon, and a full-width layout for the dashboard.
page_settings = {
    "page_title": "Île-de-France Housing Price Estimator",
    "page_icon": "🏠",
    "layout": "wide",
}
st.set_page_config(**page_settings)
14
+
15
+ # ---------------------------------------------------
16
+ # LOAD DATA
17
+ # ---------------------------------------------------
18
+
19
@st.cache_data
def load_data(path="housing_analysis_dataset-3.csv"):
    """Read the housing dataset from *path* and return it as a DataFrame.

    Streamlit re-executes the whole script on every widget interaction, so
    the read is wrapped in ``st.cache_data`` to parse the CSV only once per
    distinct path instead of on each rerun.
    """
    return pd.read_csv(path)


df = load_data()
20
+
21
+ # ---------------------------------------------------
22
+ # MODEL COEFFICIENTS (from R)
23
+ # ---------------------------------------------------
24
+
25
# Coefficients of the linear model
#   price ~ surface + rooms + neighborhood_score
# copied from the R regression output.
INTERCEPT = -467418.10
COEF_SURFACE = 4932.35
COEF_ROOMS = -29954.53
COEF_NEIGHBORHOOD = 79383.79


def predict_price(surface, rooms, neighborhood_score):
    """Return the linear-model price for one set of property features.

    The result is the intercept plus each feature multiplied by its
    coefficient. No bounds are applied, so the value can be negative for
    small surfaces combined with many rooms or a low neighborhood score.
    """
    surface_term = COEF_SURFACE * surface
    rooms_term = COEF_ROOMS * rooms
    neighborhood_term = COEF_NEIGHBORHOOD * neighborhood_score

    # Summed left to right, intercept first.
    return INTERCEPT + surface_term + rooms_term + neighborhood_term
41
+
42
+
43
+ # ---------------------------------------------------
44
+ # TITLE
45
+ # ---------------------------------------------------
46
+
47
st.title("🏠 Île-de-France Housing Price Estimator")

# Short Markdown introduction rendered directly under the title.
intro_markdown = """
This dashboard estimates housing prices using a **linear regression model**
trained on the **DVF real estate dataset**.
"""
st.write(intro_markdown)
55
+
56
+ # ---------------------------------------------------
57
+ # SIDEBAR INPUTS
58
+ # ---------------------------------------------------
59
+
60
st.sidebar.header("Property characteristics")

# Each slider supplies one model feature; bounds and defaults are spelled
# out by keyword so the three numbers cannot be confused.
surface = st.sidebar.slider(
    "Surface (m²)", min_value=20, max_value=300, value=70
)

rooms = st.sidebar.slider(
    "Number of rooms", min_value=1, max_value=10, value=3
)

neighborhood_score = st.sidebar.slider(
    "Neighborhood score", min_value=3.0, max_value=10.0, value=7.0
)
82
+
83
+ # ---------------------------------------------------
84
+ # PREDICTION
85
+ # ---------------------------------------------------
86
+
87
# Compute and show the estimate only when the user clicks the sidebar button.
if st.sidebar.button("Estimate price"):
    price = predict_price(surface, rooms, neighborhood_score)

    st.subheader("Estimated Property Price")

    if price > 0:
        st.success(f"{int(price):,} €")
    else:
        # The linear model has a large negative intercept, so some slider
        # combinations (e.g. surface=20, rooms=10, score=3.0) give a price
        # at or below zero. Do not present that as a successful estimate.
        st.warning(
            "The model returns a non-positive price for this combination "
            "of inputs, so no reliable estimate is available."
        )
94
+
95
+
96
+ # ---------------------------------------------------
97
+ # DATASET OVERVIEW
98
+ # ---------------------------------------------------
99
+
100
st.write("---")
st.header("Dataset Overview")

# Three side-by-side summary metrics computed from the loaded dataset.
col1, col2, col3 = st.columns(3)

property_count = len(df)
col1.metric("Number of properties", property_count)

mean_price = int(df["price"].mean())
col2.metric("Average price", f"{mean_price:,} €")

mean_price_m2 = int(df["price_m2"].mean())
col3.metric("Average price per m²", f"{mean_price_m2:,} €")
108
+
109
+
110
+ # ---------------------------------------------------
111
+ # PRICE DISTRIBUTION
112
+ # ---------------------------------------------------
113
+
114
st.write("---")
st.header("Price Distribution")

# Histogram of the price column, 50 bins.
fig, ax = plt.subplots()

ax.hist(df["price"], bins=50)

ax.set_xlabel("Price (€)")
ax.set_ylabel("Number of properties")

st.pyplot(fig)

# pyplot keeps every figure it creates in a global registry. The script is
# re-executed on each interaction, so close the figure once it has been
# rendered to avoid accumulating open figures.
plt.close(fig)
125
+
126
+
127
+ # ---------------------------------------------------
128
+ # PRICE VS SURFACE
129
+ # ---------------------------------------------------
130
+
131
st.write("---")
st.header("Price vs Surface")

# Scatter of price against surface; alpha makes overlapping points visible.
fig2, ax2 = plt.subplots()

ax2.scatter(df["surface"], df["price"], alpha=0.3)

ax2.set_xlabel("Surface (m²)")
ax2.set_ylabel("Price (€)")

st.pyplot(fig2)

# pyplot keeps every figure it creates in a global registry. The script is
# re-executed on each interaction, so close the figure once it has been
# rendered to avoid accumulating open figures.
plt.close(fig2)
142
+
143
+
144
+ # ---------------------------------------------------
145
+ # MODEL EXPLANATION
146
+ # ---------------------------------------------------
147
+
148
st.write("---")
st.header("Model")

st.write("The prediction is based on the following linear regression model:")

# Build the equation from the coefficient constants so the displayed model
# cannot drift from the one predict_price evaluates. Values are rounded to
# whole numbers (":+.0f" also emits the sign), and \text{} keeps the
# multi-letter names upright instead of rendering them as products of
# italic single-letter variables.
st.latex(
    rf"""
    \text{{Price}} =
    {INTERCEPT:.0f}
    {COEF_SURFACE:+.0f} \times \text{{Surface}}
    {COEF_ROOMS:+.0f} \times \text{{Rooms}}
    {COEF_NEIGHBORHOOD:+.0f} \times \text{{NeighborhoodScore}}
    """
)