Wewoo committed on
Commit
c9903cd
·
verified ·
1 Parent(s): 7201bc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -141
app.py CHANGED
@@ -1,158 +1,72 @@
1
- # app.py
2
- import os
3
- import joblib
4
  import numpy as np
5
- import pandas as pd
6
  from sklearn.datasets import fetch_california_housing
7
  from sklearn.ensemble import RandomForestRegressor
8
- from sklearn.model_selection import train_test_split
9
- from sklearn.pipeline import Pipeline
10
- from sklearn.preprocessing import StandardScaler
11
- import gradio as gr
12
-
13
- MODEL_PATH = "model.joblib"
14
 
15
def train_and_save_model():
    """Train the house-value pipeline and persist it to ``MODEL_PATH``.

    Loads the California housing dataset (scikit-learn downloads it when it
    is not cached yet), fits a ``StandardScaler`` + ``RandomForestRegressor``
    pipeline on an 85% training split, writes the fitted pipeline with
    joblib and returns it.

    Returns:
        The fitted ``Pipeline``.
    """
    cal = fetch_california_housing(as_frame=True)
    X = cal.data
    y = cal.target  # median house value (in units of $100k)

    # Keep the original 85/15 split so the fitted model is unchanged; the
    # held-out part is not used in this function, so it is discarded.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.15, random_state=42
    )

    # Pipeline: scaler followed by a random forest.
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("rf", RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)),
    ])
    pipe.fit(X_train, y_train)
    joblib.dump(pipe, MODEL_PATH)
    return pipe
30
 
31
def load_model():
    """Return the persisted pipeline, training a fresh one when needed.

    If ``MODEL_PATH`` exists it is loaded with joblib. When the file is
    missing, or cannot be loaded (e.g. truncated file or a pickle written by
    an incompatible library version), a new model is trained and saved via
    ``train_and_save_model`` instead of crashing at import time.

    Returns:
        A fitted model object exposing ``predict``.
    """
    if os.path.exists(MODEL_PATH):
        try:
            # NOTE: joblib.load unpickles the file; only point MODEL_PATH at
            # artifacts this application wrote itself.
            return joblib.load(MODEL_PATH)
        except Exception as exc:  # many failure types are possible when unpickling
            import warnings

            warnings.warn(
                f"Could not load {MODEL_PATH!r} ({exc!r}); retraining the model."
            )
    return train_and_save_model()
36
 
37
- model = load_model()
 
 
38
 
39
# Feature columns, in the order the model inputs are assembled.
FEATURE_NAMES = [
    "MedInc",      # median income in block
    "HouseAge",    # median house age in block
    "AveRooms",    # average rooms
    "AveBedrms",   # average bedrooms
    "Population",
    "AveOccup",    # average occupants per household
    "Latitude",
    "Longitude",
]
49
 
50
def predict_from_inputs(medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long):
    """Predict the median house value from the eight form fields.

    Args:
        medinc, houseage, averooms, avebedrms, population, aveoccup, lat,
        long: numeric feature values, given in ``FEATURE_NAMES`` order.
            A value of ``None`` (an emptied form field) is treated as
            missing.

    Returns:
        A human-readable prediction string, or a short message asking for
        the missing fields when any value is ``None``.
    """
    values = [medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long]

    # An emptied number field arrives as None; passing that on to the model
    # would raise, so report it to the user instead.
    if any(v is None for v in values):
        return "Please fill in all eight feature fields before predicting."

    # Build a one-row frame with the same column order as the dataset.
    x = pd.DataFrame([values], columns=FEATURE_NAMES)
    pred = model.predict(x)[0]  # value in 100k$ units
    usd = pred * 100000
    return f"Estimated median house value: {pred:.3f} (×100k$) → ${usd:,.0f}"
58
 
59
- # A friendly "chat-like" textarea where user can paste a CSV row or natural description:
60
# Accepted key names per feature, in priority order (first match wins).
_FEATURE_ALIASES = {
    "MedInc": ("medinc", "income"),
    "HouseAge": ("houseage", "age"),
    "AveRooms": ("averooms", "rooms"),
    "AveBedrms": ("avebedrms", "beds", "bedrooms"),
    "Population": ("population", "pop"),
    "AveOccup": ("aveoccup", "occup", "occ"),
    "Latitude": ("lat", "latitude"),
    "Longitude": ("long", "lon", "lng"),
}

# Filled on first use so the dataset is loaded once, not on every request.
_feature_means_cache = {}


def _feature_means():
    """Return ``{feature name: dataset mean}``, computed once and cached."""
    if not _feature_means_cache:
        frame = fetch_california_housing(as_frame=True).data
        _feature_means_cache.update(frame.mean().to_dict())
    return _feature_means_cache


def _parse_csv_row(text):
    """Return a list of floats if *text* is a full CSV row, else ``None``."""
    parts = [p.strip() for p in text.split(",")]
    if len(parts) != len(FEATURE_NAMES):
        return None
    try:
        return [float(p) for p in parts]
    except ValueError:
        return None


def _parse_key_values(text):
    """Return ``{feature name: value}`` for the ``key=value`` pairs in *text*.

    Raises:
        ValueError: if a value after ``=`` is not a number.
    """
    tokens = {}
    for piece in text.replace(",", " ").split():
        if "=" in piece:
            key, _, raw = piece.partition("=")
            tokens[key.lower()] = float(raw)

    found = {}
    for feature, aliases in _FEATURE_ALIASES.items():
        for alias in aliases:
            if alias in tokens:
                found[feature] = tokens[alias]
                break
    return found


def predict_from_text(text_input):
    """Predict the median house value from a free-text description.

    Accepts either:
      - comma-separated numeric values in ``FEATURE_NAMES`` order, or
      - simple ``key=value`` pairs such as
        ``income=3.5 age=20 rooms=5 beds=1 population=1000 occ=2 lat=34 long=-118``.

    The parser is permissive: features that are not supplied use the dataset
    mean, and if the text cannot be parsed at all every feature falls back
    to its dataset mean.

    Args:
        text_input: the raw text from the UI; ``None`` is treated as empty.

    Returns:
        A human-readable prediction string.
    """
    text = text_input or ""

    # Try the strict CSV form first.
    vals = _parse_csv_row(text)

    if vals is None:
        means = _feature_means()
        try:
            given = _parse_key_values(text)
        except ValueError:
            # A malformed number invalidates the whole input: use means only.
            given = {}
        vals = [float(given.get(name, means[name])) for name in FEATURE_NAMES]

    x = pd.DataFrame([vals], columns=FEATURE_NAMES)
    pred = model.predict(x)[0]
    usd = pred * 100000
    return f"Estimated median house value: {pred:.3f} (×100k$) → ${usd:,.0f}"
122
 
123
- # Build Gradio UI
124
# Two-column layout: a numeric form on the left, a free-text box on the
# right; both write their result into the same "Prediction" textbox.
with gr.Blocks(title="HousePriceAI - demo") as demo:
    gr.Markdown("## HousePriceAI — Predict median house value (demo)\nEnter features or paste a CSV row. Model trained quickly on California housing dataset.")
    with gr.Row():
        with gr.Column(scale=2):
            medinc = gr.Number(value=3.0, label="MedInc")
            houseage = gr.Number(value=30.0, label="HouseAge")
            averooms = gr.Number(value=5.0, label="AveRooms")
            avebedrms = gr.Number(value=1.0, label="AveBedrms")
            population = gr.Number(value=1000, label="Population")
            aveoccup = gr.Number(value=3.0, label="AveOccup")
            lat = gr.Number(value=34.0, label="Latitude")
            long = gr.Number(value=-118.0, label="Longitude")
            predict_btn = gr.Button("Predict (form)")
            output = gr.Textbox(label="Prediction")
        with gr.Column(scale=1):
            gr.Markdown("### Or paste free text / CSV")
            text_in = gr.Textbox(lines=6, placeholder="e.g. 3.2, 25, 5.4, 1.1, 1500, 2.5, 34.1, -118.2 OR income=3.2 age=25 ...", label="Text input")
            predict_text_btn = gr.Button("Predict (text)")

    form_inputs = [medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long]

    # The handler already takes the eight values positionally, so it can be
    # passed directly instead of through a forwarding lambda.
    predict_btn.click(fn=predict_from_inputs, inputs=form_inputs, outputs=[output])
    predict_text_btn.click(fn=predict_from_text, inputs=[text_in], outputs=[output])

    # Each example row must have one value per input component, so the
    # numeric row targets the form and the text row targets the text box.
    gr.Examples(
        examples=[[3.0, 20, 5.5, 1.1, 1200, 2.8, 34.0, -118.2]],
        inputs=form_inputs,
    )
    gr.Examples(
        examples=[["income=4.5 age=35 rooms=6 beds=1.2 population=800 occ=2.5 lat=37 long=-122"]],
        inputs=[text_in],
    )

    gr.Markdown("Model: RandomForestRegressor trained on scikit-learn California Housing dataset. (This is a demo; for production use a real dataset and careful validation.)")


if __name__ == "__main__":
    demo.launch()
 
1
+ import gradio as gr
 
 
2
  import numpy as np
 
3
  from sklearn.datasets import fetch_california_housing
4
  from sklearn.ensemble import RandomForestRegressor
 
 
 
 
 
 
5
 
6
# ======================
# 1. Load data & train model
# ======================
# The model is trained at import time on the full dataset.
data = fetch_california_housing()
X = data.data
y = data.target  # unit: 100,000 USD

# n_jobs=-1 builds the trees on all available cores; with a fixed
# random_state the fitted forest does not depend on the number of jobs.
model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
model.fit(X, y)
 
 
 
15
 
16
# ======================
# 2. Year adjustment
# ======================

# NOTE(review): scikit-learn documents this dataset as derived from the 1990
# U.S. census, so a base year of 2000 may understate the adjustment —
# confirm the intended reference year before changing it.
BASE_YEAR = 2000
YEARLY_RATE = 0.04  # 4% per year


def adjust_by_year(price, year, base_year=BASE_YEAR, yearly_rate=YEARLY_RATE):
    """Scale *price* by compound yearly growth between two years.

    Args:
        price: the value at ``base_year`` (any unit; the unit is preserved).
        year: the target year. Years before ``base_year`` scale the price
            down, since the exponent becomes negative.
        base_year: the year *price* refers to. Defaults to ``BASE_YEAR``.
        yearly_rate: growth per year as a fraction. Defaults to
            ``YEARLY_RATE``.

    Returns:
        ``price * (1 + yearly_rate) ** (year - base_year)``.
    """
    years_diff = year - base_year
    return price * (1 + yearly_rate) ** years_diff
 
 
 
 
27
 
28
# ======================
# 3. Main prediction function
# ======================

def predict_price(
    medinc, houseage, averooms, avebedrms,
    population, aveoccup, latitude, longitude,
    year
):
    """Predict a house price for the given features, adjusted to *year*.

    Args:
        medinc, houseage, averooms, avebedrms, population, aveoccup,
        latitude, longitude: numeric model features, passed to the model in
            this order.
        year: the year the price should be expressed in.

    Returns:
        A display string with the adjusted price in USD, or a short warning
        message when any input is ``None`` (an emptied form field).
    """
    values = (
        medinc, houseage, averooms, avebedrms,
        population, aveoccup, latitude, longitude,
    )

    # An emptied number field arrives as None, which would otherwise fail
    # inside the model; tell the user instead.
    if year is None or any(v is None for v in values):
        return "⚠️ Vui lòng nhập đầy đủ tất cả các trường."

    # Show the year as a whole number even if the widget delivers a float.
    year = int(year)

    features = np.array([values], dtype=float)

    base_pred = model.predict(features)[0]      # in units of $100,000
    adjusted = adjust_by_year(base_pred, year)  # scaled to the target year

    usd = adjusted * 100000

    return f"💰 Giá nhà dự đoán năm {year}: {usd:,.2f} USD"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
 
 
 
 
 
 
49
 
50
# ======================
# 4. Gradio interface (form only)
# ======================

# Input components are listed in the same order as predict_price's parameters.
interface = gr.Interface(
    fn=predict_price,
    inputs=[
        gr.Slider(0, 20, value=3, label="MedInc (Thu nhập trung vị)"),
        gr.Slider(0, 100, value=20, label="HouseAge (Tuổi nhà)"),
        gr.Slider(1, 10, value=5, label="AveRooms (Số phòng TB)"),
        gr.Slider(0.5, 5, value=1, label="AveBedrms (Phòng ngủ TB)"),
        gr.Number(value=800, label="Population (Dân số)"),
        gr.Slider(1, 10, value=3, label="AveOccup (Số người/hộ TB)"),
        gr.Number(value=34.05, label="Latitude (Vĩ độ)"),
        gr.Number(value=-118.24, label="Longitude (Kinh độ)"),
        gr.Slider(1990, 2050, value=2024, step=1, label="Năm muốn dự đoán"),
    ],
    outputs="text",
    title="🏠 AI Dự đoán giá nhà theo năm (California)",
    description="Mô hình AI + điều chỉnh theo năm (4%/năm). Không sử dụng text input.",
)

# Start the server only when run as a script, so importing this module
# (e.g. from a test) does not launch a web server.
if __name__ == "__main__":
    interface.launch()