Wewoo committed on
Commit
23d836e
·
verified ·
1 Parent(s): e1bed7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -78
app.py CHANGED
@@ -1,78 +1,158 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import joblib
4
- from huggingface_hub import hf_hub_download
5
- from datetime import datetime
6
- import os
7
-
8
# Download the pretrained XGBoost model from the Hugging Face Hub.
model_path = hf_hub_download(
    repo_id="sdaoudi/house-price-regression-xgb",
    filename="xgb_model.pkl",
)
# NOTE(security): joblib.load unpickles the file, and unpickling can execute
# arbitrary code embedded in it -- only load artifacts from a trusted repo.
model = joblib.load(model_path)

# Create the folder that stores the prediction history. exist_ok=True makes
# this a no-op when the folder is already there and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs("history", exist_ok=True)
18
-
19
def predict_price(
    bedrooms, bathrooms, sqft_living, sqft_lot, floors,
    waterfront, view, condition, grade, yr_built, zipcode, lat, long
):
    """Predict a house price and append the request to the history CSV.

    The thirteen arguments are placed in a one-row DataFrame whose column
    names match the parameter names; ``waterfront`` is converted with
    ``int()`` first. The row is passed to the module-level ``model``.

    Side effect: the input row, extended with ``predicted_price`` and
    ``timestamp`` columns, is appended to ``history/predictions.csv``. The
    header line is written only when that file does not exist yet.

    Returns:
        The user-facing result string with the predicted price in USD.
    """
    features = {
        "bedrooms": bedrooms,
        "bathrooms": bathrooms,
        "sqft_living": sqft_living,
        "sqft_lot": sqft_lot,
        "floors": floors,
        "waterfront": int(waterfront),
        "view": view,
        "condition": condition,
        "grade": grade,
        "yr_built": yr_built,
        "zipcode": zipcode,
        "lat": lat,
        "long": long,
    }
    frame = pd.DataFrame({column: [value] for column, value in features.items()})
    price = model.predict(frame)[0]
    message = f"💰 Giá nhà dự đoán: {price:,.0f} USD"

    # Record the prediction next to its inputs in the history file.
    frame["predicted_price"] = price
    frame["timestamp"] = datetime.now()
    log_path = "history/predictions.csv"
    # Appending to a missing file creates it, so one call covers both cases;
    # only the header flag depends on whether the file already existed.
    is_first_write = not os.path.exists(log_path)
    frame.to_csv(log_path, mode="a", header=is_first_write, index=False)

    return message
53
-
54
# Gradio interface with a theme and a tooltip on every input.
# The widgets are listed in the positional order predict_price expects:
# five numbers, the waterfront checkbox, then seven more numbers.
input_components = [
    gr.Number(label=label, info=info)
    for label, info in (
        ("Bedrooms", "Số phòng ngủ"),
        ("Bathrooms", "Số phòng tắm"),
        ("Sqft Living", "Diện tích sử dụng (sqft)"),
        ("Sqft Lot", "Diện tích mảnh đất (sqft)"),
        ("Floors", "Số tầng của nhà"),
    )
]
input_components.append(
    gr.Checkbox(label="Waterfront", info="Nhà view ra nước? (tick nếu có)")
)
input_components.extend(
    gr.Number(label=label, info=info)
    for label, info in (
        ("View", "Chất lượng view (0-4)"),
        ("Condition", "Tình trạng nhà (1-5)"),
        ("Grade", "Chất lượng xây dựng (1-13)"),
        ("Year Built", "Năm xây dựng"),
        ("Zipcode", "Mã bưu chính"),
        ("Latitude", "Vĩ độ"),
        ("Longitude", "Kinh độ"),
    )
)

interface = gr.Interface(
    fn=predict_price,
    inputs=input_components,
    outputs="text",
    title="🏡 House Price Predictor (Enhanced)",
    description="Dự đoán giá nhà bằng XGBoost pretrained từ HuggingFace.\nLưu lịch sử dự đoán để xuất CSV.",
    theme="default",  # Recent Gradio versions support themes: default, soft, compact, etc.
)

interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import joblib
4
+ import numpy as np
5
+ import pandas as pd
6
+ from sklearn.datasets import fetch_california_housing
7
+ from sklearn.ensemble import RandomForestRegressor
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.pipeline import Pipeline
10
+ from sklearn.preprocessing import StandardScaler
11
+ import gradio as gr
12
+
13
MODEL_PATH = "model.joblib"


def train_and_save_model():
    """Train the demo regressor, write it to ``MODEL_PATH`` and return it.

    The California housing dataset is loaded as a DataFrame (scikit-learn
    downloads it if it is not available locally). The target is the median
    house value, expressed in units of 100k$. 85% of the rows are used to
    fit a ``StandardScaler`` + ``RandomForestRegressor`` pipeline; the
    remaining 15% are split off but not used here.
    """
    housing = fetch_california_housing(as_frame=True)
    features, target = housing.data, housing.target

    # Keep the same seeded 85/15 split; the held-out part is discarded.
    train_features, _, train_target, _ = train_test_split(
        features, target, test_size=0.15, random_state=42
    )

    steps = [
        ("scaler", StandardScaler()),
        ("rf", RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(train_features, train_target)

    joblib.dump(pipeline, MODEL_PATH)
    return pipeline
30
+
31
def load_model():
    """Return the saved pipeline from ``MODEL_PATH``, training it if absent."""
    # No saved model on disk yet: train one (which also saves it).
    if not os.path.exists(MODEL_PATH):
        return train_and_save_model()
    return joblib.load(MODEL_PATH)


# Loaded once at import time and shared by the prediction functions.
model = load_model()
38
+
39
# Model input columns, in the order used to build prediction rows.
FEATURE_NAMES = [
    "MedInc",      # median income in block
    "HouseAge",    # median house age in block
    "AveRooms",    # average rooms
    "AveBedrms",   # average bedrooms
    "Population",
    "AveOccup",    # average occupants per household
    "Latitude",
    "Longitude",
]


def predict_from_inputs(medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long):
    """Predict the median house value for one set of form inputs.

    The eight arguments are placed, in ``FEATURE_NAMES`` order, in a one-row
    DataFrame that is passed to the module-level ``model``.

    Returns:
        A display string holding the raw prediction (in units of 100k$) and
        the same value converted to dollars.
    """
    row = [medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long]
    frame = pd.DataFrame([row], columns=FEATURE_NAMES)

    value_100k = model.predict(frame)[0]  # model output is in 100k$ units
    dollars = value_100k * 100000
    return f"Estimated median house value: {value_100k:.3f} (×100k$) → ${dollars:,.0f}"
58
+
59
from functools import lru_cache

# Accepted spellings for each model feature, in priority order: when several
# aliases of the same feature appear in the input, the first one listed wins.
_FEATURE_ALIASES = {
    "MedInc": ("medinc", "income"),
    "HouseAge": ("houseage", "age"),
    "AveRooms": ("averooms", "rooms"),
    "AveBedrms": ("avebedrms", "beds", "bedrooms"),
    "Population": ("population", "pop"),
    "AveOccup": ("aveoccup", "occup", "occ"),
    "Latitude": ("lat", "latitude"),
    "Longitude": ("long", "lon", "lng"),
}


@lru_cache(maxsize=1)
def _feature_means():
    """Return the per-feature means of the California housing data as a dict.

    Cached so the dataset is loaded at most once per process instead of on
    every request. Callers must treat the returned dict as read-only.
    """
    return fetch_california_housing(as_frame=True).data.mean().to_dict()


def _parse_csv_row(text, n_fields):
    """Parse ``text`` as exactly ``n_fields`` comma-separated floats, else None."""
    parts = [part.strip() for part in text.split(",")]
    if len(parts) != n_fields:
        return None
    try:
        return [float(part) for part in parts]
    except ValueError:
        return None


def _parse_key_value_pairs(text):
    """Collect ``key=value`` tokens from ``text`` as {lowercased key: float}.

    Commas are treated as whitespace. Tokens without ``=`` or with a
    non-numeric value are skipped individually, so one malformed token does
    not discard the valid ones around it.
    """
    pairs = {}
    for piece in text.replace(",", " ").split():
        key, sep, raw = piece.partition("=")
        if not sep:
            continue
        try:
            pairs[key.lower()] = float(raw)
        except ValueError:
            continue
    return pairs


# A friendly "chat-like" textarea where the user can paste a CSV row or a
# simple key=value description.
def predict_from_text(text_input):
    """Predict the median house value from free-form text.

    Accepts either:
    - comma-separated numeric values in the FEATURE_NAMES order, OR
    - simple key=value text such as
      "income=3.5 age=20 rooms=5 beds=1 population=1000 occ=2 lat=34 long=-118"

    The parser is permissive: any feature that is missing or unparseable is
    replaced by its dataset mean, so empty or unrecognised input yields the
    prediction for an all-average row.

    Args:
        text_input: The text typed by the user; ``None`` is treated as empty.

    Returns:
        A display string holding the raw prediction (in units of 100k$) and
        the same value converted to dollars.
    """
    text = "" if text_input is None else str(text_input)

    # Try the strict CSV form first; fall back to key=value pairs otherwise.
    values = _parse_csv_row(text, len(FEATURE_NAMES))
    if values is None:
        supplied = _parse_key_value_pairs(text)
        means = _feature_means()
        values = [
            next(
                (supplied[alias] for alias in _FEATURE_ALIASES.get(name, ()) if alias in supplied),
                float(means[name]),
            )
            for name in FEATURE_NAMES
        ]

    x = pd.DataFrame([values], columns=FEATURE_NAMES)
    pred = model.predict(x)[0]  # value in 100k$ units
    usd = pred * 100000
    return f"Estimated median house value: {pred:.3f} (×100k$) → ${usd:,.0f}"
122
+
123
# Build the Gradio UI: a numeric form in the wider column, a free-text box in
# the narrower one. Both buttons write to the same "Prediction" textbox.
with gr.Blocks(title="HousePriceAI - demo") as demo:
    gr.Markdown("## HousePriceAI — Predict median house value (demo)\nEnter features or paste a CSV row. Model trained quickly on California housing dataset.")
    with gr.Row():
        with gr.Column(scale=2):
            medinc = gr.Number(value=3.0, label="MedInc (median income)")
            houseage = gr.Number(value=30.0, label="HouseAge")
            averooms = gr.Number(value=5.0, label="AveRooms")
            avebedrms = gr.Number(value=1.0, label="AveBedrms")
            population = gr.Number(value=1000, label="Population")
            aveoccup = gr.Number(value=3.0, label="AveOccup")
            lat = gr.Number(value=34.0, label="Latitude")
            long = gr.Number(value=-118.0, label="Longitude")
            predict_btn = gr.Button("Predict (form)")
            output = gr.Textbox(label="Prediction")
        with gr.Column(scale=1):
            gr.Markdown("### Or paste free text / CSV")
            text_in = gr.Textbox(lines=6, placeholder="e.g. 3.2, 25, 5.4, 1.1, 1500, 2.5, 34.1, -118.2 OR income=3.2 age=25 ...", label="Text input")
            predict_text_btn = gr.Button("Predict (text)")

    # Form components in the positional order predict_from_inputs expects.
    form_inputs = [medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long]

    predict_btn.click(fn=predict_from_inputs, inputs=form_inputs, outputs=[output])
    predict_text_btn.click(fn=predict_from_text, inputs=[text_in], outputs=[output])

    # Each example row must supply one value per bound input component, so
    # the form example and the text example live in separate Examples groups.
    gr.Examples(
        examples=[[3.0, 20, 5.5, 1.1, 1200, 2.8, 34.0, -118.2]],
        inputs=form_inputs,
        label="Form examples",
    )
    gr.Examples(
        examples=[["income=4.5 age=35 rooms=6 beds=1.2 population=800 occ=2.5 lat=37 long=-122"]],
        inputs=[text_in],
        label="Text examples",
    )

    gr.Markdown("Model: RandomForestRegressor trained on scikit-learn California Housing dataset. (This is a demo; for production use a real dataset and careful validation.)")

if __name__ == "__main__":
    demo.launch()