Spaces:

Wewoo
/

ABC

Sleeping

App Files Files Community

Wewoo committed on Dec 8, 2025

Commit

23d836e

verified ·

1 Parent(s): e1bed7c

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -78

app.py CHANGED Viewed

@@ -1,78 +1,158 @@
-import gradio as gr
-import pandas as pd
-import joblib
-from huggingface_hub import hf_hub_download
-from datetime import datetime
-import os
-# Load the pretrained XGBoost model from the HF Hub
-model_path = hf_hub_download(
-    repo_id="sdaoudi/house-price-regression-xgb",
-    filename="xgb_model.pkl"
-)
-model = joblib.load(model_path)
-# Create the folder that stores the prediction history if it does not exist yet
-if not os.path.exists("history"):
-    os.makedirs("history")
-def predict_price(
-    bedrooms, bathrooms, sqft_living, sqft_lot, floors,
-    waterfront, view, condition, grade, yr_built, zipcode, lat, long
-):
-    """Predict a house price from the form values and log the request.
-    Builds a one-row DataFrame from the arguments (waterfront is cast to int),
-    runs the module-level model on it, appends the row together with the
-    prediction and the current timestamp to history/predictions.csv, and
-    returns the formatted result text.
-    """
-    data = {
-        "bedrooms": [bedrooms],
-        "bathrooms": [bathrooms],
-        "sqft_living": [sqft_living],
-        "sqft_lot": [sqft_lot],
-        "floors": [floors],
-        "waterfront": [int(waterfront)],
-        "view": [view],
-        "condition": [condition],
-        "grade": [grade],
-        "yr_built": [yr_built],
-        "zipcode": [zipcode],
-        "lat": [lat],
-        "long": [long]
-    }
-    df = pd.DataFrame(data)
-    pred = model.predict(df)[0]
-    result_text = f"💰 Giá nhà dự đoán: {pred:,.0f} USD"
-    # Save the prediction history
-    df['predicted_price'] = pred
-    df['timestamp'] = datetime.now()
-    history_file = "history/predictions.csv"
-    # Append without a header when the file already exists; otherwise create it with one
-    if os.path.exists(history_file):
-        df.to_csv(history_file, mode='a', header=False, index=False)
-    else:
-        df.to_csv(history_file, index=False)
-    return result_text
-# Gradio interface with a nice theme + tooltips
-interface = gr.Interface(
-    fn=predict_price,
-    inputs=[
-        gr.Number(label="Bedrooms", info="Số phòng ngủ"),
-        gr.Number(label="Bathrooms", info="Số phòng tắm"),
-        gr.Number(label="Sqft Living", info="Diện tích sử dụng (sqft)"),
-        gr.Number(label="Sqft Lot", info="Diện tích mảnh đất (sqft)"),
-        gr.Number(label="Floors", info="Số tầng của nhà"),
-        gr.Checkbox(label="Waterfront", info="Nhà có view ra nước? (tick nếu có)"),
-        gr.Number(label="View", info="Chất lượng view (0-4)"),
-        gr.Number(label="Condition", info="Tình trạng nhà (1-5)"),
-        gr.Number(label="Grade", info="Chất lượng xây dựng (1-13)"),
-        gr.Number(label="Year Built", info="Năm xây dựng"),
-        gr.Number(label="Zipcode", info="Mã bưu chính"),
-        gr.Number(label="Latitude", info="Vĩ độ"),
-        gr.Number(label="Longitude", info="Kinh độ"),
-    ],
-    outputs="text",
-    title="🏡 House Price Predictor (Enhanced)",
-    description="Dự đoán giá nhà bằng XGBoost pretrained từ HuggingFace.\nLưu lịch sử dự đoán để xuất CSV.",
-    theme="default"  # Newer Gradio versions support themes: default, soft, compact, etc.
-)
-interface.launch()

+# app.py
+import os
+import joblib
+import numpy as np
+import pandas as pd
+from sklearn.datasets import fetch_california_housing
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+import gradio as gr
+# File where the fitted pipeline is stored with joblib and reloaded from on later starts
+MODEL_PATH = "model.joblib"
+def train_and_save_model():
+    """Fit the demo pipeline on the California housing data, save it, and return it.
+    The pipeline (StandardScaler followed by a RandomForestRegressor) is fitted on
+    the training part of an 85/15 split and written to MODEL_PATH with joblib.
+    """
+    # load dataset (scikit-learn will download it automatically if not already present)
+    dataset = fetch_california_housing(as_frame=True)
+    features, target = dataset.data, dataset.target  # target: median house value (in 100k$)
+    # quick train-test split; the held-out 15% is not used for fitting
+    train_features, _, train_target, _ = train_test_split(
+        features, target, test_size=0.15, random_state=42
+    )
+    # pipeline: scaler + random forest
+    steps = [
+        ("scaler", StandardScaler()),
+        ("rf", RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)),
+    ]
+    pipeline = Pipeline(steps)
+    pipeline.fit(train_features, train_target)
+    joblib.dump(pipeline, MODEL_PATH)
+    return pipeline
+def load_model():
+    """Return the pipeline stored at MODEL_PATH, training and saving one if the file is missing."""
+    if not os.path.exists(MODEL_PATH):
+        return train_and_save_model()
+    return joblib.load(MODEL_PATH)
+# Load (or train) the pipeline once at import time; the predict functions below reuse it
+model = load_model()
+# Column names, in order, used to build the one-row DataFrame passed to the model
+FEATURE_NAMES = [
+    "MedInc",    # median income in block
+    "HouseAge",  # median house age in block
+    "AveRooms",  # average rooms
+    "AveBedrms", # average bedrooms
+    "Population",
+    "AveOccup",  # average occupants per household
+    "Latitude",
+    "Longitude"
+]
+def predict_from_inputs(medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long):
+    """Predict from the eight form values and return the formatted estimate.
+    The arguments are placed in a one-row DataFrame whose columns follow
+    FEATURE_NAMES; the model output is reported both in 100k$ units and in dollars.
+    """
+    row = [medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long]
+    frame = pd.DataFrame([row], columns=FEATURE_NAMES)
+    predictions = model.predict(frame)
+    pred = predictions[0]  # value in 100k$ units
+    usd = pred * 100000
+    return f"Estimated median house value: {pred:.3f} (×100k$) → ${usd:,.0f}"
+# A friendly "chat-like" textarea where user can paste a CSV row or natural description.
+# Accepted key spellings for the key=value form, mapped to their feature name.
+# Order matters: when several aliases of one feature are given, the first listed wins.
+_TEXT_KEY_ALIASES = {
+    "medinc": "MedInc", "income": "MedInc",
+    "houseage": "HouseAge", "age": "HouseAge",
+    "averooms": "AveRooms", "rooms": "AveRooms",
+    "avebedrms": "AveBedrms", "beds": "AveBedrms", "bedrooms": "AveBedrms",
+    "population": "Population", "pop": "Population",
+    "aveoccup": "AveOccup", "occup": "AveOccup", "occ": "AveOccup",
+    "lat": "Latitude", "latitude": "Latitude",
+    "long": "Longitude", "lon": "Longitude", "lng": "Longitude",
+}
+_FEATURE_MEANS = {}  # filled lazily by _feature_means()
+def _feature_means():
+    """Return {feature name: dataset mean}, loading the dataset only on first use."""
+    if not _FEATURE_MEANS:
+        # Loading the dataset is expensive, so do it once instead of on every request
+        _FEATURE_MEANS.update(fetch_california_housing(as_frame=True).data.mean().to_dict())
+    return _FEATURE_MEANS
+def _parse_csv_row(text_input):
+    """Return the floats of a complete comma-separated row, or None if the text is not one."""
+    import math
+    parts = [p.strip() for p in text_input.split(",")]
+    if len(parts) != len(FEATURE_NAMES):
+        return None
+    try:
+        vals = [float(p) for p in parts]
+    except ValueError:
+        return None
+    # "nan"/"inf" parse as floats but are not usable model inputs
+    return vals if all(math.isfinite(v) for v in vals) else None
+def _parse_key_values(text_input):
+    """Return {feature name: value} for every well-formed key=value token in the text."""
+    import math
+    tokens = {}
+    for piece in text_input.replace(",", " ").split():
+        key, sep, raw = piece.partition("=")
+        if not sep:
+            continue
+        try:
+            value = float(raw)
+        except ValueError:
+            continue  # skip only the malformed token; keep the valid ones
+        if math.isfinite(value):
+            tokens[key.lower()] = value
+    found = {}
+    for alias, feature in _TEXT_KEY_ALIASES.items():
+        if alias in tokens and feature not in found:
+            found[feature] = tokens[alias]
+    return found
+def predict_from_text(text_input):
+    """
+    Predict from free text and return the formatted estimate.
+    Accepts either:
+      - comma-separated numeric values in the FEATURE_NAMES order, OR
+      - simple natural-language like "income=3.5 age=20 rooms=5 beds=1 population=1000 occ=2 lat=34 long=-118"
+    The parser is permissive: any feature that is missing or cannot be parsed is
+    replaced by its dataset mean, so empty or unparseable text predicts on the means.
+    """
+    text = text_input if isinstance(text_input, str) else ""
+    # try CSV parse first
+    vals = _parse_csv_row(text)
+    if vals is None:
+        # then key=value pairs, using the dataset mean for anything not provided
+        means = _feature_means()
+        given = _parse_key_values(text)
+        vals = [given.get(f, float(means[f])) for f in FEATURE_NAMES]
+    x = pd.DataFrame([vals], columns=FEATURE_NAMES)
+    pred = model.predict(x)[0]  # value in 100k$ units
+    usd = pred * 100000
+    return f"Estimated median house value: {pred:.3f} (×100k$) → ${usd:,.0f}"
+# Build Gradio UI: a numeric form on the left, a free-text box on the right,
+# both writing their result into the same "Prediction" textbox.
+with gr.Blocks(title="HousePriceAI - demo") as demo:
+    gr.Markdown("## HousePriceAI — Predict median house value (demo)\nEnter features or paste a CSV row. Model trained quickly on California housing dataset.")
+    with gr.Row():
+        with gr.Column(scale=2):
+            medinc = gr.Number(value=3.0, label="MedInc (median income)")
+            houseage = gr.Number(value=30.0, label="HouseAge")
+            averooms = gr.Number(value=5.0, label="AveRooms")
+            avebedrms = gr.Number(value=1.0, label="AveBedrms")
+            population = gr.Number(value=1000, label="Population")
+            aveoccup = gr.Number(value=3.0, label="AveOccup")
+            lat = gr.Number(value=34.0, label="Latitude")
+            long = gr.Number(value=-118.0, label="Longitude")
+            predict_btn = gr.Button("Predict (form)")
+            output = gr.Textbox(label="Prediction")
+        with gr.Column(scale=1):
+            gr.Markdown("### Or paste free text / CSV")
+            text_in = gr.Textbox(lines=6, placeholder="e.g. 3.2, 25, 5.4, 1.1, 1500, 2.5, 34.1, -118.2 OR income=3.2 age=25 ...", label="Text input")
+            predict_text_btn = gr.Button("Predict (text)")
+    # predict_from_inputs already takes the eight values positionally, so it is wired directly
+    predict_btn.click(
+        fn=predict_from_inputs,
+        inputs=[medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long],
+        outputs=[output],
+    )
+    predict_text_btn.click(fn=predict_from_text, inputs=[text_in], outputs=[output])
+    # Each example row must supply one value per listed input component, so the
+    # numeric form and the free-text box get separate example sets.
+    gr.Examples(
+        examples=[[3.0, 20, 5.5, 1.1, 1200, 2.8, 34.0, -118.2]],
+        inputs=[medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long],
+    )
+    gr.Examples(
+        examples=[["income=4.5 age=35 rooms=6 beds=1.2 population=800 occ=2.5 lat=37 long=-122"]],
+        inputs=[text_in],
+    )
+    gr.Markdown("Model: RandomForestRegressor trained on scikit-learn California Housing dataset. (This is a demo; for production use a real dataset and careful validation.)")
+if __name__ == "__main__":
+    demo.launch()