Spaces:

Wewoo
/

ABC

Sleeping

App Files Files Community

Wewoo committed on Dec 8, 2025

Commit

c9903cd

verified ·

1 Parent(s): 7201bc4

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -141

app.py CHANGED Viewed

@@ -1,158 +1,72 @@
-# app.py
-import os
-import joblib
 import numpy as np
-import pandas as pd
 from sklearn.datasets import fetch_california_housing
 from sklearn.ensemble import RandomForestRegressor
-from sklearn.model_selection import train_test_split
-from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import StandardScaler
-import gradio as gr
-# File the fitted pipeline is dumped to by train_and_save_model() and read back from by load_model().
-MODEL_PATH = "model.joblib"
-def train_and_save_model():
-    """Fit a StandardScaler + RandomForestRegressor pipeline and save it.
-
-    The pipeline is trained on 85% of the California housing data, dumped to
-    MODEL_PATH with joblib, and returned. The 15% validation split is created
-    but never evaluated in this function.
-    """
-    # load dataset (scikit-learn downloads it automatically if it is not already present)
-    cal = fetch_california_housing(as_frame=True)
-    X = cal.data
-    y = cal.target  # median house value (in 100k$)
-    # quick train-test split
-    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=42)
-    # pipeline: scaler + random forest
-    pipe = Pipeline([
-        ("scaler", StandardScaler()),
-        ("rf", RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1))
-    ])
-    pipe.fit(X_train, y_train)
-    joblib.dump(pipe, MODEL_PATH)
-    return pipe
-def load_model():
-    """Return the pipeline stored at MODEL_PATH, training and saving a new one if the file is missing."""
-    if os.path.exists(MODEL_PATH):
-        # NOTE(review): joblib.load unpickles the file, so MODEL_PATH must come from a trusted source.
-        return joblib.load(MODEL_PATH)
-    else:
-        return train_and_save_model()
-# Loaded (or trained) once when the module is executed; the predict functions reuse this object.
-model = load_model()
-# Column names used, in this order, when the predict functions build their input DataFrame.
-FEATURE_NAMES = [
-    "MedInc",    # median income in block
-    "HouseAge",  # median house age in block
-    "AveRooms",  # average rooms
-    "AveBedrms", # average bedrooms
-    "Population",
-    "AveOccup",  # average occupants per household
-    "Latitude",
-    "Longitude"
-]
-def predict_from_inputs(medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long):
-    """Predict from eight individual feature values and return a formatted message.
-
-    The arguments are placed into a one-row DataFrame in FEATURE_NAMES order.
-    The returned string shows the raw prediction and the same value
-    multiplied by 100000 as dollars.
-    """
-    # construct dataframe with same order as dataset
-    x = pd.DataFrame([[
-        medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long
-    ]], columns=FEATURE_NAMES)
-    pred = model.predict(x)[0]  # value in 100k$ units
-    usd = pred * 100000
-    return f"Estimated median house value: {pred:.3f} (×100k$) → ${usd:,.0f}"
-# A friendly "chat-like" textarea where user can paste a CSV row or natural description:
-def predict_from_text(text_input):
-    """
-    Accepts either:
-      - comma-separated numeric values in the FEATURE_NAMES order, OR
-      - simple natural-language like "income=3.5 age=20 rooms=5 beds=1 population=1000 occ=2 lat=34 long=-118"
-    This parser is permissive; if parsing fails it falls back to average values.
-
-    Returns the same formatted prediction string as predict_from_inputs.
-    """
-    # try CSV parse first
-    vals = None
-    try:
-        parts = [p.strip() for p in text_input.split(",")]
-        # Only an exact count of FEATURE_NAMES fields is treated as CSV; anything else leaves vals as None.
-        if len(parts) == len(FEATURE_NAMES):
-            vals = [float(p) for p in parts]
-    except Exception:
-        vals = None
-    if vals is None:
-        # try key=value pairs
-        try:
-            tok = {}
-            for piece in text_input.replace(",", " ").split():
-                if "=" in piece:
-                    k,v = piece.split("=",1)
-                    # A non-numeric value raises here and abandons the whole key=value pass.
-                    tok[k.lower()] = float(v)
-            # map known keys to features
-            mapping = {
-                "medinc":"MedInc","income":"MedInc",
-                "houseage":"HouseAge","age":"HouseAge",
-                "averooms":"AveRooms","rooms":"AveRooms",
-                "avebedrms":"AveBedrms","beds":"AveBedrms","bedrooms":"AveBedrms",
-                "population":"Population","pop":"Population",
-                "aveoccup":"AveOccup","occup":"AveOccup","occ":"AveOccup",
-                "lat":"Latitude","latitude":"Latitude",
-                "long":"Longitude","lon":"Longitude","lng":"Longitude"
-            }
-            vals = []
-            # use dataset mean when not provided
-            # NOTE(review): the dataset is fetched again on every call that reaches this branch.
-            df_sample = fetch_california_housing(as_frame=True).data
-            means = df_sample.mean().to_dict()
-            for f in FEATURE_NAMES:
-                # find mapping key if exists
-                found = None
-                # When several aliases of one feature are given, the first alias in mapping order wins.
-                for k,v in mapping.items():
-                    if v == f and k in tok:
-                        found = tok[k]
-                        break
-                if found is None:
-                    vals.append(float(means[f]))
-                else:
-                    vals.append(float(found))
-        except Exception:
-            vals = None
-    if vals is None:
-        # fallback: use dataset means
-        df_sample = fetch_california_housing(as_frame=True).data
-        vals = df_sample.mean().tolist()
-    x = pd.DataFrame([vals], columns=FEATURE_NAMES)
-    pred = model.predict(x)[0]
-    usd = pred * 100000
-    return f"Estimated median house value: {pred:.3f} (×100k$) → ${usd:,.0f}"
-# Build Gradio UI
-# Two-column layout: a numeric form on the left, a free-text box on the right.
-# Both buttons write their result to the same "Prediction" textbox.
-with gr.Blocks(title="HousePriceAI - demo") as demo:
-    gr.Markdown("## HousePriceAI — Predict median house value (demo)\nEnter features or paste a CSV row. Model trained quickly on California housing dataset.")
-    with gr.Row():
-        with gr.Column(scale=2):
-            medinc = gr.Number(value=3.0, label="MedInc")
-            houseage = gr.Number(value=30.0, label="HouseAge")
-            averooms = gr.Number(value=5.0, label="AveRooms")
-            avebedrms = gr.Number(value=1.0, label="AveBedrms")
-            population = gr.Number(value=1000, label="Population")
-            aveoccup = gr.Number(value=3.0, label="AveOccup")
-            lat = gr.Number(value=34.0, label="Latitude")
-            long = gr.Number(value=-118.0, label="Longitude")
-            predict_btn = gr.Button("Predict (form)")
-            output = gr.Textbox(label="Prediction")
-        with gr.Column(scale=1):
-            gr.Markdown("### Or paste free text / CSV")
-            text_in = gr.Textbox(lines=6, placeholder="e.g. 3.2, 25, 5.4, 1.1, 1500, 2.5, 34.1, -118.2 OR income=3.2 age=25 ...", label="Text input")
-            predict_text_btn = gr.Button("Predict (text)")
-    # The lambda only forwards its eight arguments unchanged to predict_from_inputs.
-    predict_btn.click(
-        fn=lambda a,b,c,d,e,f,g,h: predict_from_inputs(a,b,c,d,e,f,g,h),
-        inputs=[medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long],
-        outputs=[output]
-    )
-    predict_text_btn.click(fn=predict_from_text, inputs=[text_in], outputs=[output])
-    # NOTE(review): the second example row is a single string while eight numeric
-    # inputs are listed; it looks intended for text_in instead. Confirm.
-    gr.Examples(examples=[
-        [3.0, 20, 5.5, 1.1, 1200, 2.8, 34.0, -118.2],
-        ["income=4.5 age=35 rooms=6 beds=1.2 population=800 occ=2.5 lat=37 long=-122"]
-    ], inputs=[medinc, houseage, averooms, avebedrms, population, aveoccup, lat, long])
-    gr.Markdown("Model: RandomForestRegressor trained on scikit-learn California Housing dataset. (This is a demo; for production use a real dataset and careful validation.)")
-# Start the web app only when the file is run as a script.
-if __name__ == "__main__":
-    demo.launch()

+import gradio as gr
 import numpy as np
 from sklearn.datasets import fetch_california_housing
 from sklearn.ensemble import RandomForestRegressor
+# ======================
+# 1. Load data & train model
+# ======================
+# Runs at module execution: the forest is fit on the whole dataset, with no hold-out split.
+data = fetch_california_housing()
+X = data.data
+y = data.target  # unit: 100,000 USD
+model = RandomForestRegressor(n_estimators=100, random_state=42)
+model.fit(X, y)
+# ======================
+# 2. Year adjustment function
+# ======================
+# Year for which adjust_by_year() returns the price unchanged (exponent 0).
+# NOTE(review): scikit-learn documents this dataset as derived from the 1990 U.S.
+# census; confirm that 2000 is the intended base year.
+BASE_YEAR = 2000
+YEARLY_RATE = 0.04  # 4% / year
+def adjust_by_year(price, year):
+    """Scale price by compound growth of YEARLY_RATE per year, counted from BASE_YEAR.
+
+    A year before BASE_YEAR gives a negative exponent, so the price is scaled
+    down; year == BASE_YEAR returns the price unchanged.
+    """
+    years_diff = year - BASE_YEAR
+    adjusted_price = price * ((1 + YEARLY_RATE)**years_diff)
+    return adjusted_price
+# ======================
+# 3. Main prediction function
+# ======================
+def predict_price(
+    medinc, houseage, averooms, avebedrms,
+    population, aveoccup, latitude, longitude,
+    year
+):
+    """Predict a house value from eight features and adjust it to the given year.
+
+    The eight feature values are passed to the model positionally as one row.
+    This assumes the parameter order matches the training column order; TODO confirm.
+    The raw prediction is scaled with adjust_by_year, multiplied by 100000,
+    and returned inside a formatted message string.
+    """
+    features = np.array([[
+        medinc, houseage, averooms, avebedrms,
+        population, aveoccup, latitude, longitude
+    ]])
+    base_pred = model.predict(features)[0]          # ×100,000$
+    adjusted = adjust_by_year(base_pred, year)      # adjusted for the year
+    usd = adjusted * 100000
+    return f"💰 Giá nhà dự đoán năm {year}: {usd:,.2f} USD"
+# ======================
+# 4. Gradio interface (form only)
+# ======================
+# The input components are listed in the same order as predict_price's parameters.
+interface = gr.Interface(
+    fn=predict_price,
+    inputs=[
+        gr.Slider(0, 20, value=3, label="MedInc (Thu nhập trung vị)"),
+        gr.Slider(0, 100, value=20, label="HouseAge (Tuổi nhà)"),
+        gr.Slider(1, 10, value=5, label="AveRooms (Số phòng TB)"),
+        gr.Slider(0.5, 5, value=1, label="AveBedrms (Phòng ngủ TB)"),
+        gr.Number(value=800, label="Population (Dân số)"),
+        gr.Slider(1, 10, value=3, label="AveOccup (Số người/hộ TB)"),
+        gr.Number(value=34.05, label="Latitude (Vĩ độ)"),
+        gr.Number(value=-118.24, label="Longitude (Kinh độ)"),
+        gr.Slider(1990, 2050, value=2024, step=1, label="Năm muốn dự đoán")
+    ],
+    outputs="text",
+    title="🏠 AI Dự đoán giá nhà theo năm (California)",
+    description="Mô hình AI + điều chỉnh theo năm (4%/năm). Không sử dụng text input."
+)
+# NOTE(review): there is no `if __name__ == "__main__"` guard, so importing this module also launches the app.
+interface.launch()