Spaces:
Sleeping
Sleeping
"""
Apartment Predictor - Conversational Agent (Numeric Model + LLM)

The user describes the apartment in natural German.
1. The LLM extracts rooms, area_m2, town, luxurious, furnished as strict JSON.
2. A pickled GradientBoostingRegressor predicts the monthly rent in CHF.
3. The LLM produces a short German explanation including one uncertainty note.

LLM usage is mandatory. There is no rule-based fallback for extraction or
explanation - errors are surfaced so the failure mode stays visible.
"""
import json
import math
import os
import pickle

import gradio as gr
import numpy as np
import pandas as pd
from openai import OpenAI
# ── Config ────────────────────────────────────────────────────────────────────
# Model and lookup artifacts are resolved relative to this file, not the CWD.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(BASE_DIR, "model_gbm.pkl")
LOOKUP_PATH = os.path.join(BASE_DIR, "municipality_lookup.csv")

# OpenAI settings are read from the environment once, at import time.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")

# Feature columns, in the order used to build the model input frame.
FEATURES = [
    "rooms",
    "area",
    "pop",
    "pop_dens",
    "frg_pct",
    "emp",
    "tax_income",
    "room_per_m2",
    "luxurious",
    "furnished",
    "zurich_city",
    "distance_to_zurich_center",
]
# ── Load model & municipality lookup ─────────────────────────────────────────
# NOTE(review): pickle.load executes code embedded in the file; only deploy a
# model_gbm.pkl that comes from a trusted source.
with open(MODEL_PATH, "rb") as f:
    model = pickle.load(f)

municipality_lookup = pd.read_csv(LOOKUP_PATH)

# Lower-cased bfs_name -> lookup row. On duplicate names the last row wins.
town_to_row = {
    str(record["bfs_name"]).lower(): record
    for _index, record in municipality_lookup.iterrows()
}

# Distinct canonical municipality names in sorted order.
valid_towns = municipality_lookup["bfs_name"].unique().tolist()
valid_towns.sort()
def get_openai_client() -> OpenAI:
    """Create an OpenAI client from the module-level ``OPENAI_API_KEY``.

    Returns:
        A new ``OpenAI`` client configured with the API key.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is empty.
    """
    if OPENAI_API_KEY:
        return OpenAI(api_key=OPENAI_API_KEY)
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it as a Hugging Face Space secret."
    )
# ── Town matching ────────────────────────────────────────────────────────────
def _strip_qualifier(name: str) -> str:
    """Drop a trailing parenthetical qualifier, e.g. "wil (zh)" -> "wil"."""
    return name.split("(", 1)[0].strip()


def match_town(user_town: str):
    """Map a free-text town to the canonical bfs_name. Returns None if no match.

    Matching is attempted in this order:

    1. Exact, case-insensitive match against ``town_to_row``.
    2. Exact match after dropping a parenthetical qualifier on either side,
       so "Wil" resolves to "Wil (ZH)" instead of the first town that merely
       contains "wil".
    3. Substring match in either direction. Among all candidates the one with
       the longest overlap wins; ties go to the name closest in length, then
       to the first name in ``valid_towns``.

    Args:
        user_town: Town name as typed by the user or extracted by the LLM.

    Returns:
        The canonical name from ``valid_towns`` or ``None``.
    """
    if not user_town:
        return None
    key = str(user_town).strip().lower()
    if not key:
        return None
    if key in town_to_row:
        return town_to_row[key]["bfs_name"]

    # Fall back to the full key if stripping leaves nothing (e.g. "(zh)").
    key_base = _strip_qualifier(key) or key
    best_name = None
    best_score = None
    for canonical in valid_towns:
        name = canonical.lower()
        base = _strip_qualifier(name) or name
        if base == key_base:
            return canonical
        # Every pair the plain substring test accepted is still accepted here.
        if key_base in base or key in name:
            overlap = len(key_base)
        elif base in key_base or name in key:
            overlap = len(base)
        else:
            continue
        score = (overlap, -abs(len(base) - len(key_base)))
        # Strict ">" keeps the earliest candidate on ties.
        if best_score is None or score > best_score:
            best_name, best_score = canonical, score
    return best_name
# ── LLM helper ───────────────────────────────────────────────────────────────
def call_llm_json(system_prompt: str, user_prompt: str) -> str:
    """Send one system + user exchange to the LLM and return the raw reply text.

    The request uses JSON mode (``response_format={"type": "json_object"}``)
    and ``temperature=0``.

    Args:
        system_prompt: Instructions placed in the system message.
        user_prompt: Content of the user message.

    Returns:
        The content of the first choice, or ``""`` if that content is empty.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = get_openai_client().chat.completions.create(
        model=OPENAI_MODEL,
        response_format={"type": "json_object"},
        temperature=0,
        messages=conversation,
    )
    reply_text = completion.choices[0].message.content
    return reply_text if reply_text else ""
def parse_json_response(raw: str, required_keys: tuple) -> dict:
    """Parse an LLM reply as a JSON object and check that required keys exist.

    Args:
        raw: Raw reply text from the LLM; ``None`` is treated like ``""``.
        required_keys: Keys that must be present in the parsed object.

    Returns:
        The parsed JSON object as a dict.

    Raises:
        ValueError: If the reply is empty, is not valid JSON, is valid JSON
            but not an object (e.g. a list, number or ``null``), or lacks one
            of ``required_keys``.
    """
    cleaned = (raw or "").strip()
    if not cleaned:
        raise ValueError("LLM returned an empty response instead of JSON.")
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError as exc:
        raise ValueError(
            f"LLM did not return valid JSON. Received: {cleaned[:300]}"
        ) from exc
    # Callers use dict access on the result, so reject any other JSON type
    # here with a clear message instead of failing later.
    if not isinstance(parsed, dict):
        raise ValueError(
            f"LLM returned JSON of type {type(parsed).__name__} instead of "
            f"an object. Received: {cleaned[:300]}"
        )
    missing = [k for k in required_keys if k not in parsed]
    if missing:
        raise ValueError(
            f"LLM JSON is missing required keys: {', '.join(missing)}."
        )
    return parsed
# ── Step 1 - Extract preferences from natural language ──────────────────────
# System prompt for the extraction call, written one prompt line per item.
# The lines are joined with "\n" and there is no trailing newline.
EXTRACTION_SYSTEM_PROMPT = "\n".join(
    (
        'Du bist ein Extraktionshelfer für eine Schweizer Wohnungs-App.',
        'Lies den deutschen Text und gib AUSSCHLIESSLICH ein JSON-Objekt zurück - kein Markdown, keine Erklärung.',
        'Pflichtfelder:',
        '- "rooms" (Zahl, z.B. 3.5)',
        '- "area_m2" (Zahl in Quadratmetern, z.B. 85)',
        '- "town" (String, Schweizer Gemeindename im Kanton Zürich, z.B. "Winterthur")',
        'Optionale Felder (nur setzen, falls erkennbar - sonst false):',
        '- "luxurious" (true/false: Hinweise wie "luxuriös", "exklusiv", "hochwertig")',
        '- "furnished" (true/false: Hinweise wie "möbliert", "eingerichtet")',
        'Wenn ein Pflichtfeld fehlt, gib für rooms oder area_m2 den Wert null und für town einen leeren String "" zurück.',
        'Antworte immer mit gültigem JSON.',
    )
)
def _coerce_flag(value) -> bool:
    """Interpret an optional boolean flag from the LLM JSON.

    Strings are compared against known affirmative words, so a textual
    "false" is not treated as truthy the way ``bool("false")`` would be.
    """
    if isinstance(value, str):
        return value.strip().lower() in {"true", "1", "yes", "ja", "wahr"}
    return bool(value)


def _coerce_positive_number(value, field: str) -> float:
    """Convert an extracted numeric field to a positive, finite float.

    Raises:
        ValueError: If the value is a boolean, not numeric, not finite or <= 0.
    """
    # bool is a subclass of int; float(True) == 1.0 would be accepted silently.
    if isinstance(value, bool):
        raise ValueError(f"Ungültiger Wert für {field}: {value!r}")
    try:
        number = float(value)
    except (TypeError, ValueError) as exc:
        raise ValueError(f"Ungültiger Wert für {field}: {value!r}") from exc
    if not math.isfinite(number) or number <= 0:
        raise ValueError(f"Ungültiger Wert für {field}: {value!r}")
    return number


def extract_preferences(user_text: str) -> dict:
    """Extract structured apartment preferences from free German text via the LLM.

    Args:
        user_text: The user's description of the desired apartment.

    Returns:
        A dict with ``rooms`` and ``area_m2`` as positive floats, ``town`` as
        the canonical municipality name, and ``luxurious`` / ``furnished`` as
        booleans (False when absent).

    Raises:
        ValueError: If the LLM reply is not a usable JSON object, a required
            field is missing or empty, the town cannot be matched, or a
            numeric field is not a positive finite number.
    """
    raw = call_llm_json(EXTRACTION_SYSTEM_PROMPT, user_text)
    parsed = parse_json_response(raw, required_keys=("rooms", "area_m2", "town"))
    rooms = parsed.get("rooms")
    area_m2 = parsed.get("area_m2")
    town = parsed.get("town")
    # The prompt asks for null / "" when a required field cannot be extracted.
    if rooms is None or area_m2 is None or not town:
        raise ValueError(
            f"Unvollständige Extraktion: rooms={rooms}, area_m2={area_m2}, town={town!r}"
        )
    canonical_town = match_town(str(town))
    if canonical_town is None:
        raise ValueError(
            f"Ort '{town}' wurde nicht im Kanton Zürich gefunden. "
            f"Bitte eine Gemeinde im Kanton Zürich angeben."
        )
    return {
        "rooms": _coerce_positive_number(rooms, "rooms"),
        "area_m2": _coerce_positive_number(area_m2, "area_m2"),
        "town": canonical_town,
        "luxurious": _coerce_flag(parsed.get("luxurious", False)),
        "furnished": _coerce_flag(parsed.get("furnished", False)),
    }
# ── Step 2 - Predict monthly rent ────────────────────────────────────────────
def predict_apartment_price(preferences: dict) -> float:
    """Predict the monthly rent for the given preferences.

    The apartment values are combined with the municipality's row from
    ``municipality_lookup`` and passed to the module-level ``model`` in
    ``FEATURES`` column order.

    Args:
        preferences: Dict with ``rooms``, ``area_m2`` and ``town``; optional
            ``luxurious`` and ``furnished`` flags default to False.

    Returns:
        The model prediction rounded to a whole number, as a float.

    Raises:
        ValueError: If ``town`` has no row in ``municipality_lookup``.
    """
    rooms = preferences["rooms"]
    area_m2 = preferences["area_m2"]
    town = preferences["town"]

    matches = municipality_lookup.loc[municipality_lookup["bfs_name"] == town]
    if matches.empty:
        raise ValueError(f"Gemeinde '{town}' fehlt im Lookup.")
    municipality = matches.iloc[0]

    # Area divided by rooms; rooms is floored at 0.5 to avoid division by zero.
    area_per_room = round(area_m2 / max(rooms, 0.5), 2)

    features = {"rooms": rooms, "area": area_m2}
    for column in ("pop", "pop_dens", "frg_pct", "emp", "tax_income"):
        features[column] = municipality[column]
    features["room_per_m2"] = area_per_room
    features["luxurious"] = int(preferences.get("luxurious", False))
    features["furnished"] = int(preferences.get("furnished", False))
    features["zurich_city"] = int(municipality["zurich_city"])
    features["distance_to_zurich_center"] = municipality["distance_to_zurich_center"]

    model_input = pd.DataFrame([features])[FEATURES]
    raw_prediction = model.predict(model_input)[0]
    return float(np.round(raw_prediction))
# ── Step 3 - Generate explanation ────────────────────────────────────────────
# System prompt for the explanation call, written one prompt line per item.
# The lines are joined with "\n" and there is no trailing newline.
EXPLANATION_SYSTEM_PROMPT = "\n".join(
    (
        'Du erklärst eine Mietpreis-Schätzung aus einem maschinellen Lernmodell auf Deutsch.',
        'Wichtig:',
        '- Berechne KEINEN neuen Preis. Verwende exakt den vorgegebenen Wert.',
        '- Antwort als JSON mit dem Schlüssel "answer".',
        '- 2-4 kurze Sätze.',
        '- Nimm Bezug auf Zimmer, Fläche und Ort des Nutzers.',
        '- Erwähne genau eine Unsicherheit oder Limitation des Modells (z.B. Zustand, Mikrolage, Stockwerk, Renovationsjahr).',
        '- Keine Markdown-Formatierung in der Antwort.',
    )
)
def generate_explanation(preferences: dict, prediction: float) -> str:
    """Ask the LLM for a short explanation of the predicted rent.

    The preferences and the prediction are sent as one JSON payload (non-ASCII
    characters kept as-is); the reply must be a JSON object with an
    ``"answer"`` key.

    Args:
        preferences: Extracted apartment preferences.
        prediction: Predicted monthly rent in CHF.

    Returns:
        The ``"answer"`` value as a stripped string.
    """
    payload = {"preferences": preferences, "predicted_rent_chf": prediction}
    llm_reply = call_llm_json(
        EXPLANATION_SYSTEM_PROMPT,
        json.dumps(payload, ensure_ascii=False),
    )
    answer = parse_json_response(llm_reply, required_keys=("answer",))["answer"]
    return str(answer).strip()
# ── Step 4 - End-to-end pipeline ─────────────────────────────────────────────
def run_pipeline(user_text: str):
    """Run extraction, prediction and explanation for one user request.

    Args:
        user_text: The user's description of the desired apartment.

    Returns:
        A ``(preferences, prediction, answer)`` tuple. For empty input, or
        when any step raises, the tuple is ``({}, None, message)`` with a
        hint or the error text as ``message``.
    """
    if not (user_text and user_text.strip()):
        return ({}, None, "Bitte beschreibe deinen Wohnungswunsch auf Deutsch.")

    # Top-level boundary: any failure is reported in the UI instead of raised.
    try:
        preferences = extract_preferences(user_text)
        prediction = predict_apartment_price(preferences)
        answer = generate_explanation(preferences, prediction)
    except Exception as exc:
        return ({}, None, f"Fehler: {exc}")
    return preferences, prediction, answer
# ── Gradio UI ────────────────────────────────────────────────────────────────
# Each example is a one-element row because the examples widget has a single
# input component.
EXAMPLES = [
    [sentence]
    for sentence in (
        "Ich suche eine 3.5-Zimmer-Wohnung mit etwa 85 m2 in Winterthur.",
        "Ich brauche eine möblierte 2-Zimmer-Wohnung mit 55 m2 in Kloten.",
        "Ich hätte gerne eine luxuriöse 4.5-Zimmer-Wohnung mit 140 m2 in Küsnacht (ZH).",
        "Ich suche 3 Zimmer und rund 70 m2 in Uster.",
        "Eine 5-Zimmer-Wohnung mit 130 m2 in Zürich wäre ideal.",
    )
]
# NOTE(review): the nesting below is reconstructed from the usual
# Blocks > Row > Column layout - confirm against the deployed app.
with gr.Blocks(title="Wohnungs-Schätzer mit LLM") as demo:
    gr.Markdown(
        """
# 🏠 Wohnungs-Schätzer mit LLM
Beschreibe deinen Wohnungswunsch auf **Deutsch**.
Ein Sprachmodell extrahiert Zimmer, Fläche und Ort, ein Gradient-Boosting-Modell
schätzt die monatliche Miete im Kanton Zürich, und das LLM erklärt das Ergebnis.
_Beispiel: "Ich suche eine 3.5-Zimmer-Wohnung mit etwa 85 m² in Winterthur."_
"""
    )

    with gr.Row():
        # Left column: free-text input, submit button and canned examples.
        with gr.Column():
            user_text = gr.Textbox(
                label="Wohnungswunsch (Deutsch)",
                lines=4,
                placeholder='z.B. "Ich suche eine 3.5-Zimmer-Wohnung mit 85 m2 in Winterthur."',
            )
            submit = gr.Button("Schätzen", variant="primary")
            gr.Examples(examples=EXAMPLES, inputs=[user_text], label="Beispiele")

        # Right column: the three outputs of run_pipeline, in return order.
        with gr.Column():
            extracted = gr.JSON(label="Extrahierte Eingaben (LLM)")
            price = gr.Number(label="Geschätzte Monatsmiete (CHF)", precision=0)
            response = gr.Textbox(label="Erklärung (LLM)", lines=6)

    # Event wiring stays inside the Blocks context.
    submit.click(
        fn=run_pipeline,
        inputs=[user_text],
        outputs=[extracted, price, response],
    )


if __name__ == "__main__":
    demo.launch()