Spaces:
Runtime error
Runtime error
| import joblib | |
| import pandas as pd | |
| import numpy as np | |
| import gradio as gr | |
| import geopy.distance | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.model_selection import train_test_split | |
| # ------------------------------ | |
| # Load Data | |
| # ------------------------------ | |
| DATA_PATH = "apartments_data_enriched_lat_lon_combined.csv" | |
| df = pd.read_csv(DATA_PATH) | |
| # Handle missing coordinate columns | |
| lat_col = "latitude" if "latitude" in df.columns else "lat" | |
| lon_col = "longitude" if "longitude" in df.columns else "lon" | |
| if lat_col not in df.columns or lon_col not in df.columns: | |
| raise KeyError("Latitude and Longitude columns are missing!") | |
| # Define public transport stations | |
| public_transit_stations = [ | |
| {"name": "Hauptbahnhof", "lat": 47.378177, "lon": 8.540192}, | |
| {"name": "Bahnhof Stadelhofen", "lat": 47.366321, "lon": 8.548008}, | |
| {"name": "Hardbrücke", "lat": 47.385118, "lon": 8.517220}, | |
| {"name": "Enge", "lat": 47.364751, "lon": 8.531601} | |
| ] | |
| # Function to compute distance to nearest station | |
| def distance_to_nearest_station(lat, lon): | |
| min_distance = np.inf | |
| for station in public_transit_stations: | |
| dist = geopy.distance.geodesic((lat, lon), (station["lat"], station["lon"])) | |
| min_distance = min(min_distance, dist.km) | |
| return min_distance | |
| # Compute new feature | |
| df["distance_to_transit"] = df.apply(lambda row: distance_to_nearest_station(row[lat_col], row[lon_col]), axis=1) | |
| # ------------------------------ | |
| # Train Model | |
| # ------------------------------ | |
| features = ["rooms", "area", "pop_dens", "tax_income", "distance_to_transit"] | |
| X = df[features] | |
| y = df["price"] | |
| X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) | |
| model = RandomForestRegressor(n_estimators=100, random_state=42) | |
| model.fit(X_train, y_train) | |
| # Save model | |
| joblib.dump(model, "apartment_price_model.pkl") | |
| # ------------------------------ | |
| # Gradio Web Interface | |
| # ------------------------------ | |
| def predict_price(rooms, area, pop_dens, tax_income, distance_to_transit): | |
| model = joblib.load("apartment_price_model.pkl") | |
| input_data = pd.DataFrame([[rooms, area, pop_dens, tax_income, distance_to_transit]], columns=features) | |
| prediction = model.predict(input_data)[0] | |
| return f"Geschätzter Preis: {prediction:.2f} CHF" | |
| app = gr.Interface( | |
| fn=predict_price, | |
| inputs=[ | |
| gr.Number(label="Anzahl Zimmer"), | |
| gr.Number(label="Fläche (m²)"), | |
| gr.Number(label="Bevölkerungsdichte"), | |
| gr.Number(label="Steuerbares Einkommen"), | |
| gr.Number(label="Distanz zur nächsten ÖV-Station (km)") | |
| ], | |
| outputs="text" | |
| ) | |
| if __name__ == "__main__": | |
| app.launch() | |