Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import random | |
| import os | |
# Configuration for Moroccan Real Estate High-Fidelity (Mubawab/ANCFCC/BAM 2024).
#
# One entry per city, keyed by city name:
#   base_price_apt / base_price_villa
#       Base price per m2 before multipliers, for non-villa and villa
#       listings respectively (currency is not stated in this file;
#       presumably MAD -- TODO confirm).
#   premium_neighborhoods / standard_neighborhoods
#       Neighborhood names a listing is drawn from, depending on whether
#       it falls in a premium zone.
#   has_tram
#       Whether listings in this city can be flagged as close to a tram.
CITIES = {
    "Casablanca": {
        "base_price_apt": 19500,  # Increased based on 2024 trends
        "base_price_villa": 27500,
        "premium_neighborhoods": [
            "Anfa",
            "Ain Diab",
            "Gauthier",
            "Les Princesses",
            "Racine",
            "California",
        ],
        "standard_neighborhoods": [
            "Maarif",
            "Sidi Maârouf",
            "Oulfa",
            "Bernoussi",
            "Belvédère",
            "Salmia",
        ],
        "has_tram": True,
    },
    "Rabat": {
        "base_price_apt": 17800,  # Rabat prices are high due to administrative status
        "base_price_villa": 24000,
        "premium_neighborhoods": [
            "Hay Riad",
            "Souissi",
            "Agdal",
            "Ambassadeurs",
            "Orangers",
        ],
        "standard_neighborhoods": [
            "Hassan",
            "Yacoub El Mansour",
            "El Menzeh",
            "Ocean",
            "Kamra",
        ],
        "has_tram": True,
    },
    "Marrakech": {
        "base_price_apt": 14500,
        "base_price_villa": 25000,
        "premium_neighborhoods": [
            "Hivernage",
            "Palmeraie",
            "Gueliz-High",
            "Amelkis",
            "Targa-Premium",
        ],
        "standard_neighborhoods": [
            "Gueliz-Standard",
            "Medina",
            "Targa",
            "Mhamid",
            "Massira",
            "Iziki",
        ],
        "has_tram": False,
    },
    "Tanger": {
        "base_price_apt": 15500,
        "base_price_villa": 19000,
        "premium_neighborhoods": [
            "Malabata",
            "Marshane",
            "Achakkar",
            "Jebel Kebir",
            "California-Tanger",
        ],
        "standard_neighborhoods": [
            "Iberia",
            "Val Fleuri",
            "Beni Makada",
            "Dradeb",
            "Moghogha",
        ],
        "has_tram": False,
    },
    "Agadir": {
        "base_price_apt": 16500,
        "base_price_villa": 20000,
        "premium_neighborhoods": [
            "Founty",
            "Charaf",
            "Haut Founty",
            "Marina",
        ],
        "standard_neighborhoods": [
            "Dakhla",
            "Salam",
            "Anza",
            "Al Houda",
        ],
        "has_tram": False,
    },
}
# Categorical value pools. Each generated listing picks one entry from each
# list; the strings are written verbatim into the output data.

# Kind of property.
PROPERTY_TYPES = [
    "Appartement",
    "Villa",
    "Maison",
]

# Finish / quality tier.
STANDINGS = [
    "Haut Standing",
    "Moyen Standing",
    "Economique",
]

# Physical state of the property.
CONDITIONS = [
    "Neuf",
    "Bon état",
    "A rénover",
]

# Main exposure of the property.
ORIENTATIONS = [
    "Sud (Ensoleillé)",
    "Est",
    "Ouest",
    "Nord",
]

# What the property looks onto.
VIEWS = [
    "Sans vis-à-vis",
    "Vue sur mer",
    "Vue sur Parc/Jardin",
    "Vue sur rue",
]

# Gated residence vs. open neighborhood.
RESIDENCY_TYPES = [
    "Résidence fermée & sécurisée",
    "Public / Quartier ouvert",
]
# Neighborhoods that receive an extra multiplier on top of the generic
# premium-zone multiplier.
_PRESTIGE_NEIGHBORHOODS = frozenset({"Souissi", "Anfa", "Hivernage", "Ain Diab"})

# Price multipliers keyed by categorical value.
_STANDING_MULT = {"Haut Standing": 1.5, "Moyen Standing": 1.0, "Economique": 0.6}
_VIEW_MULT = {
    "Vue sur mer": 1.35,
    "Vue sur Parc/Jardin": 1.12,
    "Sans vis-à-vis": 1.10,
    "Vue sur rue": 0.95,
}
_CONDITION_MULT = {"Neuf": 1.25, "Bon état": 1.0, "A rénover": 0.7}

# Column order of the generated table. Passed explicitly to the DataFrame so
# that an empty run (n_samples == 0) still produces a CSV with a header row.
_COLUMNS = [
    "City",
    "Neighborhood",
    "Type",
    "Surface",
    "Rooms",
    "Bedrooms",
    "Standing",
    "Residency",
    "Orientation",
    "View",
    "Condition",
    "Floor",
    "Lift",
    "Pool",
    "Garden",
    "Parking_Spots",
    "Proximity_Tram",
    "Proximity_University",
    "Proximity_Mosque",
    "Price",
]


def _price_multiplier(listing, is_premium_zone):
    """Return the combined price-per-m2 multiplier for one listing.

    The factors are applied in a fixed order (zone, prestige neighborhood,
    standing, residency, orientation, view, condition, proximities, extras)
    so the floating-point result does not depend on dict ordering.
    """
    mult = 1.0

    # Neighborhood & standing impact
    if is_premium_zone:
        mult *= 1.6
    if listing["Neighborhood"] in _PRESTIGE_NEIGHBORHOODS:
        mult *= 1.4
    mult *= _STANDING_MULT[listing["Standing"]]

    # Residency & security
    if listing["Residency"] == "Résidence fermée & sécurisée":
        mult *= 1.15

    # Orientation: south-facing gets a premium, north-facing a discount
    orientation = listing["Orientation"]
    if orientation == "Sud (Ensoleillé)":
        mult *= 1.08
    elif orientation == "Nord":
        mult *= 0.95

    mult *= _VIEW_MULT[listing["View"]]
    mult *= _CONDITION_MULT[listing["Condition"]]

    # Proximity impact
    if listing["Proximity_Tram"]:
        mult *= 1.05
    if listing["Proximity_University"]:
        mult *= 1.07
    if listing["Proximity_Mosque"]:
        mult *= 1.04

    # Extras
    if listing["Pool"]:
        mult *= 1.2
    if listing["Garden"]:
        mult *= 1.1

    return mult


def _generate_listing(rng, city_names):
    """Draw one synthetic listing (a dict keyed by ``_COLUMNS``) from *rng*.

    *rng* is anything exposing ``choice``, ``random``, ``randint`` and
    ``uniform`` -- either the ``random`` module itself or a
    ``random.Random`` instance.
    """
    city = rng.choice(city_names)
    city_cfg = CITIES[city]
    is_premium_zone = rng.random() < 0.35
    pool_key = "premium_neighborhoods" if is_premium_zone else "standard_neighborhoods"
    neighborhood = rng.choice(city_cfg[pool_key])

    prop_type = rng.choice(PROPERTY_TYPES)
    standing = rng.choice(STANDINGS)
    condition = rng.choice(CONDITIONS)
    orientation = rng.choice(ORIENTATIONS)
    view = rng.choice(VIEWS)
    residency = rng.choice(RESIDENCY_TYPES)

    is_villa = prop_type == "Villa"
    is_apartment = prop_type == "Appartement"

    # Dimensions logic
    if is_villa:
        surface = rng.randint(200, 1500)
        rooms = rng.randint(5, 15)
        bedrooms = rng.randint(3, 8)
        floor = 0
        base_price_m2 = city_cfg["base_price_villa"]
    else:
        # NOTE(review): "Maison" also takes this branch, so houses get a
        # floor in 0..12 and the apartment base price -- confirm intended.
        surface = rng.randint(40, 350)
        rooms = rng.randint(1, 8)
        bedrooms = rng.randint(1, 5)
        floor = rng.randint(0, 12)
        base_price_m2 = city_cfg["base_price_apt"]
    # A listing cannot have more bedrooms than rooms; the two independent
    # draws above could otherwise produce e.g. 1 room with 5 bedrooms.
    bedrooms = min(bedrooms, rooms)

    # Amenities. The short-circuiting below is deliberate: a random number is
    # only consumed when the deterministic part of the condition allows it.
    has_lift = int(is_apartment and (floor > 2 or is_premium_zone))
    has_pool = int(is_villa and (is_premium_zone or rng.random() > 0.5))
    has_garden = int(is_villa or (is_apartment and floor == 0 and rng.random() > 0.7))
    if is_premium_zone or standing == "Haut Standing":
        parking_spots = rng.randint(1, 3)
    else:
        parking_spots = rng.randint(0, 1)

    # Proximity flags; tram proximity is only possible in cities with a tram.
    proximity_tram = int(city_cfg["has_tram"] and rng.random() > 0.6)
    proximity_university = int(rng.random() > 0.7)
    proximity_mosque = int(rng.random() > 0.3)

    listing = {
        "City": city,
        "Neighborhood": neighborhood,
        "Type": prop_type,
        "Surface": surface,
        "Rooms": rooms,
        "Bedrooms": bedrooms,
        "Standing": standing,
        "Residency": residency,
        "Orientation": orientation,
        "View": view,
        "Condition": condition,
        "Floor": floor,
        "Lift": has_lift,
        "Pool": has_pool,
        "Garden": has_garden,
        "Parking_Spots": parking_spots,
        "Proximity_Tram": proximity_tram,
        "Proximity_University": proximity_university,
        "Proximity_Mosque": proximity_mosque,
    }

    # Final calculation: base price scaled by the multipliers and surface,
    # with +/-3% uniform noise, rounded to the nearest thousand.
    price_per_m2 = base_price_m2 * _price_multiplier(listing, is_premium_zone)
    total_price = (price_per_m2 * surface) * rng.uniform(0.97, 1.03)
    listing["Price"] = round(total_price, -3)
    return listing


def generate_data(n_samples=20000, seed=None, output_path=None):
    """Generate synthetic real-estate listings, write them to CSV, return them.

    Args:
        n_samples: Number of listings to generate. Zero or a negative value
            yields an empty table that still carries the column header.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            is used so the output is reproducible and the global ``random``
            state is untouched. When ``None`` (default) the module-level
            ``random`` functions are used, as before.
        output_path: Destination CSV path. Defaults to
            ``../data/morocco_real_estate_data_pro.csv`` relative to this
            file's directory.

    Returns:
        The generated ``pandas.DataFrame`` (one row per listing).

    Side effects:
        Creates the destination directory if it is missing, writes the CSV
        (UTF-8 with BOM, no index column) and prints a one-line summary.
    """
    rng = random if seed is None else random.Random(seed)
    city_names = list(CITIES)  # hoisted: invariant across listings

    records = [_generate_listing(rng, city_names) for _ in range(n_samples)]
    df = pd.DataFrame(records, columns=_COLUMNS)

    if output_path is None:
        output_path = os.path.join(os.path.dirname(__file__), "..", "data", "morocco_real_estate_data_pro.csv")

    # to_csv does not create missing parent directories.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    df.to_csv(output_path, index=False, encoding='utf-8-sig')
    print(f"Generated {len(df)} High-Fidelity records in {output_path}")
    return df
# Script entry point: build the 20,000-listing dataset when this file is run
# directly; importing the module does not trigger generation.
if __name__ == "__main__":
    generate_data(n_samples=20000)