Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import random | |
| import os | |
# Per-city configuration for the synthetic Moroccan real-estate generator
# (reference: Mubawab/ANCFCC 2024/2025).
#
# For every city:
#   base_price_apt / base_price_villa -- baseline price per square metre used
#       for non-villa and villa listings respectively (currency is not stated
#       in this file; presumably MAD -- TODO confirm).
#   premium_neighborhoods / standard_neighborhoods -- neighbourhood pools a
#       listing is drawn from, depending on whether it falls in a premium zone.
#   has_tram -- whether listings in the city can be flagged as close to a tram.
CITIES = {
    "Casablanca": {
        "base_price_apt": 13900,
        "base_price_villa": 20500,
        "premium_neighborhoods": [
            "Anfa",
            "Ain Diab",
            "Gauthier",
            "Les Princesses",
            "Bouskoura Ville Verte",
        ],
        "standard_neighborhoods": [
            "Maarif",
            "Sidi Maârouf",
            "Oulfa",
            "Bernoussi",
            "Belvédère",
        ],
        "has_tram": True,
    },
    "Rabat": {
        "base_price_apt": 14500,
        "base_price_villa": 20300,
        "premium_neighborhoods": [
            "Hay Riad",
            "Souissi",
            "Agdal",
            "Ambassadeurs",
        ],
        "standard_neighborhoods": [
            "Hassan",
            "Yacoub El Mansour",
            "El Menzeh",
            "Ocean",
        ],
        "has_tram": True,
    },
    "Marrakech": {
        "base_price_apt": 16000,
        "base_price_villa": 22000,
        "premium_neighborhoods": [
            "Hivernage",
            "Palmeraie",
            "Gueliz-High",
            "Amelkis",
        ],
        "standard_neighborhoods": [
            "Gueliz-Standard",
            "Medina",
            "Targa",
            "Mhamid",
            "Massira",
        ],
        "has_tram": False,
    },
    "Tanger": {
        "base_price_apt": 11000,
        "base_price_villa": 15000,
        "premium_neighborhoods": [
            "Malabata",
            "Marshane",
            "Achakkar",
            "Jebel Kebir",
        ],
        "standard_neighborhoods": [
            "Iberia",
            "Val Fleuri",
            "Beni Makada",
            "Dradeb",
        ],
        "has_tram": False,
    },
}
# Categorical value pools sampled uniformly by the generator. The strings are
# emitted verbatim into the dataset, so they must not be altered.
PROPERTY_TYPES = [
    "Appartement",
    "Villa",
    "Maison",
]
STANDINGS = [
    "Haut Standing",
    "Moyen Standing",
    "Economique",
]
CONDITIONS = [
    "Neuf",
    "Bon état",
    "A rénover",
]
ORIENTATIONS = [
    "Sud (Ensoleillé)",
    "Est",
    "Ouest",
    "Nord",
]
VIEWS = [
    "Sans vis-à-vis",
    "Vue sur mer",
    "Vue sur Parc/Jardin",
    "Vue sur rue",
]
RESIDENCY_TYPES = [
    "Résidence fermée & sécurisée",
    "Public / Quartier ouvert",
]
def generate_data(n_samples=20000, seed=None, output_path="data.csv"):
    """Generate a synthetic real-estate listings dataset and save it as CSV.

    Every row is one randomly drawn listing. Its price is the city's base
    price per square metre, scaled by a chain of multipliers (zone, standing,
    residency type, orientation, view, condition, proximities, amenities),
    multiplied by the surface, perturbed by +/-3% uniform noise and rounded
    to the nearest thousand.

    Args:
        n_samples: Number of listings to generate.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            drives all draws so the output is reproducible. When ``None``
            (default) the module-level ``random`` generator is used, exactly
            as before this parameter existed.
        output_path: Where to write the CSV (default ``"data.csv"``).
            Pass ``None`` to skip writing and only return the frame.

    Returns:
        pandas.DataFrame: One row per listing with the columns ``City``,
        ``Neighborhood``, ``Type``, ``Surface``, ``Rooms``, ``Bedrooms``,
        ``Standing``, ``Residency``, ``Orientation``, ``View``, ``Condition``,
        ``Floor``, ``Lift``, ``Pool``, ``Garden``, ``Parking_Spots``,
        ``Proximity_Tram``, ``Proximity_University``, ``Proximity_Mosque``
        and ``Price``.
    """
    # A dedicated generator keeps seeded runs independent of global state;
    # without a seed we keep using the shared module-level generator.
    rng = random if seed is None else random.Random(seed)

    # Loop-invariant lookup tables, built once instead of per sample.
    city_names = list(CITIES)
    top_neighborhoods = {"Souissi", "Anfa", "Hivernage", "Ain Diab"}
    standing_map = {"Haut Standing": 1.5, "Moyen Standing": 1.0, "Economique": 0.6}
    view_map = {
        "Vue sur mer": 1.35,
        "Vue sur Parc/Jardin": 1.12,
        "Sans vis-à-vis": 1.10,
        "Vue sur rue": 0.95,
    }
    cond_map = {"Neuf": 1.25, "Bon état": 1.0, "A rénover": 0.7}
    columns = [
        "City", "Neighborhood", "Type", "Surface", "Rooms", "Bedrooms",
        "Standing", "Residency", "Orientation", "View", "Condition", "Floor",
        "Lift", "Pool", "Garden", "Parking_Spots", "Proximity_Tram",
        "Proximity_University", "Proximity_Mosque", "Price",
    ]

    data = []
    for _ in range(n_samples):
        # --- Location and categorical attributes -------------------------
        city = rng.choice(city_names)
        city_cfg = CITIES[city]
        is_premium_zone = rng.random() < 0.35
        neighborhoods = (
            city_cfg["premium_neighborhoods"]
            if is_premium_zone
            else city_cfg["standard_neighborhoods"]
        )
        neighborhood = rng.choice(neighborhoods)
        prop_type = rng.choice(PROPERTY_TYPES)
        standing = rng.choice(STANDINGS)
        condition = rng.choice(CONDITIONS)
        orientation = rng.choice(ORIENTATIONS)
        view = rng.choice(VIEWS)
        residency = rng.choice(RESIDENCY_TYPES)

        is_villa = prop_type == "Villa"
        is_apartment = prop_type == "Appartement"

        # --- Size -------------------------------------------------------
        # Bedrooms are capped at the room count so a listing can never
        # report more bedrooms than rooms.
        if is_villa:
            surface = rng.randint(200, 1500)
            rooms = rng.randint(5, 15)
            bedrooms = rng.randint(3, min(8, rooms))
            floor = 0
            base_price_m2 = city_cfg["base_price_villa"]
        else:
            # NOTE(review): "Maison" shares the apartment ranges (including a
            # floor of up to 12) and the apartment base price -- confirm this
            # is the intended modelling for houses.
            surface = rng.randint(40, 350)
            rooms = rng.randint(1, 8)
            bedrooms = rng.randint(1, min(4, rooms))
            floor = rng.randint(0, 12)
            base_price_m2 = city_cfg["base_price_apt"]

        # --- Amenities and surroundings ---------------------------------
        # Only apartments get a lift: above the 2nd floor or in premium zones.
        has_lift = int(is_apartment and (floor > 2 or is_premium_zone))
        has_pool = int(is_villa and (is_premium_zone or rng.random() > 0.5))
        has_garden = int(
            is_villa or (is_apartment and floor == 0 and rng.random() > 0.7)
        )
        if is_premium_zone or standing == "Haut Standing":
            parking_spots = rng.randint(1, 3)
        else:
            parking_spots = rng.randint(0, 1)
        proximity_tram = int(city_cfg["has_tram"] and rng.random() > 0.6)
        proximity_university = int(rng.random() > 0.7)
        proximity_mosque = int(rng.random() > 0.3)

        # --- Price multiplier ---------------------------------------------
        mult = 1.0
        if is_premium_zone:
            mult *= 1.6
        if neighborhood in top_neighborhoods:
            mult *= 1.4
        mult *= standing_map[standing]
        if residency == "Résidence fermée & sécurisée":
            mult *= 1.15
        if orientation == "Sud (Ensoleillé)":
            mult *= 1.08
        elif orientation == "Nord":
            mult *= 0.95
        mult *= view_map[view]
        mult *= cond_map[condition]
        if proximity_tram:
            mult *= 1.05
        if proximity_university:
            mult *= 1.07
        if proximity_mosque:
            mult *= 1.04
        if has_pool:
            mult *= 1.2
        if has_garden:
            mult *= 1.1

        price_per_m2 = base_price_m2 * mult
        # +/-3% uniform noise on the total price.
        total_price = (price_per_m2 * surface) * rng.uniform(0.97, 1.03)

        data.append({
            "City": city,
            "Neighborhood": neighborhood,
            "Type": prop_type,
            "Surface": surface,
            "Rooms": rooms,
            "Bedrooms": bedrooms,
            "Standing": standing,
            "Residency": residency,
            "Orientation": orientation,
            "View": view,
            "Condition": condition,
            "Floor": floor,
            "Lift": has_lift,
            "Pool": has_pool,
            "Garden": has_garden,
            "Parking_Spots": parking_spots,
            "Proximity_Tram": proximity_tram,
            "Proximity_University": proximity_university,
            "Proximity_Mosque": proximity_mosque,
            # Rounded to the nearest thousand.
            "Price": round(total_price, -3),
        })

    # Explicit columns keep the schema (and CSV header) even for zero rows.
    df = pd.DataFrame(data, columns=columns)
    if output_path is not None:
        df.to_csv(output_path, index=False)
    return df
if __name__ == "__main__":
    # Script entry point: build the 20,000-row dataset.
    generate_data(n_samples=20000)