import os
import random
from typing import Optional

import numpy as np
import pandas as pd

# Configuration for Moroccan Real Estate High-Fidelity (Mubawab/ANCFCC 2024/2025)
#
# Per city:
#   base_price_apt / base_price_villa -- base price per m2 before multipliers
#       (currency is not stated in this file; presumably MAD -- TODO confirm).
#   premium_neighborhoods / standard_neighborhoods -- pools a listing's
#       neighborhood is drawn from, depending on the premium-zone draw.
#   has_tram -- whether a listing in this city can be flagged as near a tram.
CITIES = {
    "Casablanca": {
        "base_price_apt": 13900,
        "base_price_villa": 20500,
        "premium_neighborhoods": ["Anfa", "Ain Diab", "Gauthier", "Les Princesses", "Bouskoura Ville Verte"],
        "standard_neighborhoods": ["Maarif", "Sidi Maârouf", "Oulfa", "Bernoussi", "Belvédère"],
        "has_tram": True,
    },
    "Rabat": {
        "base_price_apt": 14500,
        "base_price_villa": 20300,
        "premium_neighborhoods": ["Hay Riad", "Souissi", "Agdal", "Ambassadeurs"],
        "standard_neighborhoods": ["Hassan", "Yacoub El Mansour", "El Menzeh", "Ocean"],
        "has_tram": True,
    },
    "Marrakech": {
        "base_price_apt": 16000,
        "base_price_villa": 22000,
        "premium_neighborhoods": ["Hivernage", "Palmeraie", "Gueliz-High", "Amelkis"],
        "standard_neighborhoods": ["Gueliz-Standard", "Medina", "Targa", "Mhamid", "Massira"],
        "has_tram": False,
    },
    "Tanger": {
        "base_price_apt": 11000,
        "base_price_villa": 15000,
        "premium_neighborhoods": ["Malabata", "Marshane", "Achakkar", "Jebel Kebir"],
        "standard_neighborhoods": ["Iberia", "Val Fleuri", "Beni Makada", "Dradeb"],
        "has_tram": False,
    },
}

PROPERTY_TYPES = ["Appartement", "Villa", "Maison"]
STANDINGS = ["Haut Standing", "Moyen Standing", "Economique"]
CONDITIONS = ["Neuf", "Bon état", "A rénover"]
ORIENTATIONS = ["Sud (Ensoleillé)", "Est", "Ouest", "Nord"]
VIEWS = ["Sans vis-à-vis", "Vue sur mer", "Vue sur Parc/Jardin", "Vue sur rue"]
RESIDENCY_TYPES = ["Résidence fermée & sécurisée", "Public / Quartier ouvert"]

# Price multipliers, built once instead of on every generated row.
_STANDING_FACTOR = {"Haut Standing": 1.5, "Moyen Standing": 1.0, "Economique": 0.6}
_VIEW_FACTOR = {
    "Vue sur mer": 1.35,
    "Vue sur Parc/Jardin": 1.12,
    "Sans vis-à-vis": 1.10,
    "Vue sur rue": 0.95,
}
_CONDITION_FACTOR = {"Neuf": 1.25, "Bon état": 1.0, "A rénover": 0.7}
# Orientations not listed here ("Est", "Ouest") are price-neutral.
_ORIENTATION_FACTOR = {"Sud (Ensoleillé)": 1.08, "Nord": 0.95}
# Neighborhoods that receive an extra 1.4x on top of the premium-zone factor.
_FLAGSHIP_NEIGHBORHOODS = frozenset({"Souissi", "Anfa", "Hivernage", "Ain Diab"})
_GATED_RESIDENCY = "Résidence fermée & sécurisée"

# Output schema; passed to the DataFrame so even an empty result has columns.
_COLUMNS = [
    "City", "Neighborhood", "Type", "Surface", "Rooms", "Bedrooms",
    "Standing", "Residency", "Orientation", "View", "Condition", "Floor",
    "Lift", "Pool", "Garden", "Parking_Spots", "Proximity_Tram",
    "Proximity_University", "Proximity_Mosque", "Price",
]


def _price_multiplier(
    *,
    is_premium_zone,
    neighborhood,
    standing,
    residency,
    orientation,
    view,
    condition,
    proximity_tram,
    proximity_university,
    proximity_mosque,
    has_pool,
    has_garden,
):
    """Return the combined multiplier applied to the base price per m2."""
    mult = 1.0
    if is_premium_zone:
        mult *= 1.6
    if neighborhood in _FLAGSHIP_NEIGHBORHOODS:
        mult *= 1.4
    mult *= _STANDING_FACTOR[standing]
    if residency == _GATED_RESIDENCY:
        mult *= 1.15
    mult *= _ORIENTATION_FACTOR.get(orientation, 1.0)
    mult *= _VIEW_FACTOR[view]
    mult *= _CONDITION_FACTOR[condition]
    if proximity_tram:
        mult *= 1.05
    if proximity_university:
        mult *= 1.07
    if proximity_mosque:
        mult *= 1.04
    if has_pool:
        mult *= 1.2
    if has_garden:
        mult *= 1.1
    return mult


def _generate_listing(rng, city_names):
    """Draw one synthetic listing from *rng* and return it as a row dict.

    The order of the random draws matches the original single-loop
    implementation, so a caller who seeds the global ``random`` module
    gets the same stream of draws as before.
    """
    city = rng.choice(city_names)
    city_cfg = CITIES[city]
    is_premium_zone = rng.random() < 0.35
    pool_key = "premium_neighborhoods" if is_premium_zone else "standard_neighborhoods"
    neighborhood = rng.choice(city_cfg[pool_key])

    prop_type = rng.choice(PROPERTY_TYPES)
    standing = rng.choice(STANDINGS)
    condition = rng.choice(CONDITIONS)
    orientation = rng.choice(ORIENTATIONS)
    view = rng.choice(VIEWS)
    residency = rng.choice(RESIDENCY_TYPES)

    if prop_type == "Villa":
        surface = rng.randint(200, 1500)
        rooms = rng.randint(5, 15)
        bedrooms = rng.randint(3, 8)
        floor = 0
        base_price_m2 = city_cfg["base_price_villa"]
    else:
        # NOTE(review): "Maison" also lands here and so gets a floor of 0-12
        # and apartment pricing; confirm that is intended for houses.
        surface = rng.randint(40, 350)
        rooms = rng.randint(1, 8)
        bedrooms = rng.randint(1, 4)
        floor = rng.randint(0, 12)
        base_price_m2 = city_cfg["base_price_apt"]
    # Bedrooms are drawn independently of rooms; clamp so a listing never
    # reports more bedrooms than total rooms.
    bedrooms = min(bedrooms, rooms)

    is_apartment = prop_type == "Appartement"
    is_villa = prop_type == "Villa"
    has_lift = int(is_apartment and (floor > 2 or is_premium_zone))
    # Short-circuiting is deliberate: the random draw happens only when the
    # preceding conditions leave the outcome undecided.
    has_pool = int(is_villa and (is_premium_zone or rng.random() > 0.5))
    has_garden = int(is_villa or (is_apartment and floor == 0 and rng.random() > 0.7))

    if is_premium_zone or standing == "Haut Standing":
        parking_spots = rng.randint(1, 3)
    else:
        parking_spots = rng.randint(0, 1)

    proximity_tram = int(city_cfg["has_tram"] and rng.random() > 0.6)
    proximity_university = int(rng.random() > 0.7)
    proximity_mosque = int(rng.random() > 0.3)

    mult = _price_multiplier(
        is_premium_zone=is_premium_zone,
        neighborhood=neighborhood,
        standing=standing,
        residency=residency,
        orientation=orientation,
        view=view,
        condition=condition,
        proximity_tram=proximity_tram,
        proximity_university=proximity_university,
        proximity_mosque=proximity_mosque,
        has_pool=has_pool,
        has_garden=has_garden,
    )
    price_per_m2 = base_price_m2 * mult
    # +/-3% noise on the total price.
    total_price = (price_per_m2 * surface) * rng.uniform(0.97, 1.03)

    return {
        "City": city,
        "Neighborhood": neighborhood,
        "Type": prop_type,
        "Surface": surface,
        "Rooms": rooms,
        "Bedrooms": bedrooms,
        "Standing": standing,
        "Residency": residency,
        "Orientation": orientation,
        "View": view,
        "Condition": condition,
        "Floor": floor,
        "Lift": has_lift,
        "Pool": has_pool,
        "Garden": has_garden,
        "Parking_Spots": parking_spots,
        "Proximity_Tram": proximity_tram,
        "Proximity_University": proximity_university,
        "Proximity_Mosque": proximity_mosque,
        # Rounded to the nearest thousand.
        "Price": round(total_price, -3),
    }


def generate_data(
    n_samples: int = 20000,
    *,
    seed: Optional[int] = None,
    output_path: Optional[str] = "data.csv",
) -> pd.DataFrame:
    """Generate a synthetic real-estate listings dataset.

    Args:
        n_samples: Number of listings to generate. Zero (or a negative
            value) yields an empty frame that still has every column.
        seed: If given, rows are drawn from a private ``random.Random(seed)``
            so the result is reproducible and the global random state is
            untouched. If ``None``, the global ``random`` module is used,
            as in the original implementation.
        output_path: CSV file the frame is written to (without the index).
            Pass ``None`` to skip writing.

    Returns:
        A DataFrame with one row per listing.
    """
    rng = random if seed is None else random.Random(seed)
    city_names = tuple(CITIES)
    rows = [_generate_listing(rng, city_names) for _ in range(n_samples)]

    df = pd.DataFrame(rows, columns=_COLUMNS)
    if output_path is not None:
        df.to_csv(output_path, index=False)
    return df


if __name__ == "__main__":
    generate_data(20000)