"""Generate a synthetic Moroccan real-estate listings dataset and save it as CSV.

Each record is drawn at random (city, neighborhood, property type, features)
and priced as ``base price per m2 * product of multipliers * surface`` with a
small random noise factor.
"""
import os
import random
from typing import Optional

import numpy as np
import pandas as pd

# Configuration for Moroccan Real Estate High-Fidelity (Mubawab/ANCFCC/BAM 2024)
CITIES = {
    "Casablanca": {
        "base_price_apt": 19500,  # Increased based on 2024 trends
        "base_price_villa": 27500,
        "premium_neighborhoods": ["Anfa", "Ain Diab", "Gauthier", "Les Princesses", "Racine", "California"],
        "standard_neighborhoods": ["Maarif", "Sidi Maârouf", "Oulfa", "Bernoussi", "Belvédère", "Salmia"],
        "has_tram": True,
    },
    "Rabat": {
        "base_price_apt": 17800,  # Rabat prices are high due to administrative status
        "base_price_villa": 24000,
        "premium_neighborhoods": ["Hay Riad", "Souissi", "Agdal", "Ambassadeurs", "Orangers"],
        "standard_neighborhoods": ["Hassan", "Yacoub El Mansour", "El Menzeh", "Ocean", "Kamra"],
        "has_tram": True,
    },
    "Marrakech": {
        "base_price_apt": 14500,
        "base_price_villa": 25000,
        "premium_neighborhoods": ["Hivernage", "Palmeraie", "Gueliz-High", "Amelkis", "Targa-Premium"],
        "standard_neighborhoods": ["Gueliz-Standard", "Medina", "Targa", "Mhamid", "Massira", "Iziki"],
        "has_tram": False,
    },
    "Tanger": {
        "base_price_apt": 15500,
        "base_price_villa": 19000,
        "premium_neighborhoods": ["Malabata", "Marshane", "Achakkar", "Jebel Kebir", "California-Tanger"],
        "standard_neighborhoods": ["Iberia", "Val Fleuri", "Beni Makada", "Dradeb", "Moghogha"],
        "has_tram": False,
    },
    "Agadir": {
        "base_price_apt": 16500,
        "base_price_villa": 20000,
        "premium_neighborhoods": ["Founty", "Charaf", "Haut Founty", "Marina"],
        "standard_neighborhoods": ["Dakhla", "Salam", "Anza", "Al Houda"],
        "has_tram": False,
    },
}

PROPERTY_TYPES = ["Appartement", "Villa", "Maison"]
STANDINGS = ["Haut Standing", "Moyen Standing", "Economique"]
CONDITIONS = ["Neuf", "Bon état", "A rénover"]
ORIENTATIONS = ["Sud (Ensoleillé)", "Est", "Ouest", "Nord"]
VIEWS = ["Sans vis-à-vis", "Vue sur mer", "Vue sur Parc/Jardin", "Vue sur rue"]
RESIDENCY_TYPES = ["Résidence fermée & sécurisée", "Public / Quartier ouvert"]

# --- Valuation lookup tables (built once instead of on every record) ---
# Neighborhoods that receive an extra 1.4x on top of the premium-zone factor.
ELITE_NEIGHBORHOODS = frozenset({"Souissi", "Anfa", "Hivernage", "Ain Diab"})
# Standing (Standard finishes vs Luxury)
STANDING_MULTIPLIERS = {"Haut Standing": 1.5, "Moyen Standing": 1.0, "Economique": 0.6}
VIEW_MULTIPLIERS = {
    "Vue sur mer": 1.35,
    "Vue sur Parc/Jardin": 1.12,
    "Sans vis-à-vis": 1.10,
    "Vue sur rue": 0.95,
}
CONDITION_MULTIPLIERS = {"Neuf": 1.25, "Bon état": 1.0, "A rénover": 0.7}

# Column order of the generated dataset; also guarantees a proper CSV header
# when zero records are requested.
RECORD_COLUMNS = [
    "City", "Neighborhood", "Type", "Surface", "Rooms", "Bedrooms",
    "Standing", "Residency", "Orientation", "View", "Condition", "Floor",
    "Lift", "Pool", "Garden", "Parking_Spots", "Proximity_Tram",
    "Proximity_University", "Proximity_Mosque", "Price",
]


def _price_multiplier(is_premium_zone, neighborhood, standing, residency,
                      orientation, view, condition, proximity_tram,
                      proximity_university, proximity_mosque, has_pool,
                      has_garden):
    """Return the combined price-per-m2 multiplier for one property."""
    mult = 1.0

    # Neighborhood & Standing Impact
    if is_premium_zone:
        mult *= 1.6
    if neighborhood in ELITE_NEIGHBORHOODS:
        mult *= 1.4
    mult *= STANDING_MULTIPLIERS[standing]

    # Residency & Security
    if residency == "Résidence fermée & sécurisée":
        mult *= 1.15

    # Orientation (Sud is highly valued in MOROCCO for winter sun)
    if orientation == "Sud (Ensoleillé)":
        mult *= 1.08
    elif orientation == "Nord":
        mult *= 0.95

    mult *= VIEW_MULTIPLIERS[view]
    mult *= CONDITION_MULTIPLIERS[condition]

    # Proximity Impact
    if proximity_tram:
        mult *= 1.05
    if proximity_university:
        mult *= 1.07  # Student demand premium
    if proximity_mosque:
        mult *= 1.04  # Convenience premium

    # Extras
    if has_pool:
        mult *= 1.2
    if has_garden:
        mult *= 1.1
    return mult


def _generate_record(rng, city_names):
    """Draw one random property from *rng* and return it as a dict keyed by RECORD_COLUMNS."""
    city = rng.choice(city_names)
    city_cfg = CITIES[city]
    is_premium_zone = rng.random() < 0.35
    pool_key = "premium_neighborhoods" if is_premium_zone else "standard_neighborhoods"
    neighborhood = rng.choice(city_cfg[pool_key])

    prop_type = rng.choice(PROPERTY_TYPES)
    standing = rng.choice(STANDINGS)
    condition = rng.choice(CONDITIONS)
    orientation = rng.choice(ORIENTATIONS)
    view = rng.choice(VIEWS)
    residency = rng.choice(RESIDENCY_TYPES)

    # Dimensions logic
    if prop_type == "Villa":
        surface = rng.randint(200, 1500)
        rooms = rng.randint(5, 15)
        bedrooms = rng.randint(3, 8)
        floor = 0
        base_price_m2 = city_cfg["base_price_villa"]
    else:
        # NOTE(review): "Maison" shares this apartment branch, including a
        # random floor of 0-12 and the apartment base price — confirm whether
        # houses should be modelled separately.
        surface = rng.randint(40, 350)
        rooms = rng.randint(1, 8)
        bedrooms = rng.randint(1, 5)
        floor = rng.randint(0, 12)
        base_price_m2 = city_cfg["base_price_apt"]
    # Rooms and bedrooms are drawn independently; cap bedrooms so a listing
    # never reports more bedrooms than total rooms.
    bedrooms = min(bedrooms, rooms)

    # Specific Moroccan features
    is_apartment = prop_type == "Appartement"
    is_villa = prop_type == "Villa"
    has_lift = int(is_apartment and (floor > 2 or is_premium_zone))
    # The short-circuit order below decides when a random number is consumed;
    # keep it as is so externally seeded runs stay comparable.
    has_pool = int(is_villa and (is_premium_zone or rng.random() > 0.5))
    has_garden = int(is_villa or (is_apartment and floor == 0 and rng.random() > 0.7))
    if is_premium_zone or standing == "Haut Standing":
        parking_spots = rng.randint(1, 3)
    else:
        parking_spots = rng.randint(0, 1)

    # Proximity logic
    proximity_tram = int(city_cfg["has_tram"] and rng.random() > 0.6)
    proximity_university = int(rng.random() > 0.7)  # High demand for students
    proximity_mosque = int(rng.random() > 0.3)  # Essential service in Moroccan neighborhoods

    mult = _price_multiplier(
        is_premium_zone, neighborhood, standing, residency, orientation, view,
        condition, proximity_tram, proximity_university, proximity_mosque,
        has_pool, has_garden,
    )

    # Final calculation: +/-3% noise on top of the deterministic valuation.
    price_per_m2 = base_price_m2 * mult
    total_price = (price_per_m2 * surface) * rng.uniform(0.97, 1.03)

    return {
        "City": city,
        "Neighborhood": neighborhood,
        "Type": prop_type,
        "Surface": surface,
        "Rooms": rooms,
        "Bedrooms": bedrooms,
        "Standing": standing,
        "Residency": residency,
        "Orientation": orientation,
        "View": view,
        "Condition": condition,
        "Floor": floor,
        "Lift": has_lift,
        "Pool": has_pool,
        "Garden": has_garden,
        "Parking_Spots": parking_spots,
        "Proximity_Tram": proximity_tram,
        "Proximity_University": proximity_university,
        "Proximity_Mosque": proximity_mosque,
        "Price": round(total_price, -3),  # rounded to the nearest thousand
    }


def generate_data(n_samples: int = 20000, *, seed: Optional[int] = None,
                  output_path: Optional[str] = None) -> pd.DataFrame:
    """Generate *n_samples* synthetic listings, write them to CSV and return them.

    Args:
        n_samples: Number of records to generate.
        seed: When given, a private ``random.Random(seed)`` is used so the
            output is reproducible. When ``None`` the module-level ``random``
            state is used, as before.
        output_path: Destination CSV file. Defaults to
            ``../data/morocco_real_estate_data_pro.csv`` relative to this file.

    Returns:
        The generated records as a DataFrame with the columns in
        ``RECORD_COLUMNS``.
    """
    rng = random.Random(seed) if seed is not None else random
    city_names = list(CITIES)
    data = [_generate_record(rng, city_names) for _ in range(n_samples)]
    df = pd.DataFrame(data, columns=RECORD_COLUMNS)

    if output_path is None:
        output_path = os.path.join(
            os.path.dirname(__file__), "..", "data", "morocco_real_estate_data_pro.csv"
        )
    # Create the target directory up front; to_csv does not create it.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(output_path, index=False, encoding='utf-8-sig')
    print(f"Generated {n_samples} High-Fidelity records in {output_path}")
    return df


if __name__ == "__main__":
    generate_data(20000)