ALBORAQ-AI-IMMO / backend /data_generator.py
mhdbbbbb's picture
Upload folder using huggingface_hub
fc41845 verified
import pandas as pd
import numpy as np
import random
import os
# Per-city configuration for the synthetic Moroccan real-estate generator.
# Original author's note: "High-Fidelity (Mubawab/ANCFCC/BAM 2024)".
#
# Each city maps to:
#   base_price_apt / base_price_villa  - base price per square metre, scaled
#                                        by feature multipliers in generate_data
#   premium_neighborhoods              - names sampled for premium-zone records
#   standard_neighborhoods             - names sampled for all other records
#   has_tram                           - whether tram proximity can be drawn
CITIES = {
    "Casablanca": {
        "base_price_apt": 19500,  # raised to reflect 2024 trends (author's note)
        "base_price_villa": 27500,
        "premium_neighborhoods": [
            "Anfa",
            "Ain Diab",
            "Gauthier",
            "Les Princesses",
            "Racine",
            "California",
        ],
        "standard_neighborhoods": [
            "Maarif",
            "Sidi Maârouf",
            "Oulfa",
            "Bernoussi",
            "Belvédère",
            "Salmia",
        ],
        "has_tram": True,
    },
    "Rabat": {
        # Author's note: prices are high because of the city's administrative status.
        "base_price_apt": 17800,
        "base_price_villa": 24000,
        "premium_neighborhoods": [
            "Hay Riad",
            "Souissi",
            "Agdal",
            "Ambassadeurs",
            "Orangers",
        ],
        "standard_neighborhoods": [
            "Hassan",
            "Yacoub El Mansour",
            "El Menzeh",
            "Ocean",
            "Kamra",
        ],
        "has_tram": True,
    },
    "Marrakech": {
        "base_price_apt": 14500,
        "base_price_villa": 25000,
        "premium_neighborhoods": [
            "Hivernage",
            "Palmeraie",
            "Gueliz-High",
            "Amelkis",
            "Targa-Premium",
        ],
        "standard_neighborhoods": [
            "Gueliz-Standard",
            "Medina",
            "Targa",
            "Mhamid",
            "Massira",
            "Iziki",
        ],
        "has_tram": False,
    },
    "Tanger": {
        "base_price_apt": 15500,
        "base_price_villa": 19000,
        "premium_neighborhoods": [
            "Malabata",
            "Marshane",
            "Achakkar",
            "Jebel Kebir",
            "California-Tanger",
        ],
        "standard_neighborhoods": [
            "Iberia",
            "Val Fleuri",
            "Beni Makada",
            "Dradeb",
            "Moghogha",
        ],
        "has_tram": False,
    },
    "Agadir": {
        "base_price_apt": 16500,
        "base_price_villa": 20000,
        "premium_neighborhoods": [
            "Founty",
            "Charaf",
            "Haut Founty",
            "Marina",
        ],
        "standard_neighborhoods": [
            "Dakhla",
            "Salam",
            "Anza",
            "Al Houda",
        ],
        "has_tram": False,
    },
}
# Categorical vocabularies; generate_data picks one value from each list per
# record with random.choice, so element order affects seeded output.
PROPERTY_TYPES = [
    "Appartement",
    "Villa",
    "Maison",
]
STANDINGS = [
    "Haut Standing",
    "Moyen Standing",
    "Economique",
]
CONDITIONS = [
    "Neuf",
    "Bon état",
    "A rénover",
]
ORIENTATIONS = [
    "Sud (Ensoleillé)",
    "Est",
    "Ouest",
    "Nord",
]
VIEWS = [
    "Sans vis-à-vis",
    "Vue sur mer",
    "Vue sur Parc/Jardin",
    "Vue sur rue",
]
RESIDENCY_TYPES = [
    "Résidence fermée & sécurisée",
    "Public / Quartier ouvert",
]
def generate_data(n_samples=20000, seed=None, output_path=None):
    """Generate synthetic Moroccan real-estate listings and save them as CSV.

    Every record draws a city, neighborhood, property type and a set of
    categorical and binary features at random. Its price is the city's base
    price per square metre multiplied by a product of feature multipliers,
    by the surface, and by a uniform noise factor in [0.97, 1.03], then
    rounded to the nearest thousand.

    Args:
        n_samples: Number of records to generate.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the output is reproducible without touching the global
            generator. When ``None`` the module-level ``random`` functions
            are used.
        output_path: Optional destination of the CSV file. Defaults to
            ``../data/morocco_real_estate_data_pro.csv`` relative to this
            file. Missing parent directories are created.

    Returns:
        The generated ``pandas.DataFrame`` (the same data written to disk).
    """
    rng = random if seed is None else random.Random(seed)

    # Loop-invariant lookups, built once instead of once per record.
    city_names = list(CITIES.keys())
    prestige_neighborhoods = {"Souissi", "Anfa", "Hivernage", "Ain Diab"}
    standing_map = {"Haut Standing": 1.5, "Moyen Standing": 1.0, "Economique": 0.6}
    view_map = {
        "Vue sur mer": 1.35,
        "Vue sur Parc/Jardin": 1.12,
        "Sans vis-à-vis": 1.10,
        "Vue sur rue": 0.95,
    }
    cond_map = {"Neuf": 1.25, "Bon état": 1.0, "A rénover": 0.7}

    data = []
    for _ in range(n_samples):
        city = rng.choice(city_names)
        city_cfg = CITIES[city]
        is_premium_zone = rng.random() < 0.35
        if is_premium_zone:
            neighborhoods = city_cfg["premium_neighborhoods"]
        else:
            neighborhoods = city_cfg["standard_neighborhoods"]
        neighborhood = rng.choice(neighborhoods)
        prop_type = rng.choice(PROPERTY_TYPES)
        standing = rng.choice(STANDINGS)
        condition = rng.choice(CONDITIONS)
        orientation = rng.choice(ORIENTATIONS)
        view = rng.choice(VIEWS)
        residency = rng.choice(RESIDENCY_TYPES)

        is_villa = prop_type == "Villa"
        is_apartment = prop_type == "Appartement"

        # Dimensions. Bedrooms are a subset of rooms, so the bedroom upper
        # bound is capped at the drawn room count.
        if is_villa:
            surface = rng.randint(200, 1500)
            rooms = rng.randint(5, 15)
            bedrooms = rng.randint(3, min(8, rooms))
            floor = 0
            base_price_m2 = city_cfg["base_price_villa"]
        else:
            # "Appartement" and "Maison" share the apartment sizing and the
            # apartment base price.
            surface = rng.randint(40, 350)
            rooms = rng.randint(1, 8)
            bedrooms = rng.randint(1, min(5, rooms))
            floor = rng.randint(0, 12)
            base_price_m2 = city_cfg["base_price_apt"]

        # Equipment flags. The random draws stay inside the short-circuited
        # expressions so they are only consumed when they can matter.
        has_lift = 1 if is_apartment and (floor > 2 or is_premium_zone) else 0
        has_pool = 1 if (is_villa and (is_premium_zone or rng.random() > 0.5)) else 0
        has_garden = 1 if (is_villa or (is_apartment and floor == 0 and rng.random() > 0.7)) else 0
        if is_premium_zone or standing == "Haut Standing":
            parking_spots = rng.randint(1, 3)
        else:
            parking_spots = rng.randint(0, 1)

        # Proximity flags; tram proximity is only possible in tram cities.
        proximity_tram = 1 if (city_cfg["has_tram"] and rng.random() > 0.6) else 0
        proximity_university = 1 if rng.random() > 0.7 else 0
        proximity_mosque = 1 if rng.random() > 0.3 else 0

        # --- Valuation: product of independent feature multipliers ---
        mult = 1.0
        if is_premium_zone:
            mult *= 1.6
        if neighborhood in prestige_neighborhoods:
            mult *= 1.4
        mult *= standing_map[standing]
        if residency == "Résidence fermée & sécurisée":
            mult *= 1.15
        if orientation == "Sud (Ensoleillé)":
            mult *= 1.08
        elif orientation == "Nord":
            mult *= 0.95
        mult *= view_map[view]
        mult *= cond_map[condition]
        if proximity_tram == 1:
            mult *= 1.05
        if proximity_university == 1:
            mult *= 1.07
        if proximity_mosque == 1:
            mult *= 1.04
        if has_pool:
            mult *= 1.2
        if has_garden:
            mult *= 1.1

        price_per_m2 = base_price_m2 * mult
        # +/-3% noise so identical feature sets do not share one exact price.
        total_price = (price_per_m2 * surface) * rng.uniform(0.97, 1.03)

        data.append({
            "City": city,
            "Neighborhood": neighborhood,
            "Type": prop_type,
            "Surface": surface,
            "Rooms": rooms,
            "Bedrooms": bedrooms,
            "Standing": standing,
            "Residency": residency,
            "Orientation": orientation,
            "View": view,
            "Condition": condition,
            "Floor": floor,
            "Lift": int(has_lift),
            "Pool": int(has_pool),
            "Garden": int(has_garden),
            "Parking_Spots": parking_spots,
            "Proximity_Tram": int(proximity_tram),
            "Proximity_University": int(proximity_university),
            "Proximity_Mosque": int(proximity_mosque),
            "Price": round(total_price, -3),
        })

    df = pd.DataFrame(data)

    if output_path is None:
        output_path = os.path.join(os.path.dirname(__file__), "..", "data", "morocco_real_estate_data_pro.csv")
    # to_csv does not create missing directories; do it here so a fresh
    # checkout without a data/ folder does not fail after generating rows.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_path, index=False, encoding='utf-8-sig')
    print(f"Generated {n_samples} High-Fidelity records in {output_path}")
    return df
# Script entry point: running this file directly builds the 20,000-record dataset.
if __name__ == "__main__":
    generate_data(n_samples=20000)