ALBORAQ-AI-IMMO / deployment /data_generator.py
mhdbbbbb's picture
Upload folder using huggingface_hub
fc41845 verified
import pandas as pd
import numpy as np
import random
import os
# Per-city configuration for the synthetic Moroccan real-estate generator
# (header in the original: "High-Fidelity (Mubawab/ANCFCC 2024/2025)").
#
# Each city maps to:
#   base_price_apt        - base price per m2 used for every non-villa type
#   base_price_villa      - base price per m2 used for villas
#   premium_neighborhoods - names sampled when a listing falls in a premium zone
#   standard_neighborhoods- names sampled otherwise
#   has_tram              - whether a listing in this city may be flagged as
#                           close to a tram line
#
# NOTE(review): the currency of the base prices is not stated in this file
# (presumably MAD) - confirm before documenting it elsewhere.
# Key order matters: the generator picks a city from list(CITIES.keys()).
CITIES = {
    "Casablanca": {
        "base_price_apt": 13900,
        "base_price_villa": 20500,
        "premium_neighborhoods": [
            "Anfa",
            "Ain Diab",
            "Gauthier",
            "Les Princesses",
            "Bouskoura Ville Verte",
        ],
        "standard_neighborhoods": [
            "Maarif",
            "Sidi Maârouf",
            "Oulfa",
            "Bernoussi",
            "Belvédère",
        ],
        "has_tram": True,
    },
    "Rabat": {
        "base_price_apt": 14500,
        "base_price_villa": 20300,
        "premium_neighborhoods": [
            "Hay Riad",
            "Souissi",
            "Agdal",
            "Ambassadeurs",
        ],
        "standard_neighborhoods": [
            "Hassan",
            "Yacoub El Mansour",
            "El Menzeh",
            "Ocean",
        ],
        "has_tram": True,
    },
    "Marrakech": {
        "base_price_apt": 16000,
        "base_price_villa": 22000,
        "premium_neighborhoods": [
            "Hivernage",
            "Palmeraie",
            "Gueliz-High",
            "Amelkis",
        ],
        "standard_neighborhoods": [
            "Gueliz-Standard",
            "Medina",
            "Targa",
            "Mhamid",
            "Massira",
        ],
        "has_tram": False,
    },
    "Tanger": {
        "base_price_apt": 11000,
        "base_price_villa": 15000,
        "premium_neighborhoods": [
            "Malabata",
            "Marshane",
            "Achakkar",
            "Jebel Kebir",
        ],
        "standard_neighborhoods": [
            "Iberia",
            "Val Fleuri",
            "Beni Makada",
            "Dradeb",
        ],
        "has_tram": False,
    },
}
# Categorical vocabularies sampled uniformly by the generator. The strings are
# emitted verbatim into the dataset, and several of them are also used as
# lookup keys for price multipliers, so they must not be altered.

# Kind of dwelling; only "Villa" uses the villa base price.
PROPERTY_TYPES = [
    "Appartement",
    "Villa",
    "Maison",
]

# Finish / quality tier of the property.
STANDINGS = [
    "Haut Standing",
    "Moyen Standing",
    "Economique",
]

# Physical state of the property.
CONDITIONS = [
    "Neuf",
    "Bon état",
    "A rénover",
]

# Main exposure of the property.
ORIENTATIONS = [
    "Sud (Ensoleillé)",
    "Est",
    "Ouest",
    "Nord",
]

# What the property looks out onto.
VIEWS = [
    "Sans vis-à-vis",
    "Vue sur mer",
    "Vue sur Parc/Jardin",
    "Vue sur rue",
]

# Gated residence versus open neighbourhood.
RESIDENCY_TYPES = [
    "Résidence fermée & sécurisée",
    "Public / Quartier ouvert",
]
def generate_data(n_samples=20000, output_path="data.csv", seed=None):
    """Generate a synthetic real-estate listings dataset.

    Each row is built by sampling a city, neighbourhood and categorical
    attributes uniformly at random, drawing size/amenity features, and then
    pricing the listing as::

        base price per m2 (city + type) * product of multipliers * surface

    with +/-3% uniform noise, rounded to the nearest 1000.

    Args:
        n_samples: Number of rows to generate. Zero (or a negative value)
            yields an empty frame that still carries the full column schema.
        output_path: Where to write the CSV (without the index). Defaults to
            ``"data.csv"`` in the current working directory, as before. Pass
            ``None`` to skip writing and only return the frame.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the result is reproducible and the global RNG state is
            left untouched. When ``None``, the module-level ``random``
            functions are used (so a prior ``random.seed(...)`` still applies).

    Returns:
        pandas.DataFrame with columns City, Neighborhood, Type, Surface,
        Rooms, Bedrooms, Standing, Residency, Orientation, View, Condition,
        Floor, Lift, Pool, Garden, Parking_Spots, Proximity_Tram,
        Proximity_University, Proximity_Mosque and Price.
    """
    # Both the module and a Random instance expose choice/random/randint/uniform.
    rng = random if seed is None else random.Random(seed)

    # Loop-invariant lookups, built once instead of once per row.
    city_names = list(CITIES)
    flagship_neighborhoods = {"Souissi", "Anfa", "Hivernage", "Ain Diab"}
    standing_factor = {"Haut Standing": 1.5, "Moyen Standing": 1.0, "Economique": 0.6}
    # Orientations not listed here are price-neutral.
    orientation_factor = {"Sud (Ensoleillé)": 1.08, "Nord": 0.95}
    view_factor = {
        "Vue sur mer": 1.35,
        "Vue sur Parc/Jardin": 1.12,
        "Sans vis-à-vis": 1.10,
        "Vue sur rue": 0.95,
    }
    condition_factor = {"Neuf": 1.25, "Bon état": 1.0, "A rénover": 0.7}

    # Explicit schema so that an empty result still has the expected columns.
    columns = [
        "City",
        "Neighborhood",
        "Type",
        "Surface",
        "Rooms",
        "Bedrooms",
        "Standing",
        "Residency",
        "Orientation",
        "View",
        "Condition",
        "Floor",
        "Lift",
        "Pool",
        "Garden",
        "Parking_Spots",
        "Proximity_Tram",
        "Proximity_University",
        "Proximity_Mosque",
        "Price",
    ]

    rows = []
    for _ in range(n_samples):
        # --- Location and categorical attributes (uniform draws) ---
        city = rng.choice(city_names)
        profile = CITIES[city]
        is_premium_zone = rng.random() < 0.35  # ~35% of listings are premium
        zone_key = "premium_neighborhoods" if is_premium_zone else "standard_neighborhoods"
        neighborhood = rng.choice(profile[zone_key])

        prop_type = rng.choice(PROPERTY_TYPES)
        standing = rng.choice(STANDINGS)
        condition = rng.choice(CONDITIONS)
        orientation = rng.choice(ORIENTATIONS)
        view = rng.choice(VIEWS)
        residency = rng.choice(RESIDENCY_TYPES)

        is_villa = prop_type == "Villa"
        is_apartment = prop_type == "Appartement"

        # --- Size features ---
        if is_villa:
            surface = rng.randint(200, 1500)
            rooms = rng.randint(5, 15)
            bedrooms = rng.randint(3, 8)
            floor = 0
            base_price_m2 = profile["base_price_villa"]
        else:
            # NOTE(review): every non-villa type, including "Maison", draws a
            # floor in 0-12 and uses the apartment base price - confirm that
            # this is intended for houses.
            surface = rng.randint(40, 350)
            rooms = rng.randint(1, 8)
            bedrooms = rng.randint(1, 4)
            floor = rng.randint(0, 12)
            base_price_m2 = profile["base_price_apt"]
        # Bedrooms are a subset of rooms; the two independent draws above could
        # otherwise produce e.g. 1 room with 4 bedrooms. Clamping after the
        # draw keeps the random stream identical to the unclamped version.
        bedrooms = min(bedrooms, rooms)

        # --- Amenities (short-circuiting keeps the number of RNG draws stable) ---
        has_lift = int(is_apartment and (floor > 2 or is_premium_zone))
        has_pool = int(is_villa and (is_premium_zone or rng.random() > 0.5))
        has_garden = int(
            is_villa or (is_apartment and floor == 0 and rng.random() > 0.7)
        )
        if is_premium_zone or standing == "Haut Standing":
            parking_spots = rng.randint(1, 3)
        else:
            parking_spots = rng.randint(0, 1)

        # --- Proximity flags ---
        proximity_tram = int(profile["has_tram"] and rng.random() > 0.6)
        proximity_university = int(rng.random() > 0.7)
        proximity_mosque = int(rng.random() > 0.3)

        # --- Price multiplier (factor order kept fixed for float stability) ---
        mult = 1.0
        if is_premium_zone:
            mult *= 1.6
        if neighborhood in flagship_neighborhoods:
            mult *= 1.4
        mult *= standing_factor[standing]
        if residency == "Résidence fermée & sécurisée":
            mult *= 1.15
        mult *= orientation_factor.get(orientation, 1.0)
        mult *= view_factor[view]
        mult *= condition_factor[condition]
        if proximity_tram:
            mult *= 1.05
        if proximity_university:
            mult *= 1.07
        if proximity_mosque:
            mult *= 1.04
        if has_pool:
            mult *= 1.2
        if has_garden:
            mult *= 1.1

        price_per_m2 = base_price_m2 * mult
        # +/-3% multiplicative noise on the total price.
        total_price = (price_per_m2 * surface) * rng.uniform(0.97, 1.03)

        rows.append({
            "City": city,
            "Neighborhood": neighborhood,
            "Type": prop_type,
            "Surface": surface,
            "Rooms": rooms,
            "Bedrooms": bedrooms,
            "Standing": standing,
            "Residency": residency,
            "Orientation": orientation,
            "View": view,
            "Condition": condition,
            "Floor": floor,
            "Lift": has_lift,
            "Pool": has_pool,
            "Garden": has_garden,
            "Parking_Spots": parking_spots,
            "Proximity_Tram": proximity_tram,
            "Proximity_University": proximity_university,
            "Proximity_Mosque": proximity_mosque,
            # Negative ndigits rounds to the nearest thousand.
            "Price": round(total_price, -3),
        })

    df = pd.DataFrame(rows, columns=columns)
    if output_path is not None:
        df.to_csv(output_path, index=False)
    return df
if __name__ == "__main__":
    # Script entry point: build the default-size dataset (20,000 rows).
    generate_data(n_samples=20000)