File size: 5,625 Bytes
fc41845
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import pandas as pd
import numpy as np
import random
import os

# Per-city configuration for the synthetic Moroccan real-estate generator
# (original author's note: "High-Fidelity", Mubawab/ANCFCC 2024/2025).
#
# For every city:
#   base_price_apt / base_price_villa -- base price per square metre; the
#       generator multiplies it by the surface (currency presumably MAD --
#       TODO confirm).
#   premium_neighborhoods / standard_neighborhoods -- pools the neighbourhood
#       is drawn from, depending on whether a premium zone was rolled.
#   has_tram -- whether a listing in this city may be flagged as near a tram.
#
# The insertion order of the cities and the order inside each list are kept
# stable because the generator samples from them positionally.
CITIES = {
    "Casablanca": {
        "base_price_apt": 13900,
        "base_price_villa": 20500,
        "premium_neighborhoods": [
            "Anfa",
            "Ain Diab",
            "Gauthier",
            "Les Princesses",
            "Bouskoura Ville Verte",
        ],
        "standard_neighborhoods": [
            "Maarif",
            "Sidi Maârouf",
            "Oulfa",
            "Bernoussi",
            "Belvédère",
        ],
        "has_tram": True,
    },
    "Rabat": {
        "base_price_apt": 14500,
        "base_price_villa": 20300,
        "premium_neighborhoods": [
            "Hay Riad",
            "Souissi",
            "Agdal",
            "Ambassadeurs",
        ],
        "standard_neighborhoods": [
            "Hassan",
            "Yacoub El Mansour",
            "El Menzeh",
            "Ocean",
        ],
        "has_tram": True,
    },
    "Marrakech": {
        "base_price_apt": 16000,
        "base_price_villa": 22000,
        "premium_neighborhoods": [
            "Hivernage",
            "Palmeraie",
            "Gueliz-High",
            "Amelkis",
        ],
        "standard_neighborhoods": [
            "Gueliz-Standard",
            "Medina",
            "Targa",
            "Mhamid",
            "Massira",
        ],
        "has_tram": False,
    },
    "Tanger": {
        "base_price_apt": 11000,
        "base_price_villa": 15000,
        "premium_neighborhoods": [
            "Malabata",
            "Marshane",
            "Achakkar",
            "Jebel Kebir",
        ],
        "standard_neighborhoods": [
            "Iberia",
            "Val Fleuri",
            "Beni Makada",
            "Dradeb",
        ],
        "has_tram": False,
    },
}

# Categorical value pools; each listing attribute is drawn uniformly from one
# of these lists.
PROPERTY_TYPES = [
    "Appartement",
    "Villa",
    "Maison",
]
STANDINGS = [
    "Haut Standing",
    "Moyen Standing",
    "Economique",
]
CONDITIONS = [
    "Neuf",
    "Bon état",
    "A rénover",
]
ORIENTATIONS = [
    "Sud (Ensoleillé)",
    "Est",
    "Ouest",
    "Nord",
]
VIEWS = [
    "Sans vis-à-vis",
    "Vue sur mer",
    "Vue sur Parc/Jardin",
    "Vue sur rue",
]
RESIDENCY_TYPES = [
    "Résidence fermée & sécurisée",
    "Public / Quartier ouvert",
]

def generate_data(n_samples=20000, seed=None, output_path="data.csv"):
    """Generate a synthetic real-estate listings dataset and return it.

    Every row is one randomly drawn listing: a city and neighbourhood from
    ``CITIES``, categorical attributes from the module-level value pools,
    size/amenity fields that depend on the property type, and a price
    obtained by scaling the city's base price per square metre with a chain
    of multipliers (zone, standing, view, condition, amenities, ...), a
    +/-3 % random noise factor, and rounding to the nearest thousand.

    Args:
        n_samples: Number of listings to generate. Zero (or a negative
            value) produces an empty frame that still has every column.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            is used so the output is reproducible and the global generator
            is left untouched. When ``None`` the shared ``random`` module
            functions are used, so a prior ``random.seed(...)`` call by the
            caller keeps working.
        output_path: Where to write the CSV (without the index). Pass
            ``None`` to skip writing and only return the frame.

    Returns:
        A ``pandas.DataFrame`` with one row per generated listing.
    """
    # The random module exposes the same choice/random/randint/uniform API as
    # a Random instance, so either can stand in for ``rng``.
    rng = random if seed is None else random.Random(seed)

    # Loop-invariant lookup tables, built once instead of on every iteration.
    city_names = list(CITIES)
    flagship_neighborhoods = ("Souissi", "Anfa", "Hivernage", "Ain Diab")
    standing_map = {"Haut Standing": 1.5, "Moyen Standing": 1.0, "Economique": 0.6}
    view_map = {"Vue sur mer": 1.35, "Vue sur Parc/Jardin": 1.12, "Sans vis-à-vis": 1.10, "Vue sur rue": 0.95}
    cond_map = {"Neuf": 1.25, "Bon état": 1.0, "A rénover": 0.7}

    # Explicit schema so that an empty result still has headers (an empty
    # CSV without a header row cannot be read back by pandas).
    columns = [
        "City", "Neighborhood", "Type", "Surface", "Rooms", "Bedrooms",
        "Standing", "Residency", "Orientation", "View", "Condition", "Floor",
        "Lift", "Pool", "Garden", "Parking_Spots", "Proximity_Tram",
        "Proximity_University", "Proximity_Mosque", "Price",
    ]

    data = []
    for _ in range(n_samples):
        city = rng.choice(city_names)
        city_cfg = CITIES[city]
        is_premium_zone = rng.random() < 0.35
        if is_premium_zone:
            neighborhoods = city_cfg["premium_neighborhoods"]
        else:
            neighborhoods = city_cfg["standard_neighborhoods"]
        neighborhood = rng.choice(neighborhoods)

        prop_type = rng.choice(PROPERTY_TYPES)
        standing = rng.choice(STANDINGS)
        condition = rng.choice(CONDITIONS)
        orientation = rng.choice(ORIENTATIONS)
        view = rng.choice(VIEWS)
        residency = rng.choice(RESIDENCY_TYPES)

        is_villa = prop_type == "Villa"
        is_apartment = prop_type == "Appartement"

        if is_villa:
            surface = rng.randint(200, 1500)
            rooms = rng.randint(5, 15)
            # A listing cannot have more bedrooms than rooms.
            bedrooms = rng.randint(3, min(8, rooms))
            floor = 0
            base_price_m2 = city_cfg["base_price_villa"]
        else:
            surface = rng.randint(40, 350)
            rooms = rng.randint(1, 8)
            # A listing cannot have more bedrooms than rooms.
            bedrooms = rng.randint(1, min(4, rooms))
            # NOTE(review): "Maison" rows share this branch, so houses also
            # get a floor in 0-12 and the apartment base price -- confirm
            # whether that is intended.
            floor = rng.randint(0, 12)
            base_price_m2 = city_cfg["base_price_apt"]

        # Amenities. The random draws are deliberately short-circuited behind
        # the type checks so they are only consumed when they can matter.
        has_lift = 1 if is_apartment and (floor > 2 or is_premium_zone) else 0
        has_pool = 1 if is_villa and (is_premium_zone or rng.random() > 0.5) else 0
        has_garden = 1 if is_villa or (is_apartment and floor == 0 and rng.random() > 0.7) else 0
        if is_premium_zone or standing == "Haut Standing":
            parking_spots = rng.randint(1, 3)
        else:
            parking_spots = rng.randint(0, 1)
        proximity_tram = 1 if city_cfg["has_tram"] and rng.random() > 0.6 else 0
        proximity_university = 1 if rng.random() > 0.7 else 0
        proximity_mosque = 1 if rng.random() > 0.3 else 0

        # Price multiplier: every factor below scales the base price per m2.
        mult = 1.0
        if is_premium_zone:
            mult *= 1.6
        if neighborhood in flagship_neighborhoods:
            mult *= 1.4
        mult *= standing_map[standing]
        if residency == "Résidence fermée & sécurisée":
            mult *= 1.15
        if orientation == "Sud (Ensoleillé)":
            mult *= 1.08
        elif orientation == "Nord":
            mult *= 0.95
        mult *= view_map[view]
        mult *= cond_map[condition]
        if proximity_tram:
            mult *= 1.05
        if proximity_university:
            mult *= 1.07
        if proximity_mosque:
            mult *= 1.04
        if has_pool:
            mult *= 1.2
        if has_garden:
            mult *= 1.1

        price_per_m2 = base_price_m2 * mult
        # +/-3 % noise so identical attribute combinations do not collapse to
        # exactly the same price.
        total_price = (price_per_m2 * surface) * rng.uniform(0.97, 1.03)

        data.append({
            "City": city,
            "Neighborhood": neighborhood,
            "Type": prop_type,
            "Surface": surface,
            "Rooms": rooms,
            "Bedrooms": bedrooms,
            "Standing": standing,
            "Residency": residency,
            "Orientation": orientation,
            "View": view,
            "Condition": condition,
            "Floor": floor,
            "Lift": has_lift,
            "Pool": has_pool,
            "Garden": has_garden,
            "Parking_Spots": parking_spots,
            "Proximity_Tram": proximity_tram,
            "Proximity_University": proximity_university,
            "Proximity_Mosque": proximity_mosque,
            # Round to the nearest thousand.
            "Price": round(total_price, -3),
        })

    df = pd.DataFrame(data, columns=columns)
    if output_path is not None:
        df.to_csv(output_path, index=False)
    return df

if __name__ == "__main__":
    # Script entry point: build the default-sized dataset of 20,000 listings.
    generate_data(n_samples=20000)