dcrey7 committed on
Commit
ba08c19
·
0 Parent(s):

feat: initial DVF data pipeline with 6-level price aggregation

Browse files
.gitignore ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---- Data (raw and intermediate - too large to commit) ----
2
+ data/raw/
3
+ data/processed/
4
+ # NOTE: data/aggregated/ is NOT ignored - those JSON files are needed by the app
5
+
6
+ # ---- Python ----
7
+ __pycache__/
8
+ *.py[cod]
9
+ *.egg-info/
10
+ dist/
11
+ build/
12
+ wheels/
13
+ .eggs/
14
+ *.egg
15
+ .venv/
16
+ venv/
17
+
18
+ # ---- Jupyter ----
19
+ .ipynb_checkpoints/
20
+
21
+ # ---- Environment / secrets ----
22
+ .env
23
+ .env.*
24
+
25
+ # ---- Claude Code / MCP / IDE ----
26
+ .claude/
27
+ .mcp/
28
+ .vscode/
29
+ .idea/
30
+ *.code-workspace
31
+
32
+ # ---- Working notes (not part of the deliverable) ----
33
+ updates/
34
+
35
+ # ---- Reference repos (downloaded for research, not part of project) ----
36
+ explore.data.gouv.fr/
37
+ data-gouv-skill/
38
+ datagouv-mcp/
39
+ stats-explorer-datagouv/
40
+
41
+ # ---- OS ----
42
+ .DS_Store
43
+ Thumbs.db
44
+
45
+ # ---- Misc ----
46
+ *.log
47
+ uv.lock
48
+ .python-version
README.md ADDED
File without changes
data/aggregated/prices_commune.json ADDED
The diff for this file is too large to render. See raw diff
 
data/aggregated/prices_country.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"FR": {"tous": {"median": 2307.7, "mean": 2953.2, "q1": 1361.4, "q3": 3690.4, "volume": 4735156, "confidence": 0.6}, "appartement": {"median": 3106.7, "mean": 3878.9, "q1": 1985.3, "q3": 4742.4, "volume": 1987882, "confidence": 0.645}, "maison": {"median": 1858.8, "mean": 2266.8, "q1": 1112.6, "q3": 2900.8, "volume": 2549352, "confidence": 0.615}, "local": {"median": 1597.9, "mean": 2497.1, "q1": 827.0, "q3": 3000.0, "volume": 197922, "confidence": 0.6}}}
data/aggregated/prices_department.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"01": {"tous": {"median": 2250.0, "mean": 2545.7, "q1": 1500.0, "q3": 3278.7, "volume": 44534, "confidence": 0.684}, "appartement": {"median": 2380.0, "mean": 2771.5, "q1": 1630.9, "q3": 3823.5, "volume": 15712, "confidence": 0.631}, "maison": {"median": 2250.0, "mean": 2488.5, "q1": 1500.0, "q3": 3125.0, "volume": 26914, "confidence": 0.711}, "local": {"median": 1098.4, "mean": 1493.8, "q1": 645.6, "q3": 1839.5, "volume": 1908, "confidence": 0.6}}, "02": {"tous": {"median": 1105.3, "mean": 1215.1, "q1": 749.5, "q3": 1545.5, "volume": 33145, "confidence": 0.712}, "appartement": {"median": 1197.9, "mean": 1298.5, "q1": 921.9, "q3": 1588.8, "volume": 3910, "confidence": 0.777}, "maison": {"median": 1092.1, "mean": 1186.5, "q1": 737.8, "q3": 1535.7, "volume": 28134, "confidence": 0.708}, "local": {"median": 877.2, "mean": 1648.9, "q1": 459.8, "q3": 1777.8, "volume": 1101, "confidence": 0.6}}, "03": {"tous": {"median": 971.7, "mean": 1111.3, "q1": 613.2, "q3": 1437.5, "volume": 27890, "confidence": 0.661}, "appartement": {"median": 1121.5, "mean": 1249.1, "q1": 724.6, "q3": 1578.9, "volume": 7543, "confidence": 0.695}, "maison": {"median": 926.1, "mean": 1058.1, "q1": 588.2, "q3": 1383.5, "volume": 19418, "confidence": 0.657}, "local": {"median": 735.3, "mean": 1105.8, "q1": 416.7, "q3": 1270.5, "volume": 929, "confidence": 0.6}}, "04": {"tous": {"median": 1987.2, "mean": 2106.6, "q1": 1363.6, "q3": 2681.7, "volume": 17062, "confidence": 0.735}, "appartement": {"median": 1890.6, "mean": 1987.9, "q1": 1367.1, "q3": 2493.3, "volume": 7888, "confidence": 0.762}, "maison": {"median": 2170.0, "mean": 2270.8, "q1": 1443.8, "q3": 2891.6, "volume": 8453, "confidence": 0.733}, "local": {"median": 1118.9, "mean": 1480.4, "q1": 687.5, "q3": 1880.0, "volume": 721, "confidence": 0.6}}, "05": {"tous": {"median": 2418.4, "mean": 2596.6, "q1": 1750.0, "q3": 3222.2, "volume": 19480, "confidence": 0.756}, "appartement": {"median": 2473.2, "mean": 2661.5, "q1": 1875.0, "q3": 3254.0, 
"volume": 12155, "confidence": 0.777}, "maison": {"median": 2433.3, "mean": 2618.8, "q1": 1536.0, "q3": 3333.3, "volume": 5287, "confidence": 0.705}, "local": {"median": 2049.6, "mean": 2151.6, "q1": 1324.8, "q3": 2677.1, "volume": 2038, "confidence": 0.736}}, "06": {"tous": {"median": 4347.8, "mean": 4709.7, "q1": 3220.9, "q3": 5659.1, "volume": 138875, "confidence": 0.776}, "appartement": {"median": 4444.4, "mean": 4801.3, "q1": 3423.9, "q3": 5684.2, "volume": 110545, "confidence": 0.797}, "maison": {"median": 4000.0, "mean": 4622.5, "q1": 2520.2, "q3": 5769.2, "volume": 21801, "confidence": 0.675}, "local": {"median": 2900.0, "mean": 3449.3, "q1": 1829.3, "q3": 4400.0, "volume": 6529, "confidence": 0.645}}, "07": {"tous": {"median": 1634.1, "mean": 1786.4, "q1": 1067.6, "q3": 2338.2, "volume": 23768, "confidence": 0.689}, "appartement": {"median": 1384.6, "mean": 1510.9, "q1": 959.0, "q3": 1916.3, "volume": 6004, "confidence": 0.723}, "maison": {"median": 1804.0, "mean": 1921.8, "q1": 1178.6, "q3": 2481.8, "volume": 16726, "confidence": 0.711}, "local": {"median": 918.6, "mean": 1198.0, "q1": 533.4, "q3": 1500.0, "volume": 1038, "confidence": 0.6}}, "08": {"tous": {"median": 1045.5, "mean": 1154.5, "q1": 684.4, "q3": 1506.8, "volume": 16526, "confidence": 0.685}, "appartement": {"median": 964.3, "mean": 1022.0, "q1": 684.8, "q3": 1288.7, "volume": 3070, "confidence": 0.75}, "maison": {"median": 1089.1, "mean": 1185.5, "q1": 702.5, "q3": 1581.2, "volume": 12847, "confidence": 0.677}, "local": {"median": 692.3, "mean": 1169.0, "q1": 432.4, "q3": 1155.2, "volume": 609, "confidence": 0.6}}, "09": {"tous": {"median": 1277.1, "mean": 1384.5, "q1": 833.3, "q3": 1785.7, "volume": 14126, "confidence": 0.702}, "appartement": {"median": 1296.7, "mean": 1383.2, "q1": 893.8, "q3": 1704.5, "volume": 2343, "confidence": 0.75}, "maison": {"median": 1283.2, "mean": 1394.6, "q1": 833.3, "q3": 1808.8, "volume": 11335, "confidence": 0.696}, "local": {"median": 855.7, "mean": 
1136.5, "q1": 500.0, "q3": 1649.5, "volume": 448, "confidence": 0.6}}, "10": {"tous": {"median": 1403.4, "mean": 1492.9, "q1": 944.7, "q3": 1904.8, "volume": 22180, "confidence": 0.726}, "appartement": {"median": 1529.7, "mean": 1649.0, "q1": 1189.2, "q3": 1948.3, "volume": 7506, "confidence": 0.802}, "maison": {"median": 1315.8, "mean": 1418.5, "q1": 844.4, "q3": 1891.0, "volume": 13764, "confidence": 0.682}, "local": {"median": 969.0, "mean": 1331.4, "q1": 559.4, "q3": 1509.4, "volume": 910, "confidence": 0.608}}, "11": {"tous": {"median": 1700.0, "mean": 1939.3, "q1": 1070.9, "q3": 2520.8, "volume": 41904, "confidence": 0.659}, "appartement": {"median": 2031.0, "mean": 2207.6, "q1": 1287.0, "q3": 2910.4, "volume": 12311, "confidence": 0.68}, "maison": {"median": 1608.6, "mean": 1852.0, "q1": 1011.9, "q3": 2355.6, "volume": 28398, "confidence": 0.666}, "local": {"median": 950.0, "mean": 1251.1, "q1": 547.8, "q3": 1571.4, "volume": 1195, "confidence": 0.6}}, "12": {"tous": {"median": 1331.3, "mean": 1435.8, "q1": 821.3, "q3": 1943.5, "volume": 21024, "confidence": 0.663}, "appartement": {"median": 1641.0, "mean": 1641.7, "q1": 1122.4, "q3": 2145.3, "volume": 6345, "confidence": 0.751}, "maison": {"median": 1223.7, "mean": 1371.3, "q1": 760.0, "q3": 1848.8, "volume": 13774, "confidence": 0.644}, "local": {"median": 755.4, "mean": 973.0, "q1": 429.4, "q3": 1235.3, "volume": 905, "confidence": 0.6}}, "13": {"tous": {"median": 3283.6, "mean": 3552.7, "q1": 2302.6, "q3": 4406.8, "volume": 149587, "confidence": 0.744}, "appartement": {"median": 3112.2, "mean": 3348.8, "q1": 2220.0, "q3": 4184.6, "volume": 96487, "confidence": 0.748}, "maison": {"median": 3798.1, "mean": 4132.5, "q1": 2830.9, "q3": 4955.2, "volume": 45523, "confidence": 0.776}, "local": {"median": 2064.3, "mean": 2665.1, "q1": 1269.2, "q3": 3271.0, "volume": 7577, "confidence": 0.612}}, "14": {"tous": {"median": 2400.0, "mean": 2644.1, "q1": 1481.5, "q3": 3381.0, "volume": 64499, "confidence": 0.683}, 
"appartement": {"median": 3015.9, "mean": 3312.5, "q1": 2191.0, "q3": 4105.3, "volume": 25762, "confidence": 0.746}, "maison": {"median": 1988.8, "mean": 2193.3, "q1": 1137.2, "q3": 2830.2, "volume": 36108, "confidence": 0.659}, "local": {"median": 1785.7, "mean": 2286.8, "q1": 894.6, "q3": 2947.4, "volume": 2629, "confidence": 0.6}}, "15": {"tous": {"median": 1098.6, "mean": 1216.9, "q1": 700.0, "q3": 1586.2, "volume": 10635, "confidence": 0.677}, "appartement": {"median": 1210.9, "mean": 1292.1, "q1": 883.9, "q3": 1575.5, "volume": 2845, "confidence": 0.772}, "maison": {"median": 1060.7, "mean": 1196.2, "q1": 664.7, "q3": 1612.1, "volume": 7399, "confidence": 0.643}, "local": {"median": 633.3, "mean": 1060.6, "q1": 388.3, "q3": 1044.8, "volume": 391, "confidence": 0.6}}, "16": {"tous": {"median": 1170.0, "mean": 1268.0, "q1": 757.5, "q3": 1663.0, "volume": 28466, "confidence": 0.69}, "appartement": {"median": 1486.5, "mean": 1529.6, "q1": 1156.2, "q3": 1807.0, "volume": 2841, "confidence": 0.825}, "maison": {"median": 1131.6, "mean": 1241.9, "q1": 739.6, "q3": 1641.8, "volume": 24709, "confidence": 0.681}, "local": {"median": 779.0, "mean": 1162.0, "q1": 439.7, "q3": 1294.0, "volume": 916, "confidence": 0.6}}, "17": {"tous": {"median": 2538.5, "mean": 2917.4, "q1": 1501.8, "q3": 3898.6, "volume": 68098, "confidence": 0.622}, "appartement": {"median": 3692.3, "mean": 3784.1, "q1": 2398.4, "q3": 4963.0, "volume": 14112, "confidence": 0.722}, "maison": {"median": 2307.7, "mean": 2706.4, "q1": 1358.0, "q3": 3542.2, "volume": 51383, "confidence": 0.621}, "local": {"median": 1817.2, "mean": 2383.2, "q1": 945.9, "q3": 3302.3, "volume": 2603, "confidence": 0.6}}, "18": {"tous": {"median": 1000.0, "mean": 1128.5, "q1": 647.5, "q3": 1489.8, "volume": 24469, "confidence": 0.663}, "appartement": {"median": 1282.0, "mean": 1312.1, "q1": 865.4, "q3": 1698.1, "volume": 3982, "confidence": 0.74}, "maison": {"median": 958.3, "mean": 1098.8, "q1": 626.9, "q3": 1433.7, "volume": 
19736, "confidence": 0.663}, "local": {"median": 700.0, "mean": 937.1, "q1": 400.0, "q3": 1150.4, "volume": 751, "confidence": 0.6}}, "19": {"tous": {"median": 1132.4, "mean": 1261.2, "q1": 721.2, "q3": 1666.7, "volume": 18577, "confidence": 0.666}, "appartement": {"median": 1300.0, "mean": 1360.8, "q1": 873.0, "q3": 1755.6, "volume": 4063, "confidence": 0.728}, "maison": {"median": 1094.6, "mean": 1248.8, "q1": 714.3, "q3": 1650.9, "volume": 13885, "confidence": 0.658}, "local": {"median": 666.7, "mean": 892.0, "q1": 407.6, "q3": 1100.0, "volume": 629, "confidence": 0.6}}, "21": {"tous": {"median": 2166.7, "mean": 2181.1, "q1": 1371.8, "q3": 2833.3, "volume": 43798, "confidence": 0.73}, "appartement": {"median": 2416.7, "mean": 2417.0, "q1": 1831.3, "q3": 2971.4, "volume": 20913, "confidence": 0.811}, "maison": {"median": 1852.6, "mean": 1970.9, "q1": 1076.4, "q3": 2666.7, "volume": 20961, "confidence": 0.657}, "local": {"median": 1500.0, "mean": 1906.1, "q1": 774.2, "q3": 2368.4, "volume": 1924, "confidence": 0.6}}, "22": {"tous": {"median": 1574.6, "mean": 1793.6, "q1": 967.7, "q3": 2311.8, "volume": 55291, "confidence": 0.659}, "appartement": {"median": 1958.3, "mean": 2244.5, "q1": 1426.8, "q3": 2758.6, "volume": 9533, "confidence": 0.728}, "maison": {"median": 1500.0, "mean": 1711.2, "q1": 911.5, "q3": 2226.1, "volume": 43708, "confidence": 0.649}, "local": {"median": 1027.4, "mean": 1453.6, "q1": 575.5, "q3": 1866.7, "volume": 2050, "confidence": 0.6}}, "23": {"tous": {"median": 710.9, "mean": 831.5, "q1": 461.5, "q3": 1050.0, "volume": 10623, "confidence": 0.669}, "appartement": {"median": 714.3, "mean": 761.4, "q1": 486.9, "q3": 971.8, "volume": 752, "confidence": 0.728}, "maison": {"median": 714.3, "mean": 838.7, "q1": 463.9, "q3": 1060.6, "volume": 9687, "confidence": 0.666}, "local": {"median": 473.1, "mean": 740.0, "q1": 326.1, "q3": 833.3, "volume": 184, "confidence": 0.6}}, "24": {"tous": {"median": 1214.3, "mean": 1359.1, "q1": 808.1, "q3": 1727.3, 
"volume": 36883, "confidence": 0.697}, "appartement": {"median": 1478.4, "mean": 1527.9, "q1": 1162.2, "q3": 1822.6, "volume": 4927, "confidence": 0.821}, "maison": {"median": 1166.7, "mean": 1330.9, "q1": 784.7, "q3": 1705.7, "volume": 30766, "confidence": 0.684}, "local": {"median": 854.5, "mean": 1389.3, "q1": 488.9, "q3": 1486.5, "volume": 1190, "confidence": 0.6}}, "25": {"tous": {"median": 1830.2, "mean": 1916.2, "q1": 1194.6, "q3": 2500.0, "volume": 38158, "confidence": 0.715}, "appartement": {"median": 1906.2, "mean": 1953.0, "q1": 1290.9, "q3": 2528.6, "volume": 20055, "confidence": 0.74}, "maison": {"median": 1800.0, "mean": 1927.4, "q1": 1180.8, "q3": 2509.3, "volume": 16302, "confidence": 0.705}, "local": {"median": 1136.4, "mean": 1406.0, "q1": 615.4, "q3": 1796.4, "volume": 1801, "confidence": 0.6}}, "26": {"tous": {"median": 1927.5, "mean": 2016.8, "q1": 1366.3, "q3": 2551.0, "volume": 37930, "confidence": 0.754}, "appartement": {"median": 1760.0, "mean": 1841.8, "q1": 1338.7, "q3": 2263.2, "volume": 13998, "confidence": 0.79}, "maison": {"median": 2133.3, "mean": 2170.1, "q1": 1478.7, "q3": 2714.3, "volume": 21876, "confidence": 0.768}, "local": {"median": 1192.6, "mean": 1576.9, "q1": 703.1, "q3": 1921.8, "volume": 2056, "confidence": 0.6}}, "27": {"tous": {"median": 1515.5, "mean": 1613.4, "q1": 1006.4, "q3": 2084.3, "volume": 42426, "confidence": 0.716}, "appartement": {"median": 1746.3, "mean": 1812.5, "q1": 1372.5, "q3": 2197.4, "volume": 5919, "confidence": 0.811}, "maison": {"median": 1470.2, "mean": 1590.2, "q1": 977.3, "q3": 2071.4, "volume": 35334, "confidence": 0.702}, "local": {"median": 960.7, "mean": 1309.6, "q1": 535.7, "q3": 1625.0, "volume": 1173, "confidence": 0.6}}, "28": {"tous": {"median": 1655.6, "mean": 1742.6, "q1": 1075.9, "q3": 2275.6, "volume": 31209, "confidence": 0.71}, "appartement": {"median": 2059.5, "mean": 2043.8, "q1": 1436.4, "q3": 2600.0, "volume": 6087, "confidence": 0.774}, "maison": {"median": 1575.8, "mean": 
1674.2, "q1": 1026.5, "q3": 2182.7, "volume": 24261, "confidence": 0.707}, "local": {"median": 994.9, "mean": 1539.4, "q1": 555.6, "q3": 1750.0, "volume": 861, "confidence": 0.6}}, "29": {"tous": {"median": 1818.2, "mean": 1924.9, "q1": 1231.9, "q3": 2432.4, "volume": 78321, "confidence": 0.736}, "appartement": {"median": 1971.8, "mean": 2094.8, "q1": 1529.8, "q3": 2515.0, "volume": 23098, "confidence": 0.8}, "maison": {"median": 1758.9, "mean": 1885.9, "q1": 1148.5, "q3": 2420.0, "volume": 52020, "confidence": 0.711}, "local": {"median": 1103.4, "mean": 1332.8, "q1": 631.6, "q3": 1797.9, "volume": 3203, "confidence": 0.6}}, "2A": {"tous": {"median": 3580.2, "mean": 3935.7, "q1": 2414.2, "q3": 4993.6, "volume": 11405, "confidence": 0.712}, "appartement": {"median": 3623.1, "mean": 3796.1, "q1": 2608.7, "q3": 4821.4, "volume": 6834, "confidence": 0.756}, "maison": {"median": 3812.6, "mean": 4459.6, "q1": 2271.9, "q3": 5700.0, "volume": 3746, "confidence": 0.64}, "local": {"median": 2411.8, "mean": 2713.4, "q1": 1428.6, "q3": 3555.6, "volume": 825, "confidence": 0.647}}, "2B": {"tous": {"median": 2894.7, "mean": 3047.9, "q1": 2000.0, "q3": 3800.0, "volume": 11685, "confidence": 0.751}, "appartement": {"median": 2924.5, "mean": 3051.2, "q1": 2131.9, "q3": 3777.8, "volume": 7342, "confidence": 0.775}, "maison": {"median": 2963.2, "mean": 3179.1, "q1": 1859.0, "q3": 3966.5, "volume": 3753, "confidence": 0.716}, "local": {"median": 1916.7, "mean": 2171.6, "q1": 1048.2, "q3": 2777.8, "volume": 590, "confidence": 0.639}}, "30": {"tous": {"median": 2176.1, "mean": 2382.8, "q1": 1415.4, "q3": 2994.0, "volume": 61106, "confidence": 0.71}, "appartement": {"median": 2066.7, "mean": 2448.3, "q1": 1325.0, "q3": 3075.0, "volume": 23317, "confidence": 0.661}, "maison": {"median": 2297.3, "mean": 2399.3, "q1": 1548.7, "q3": 3000.0, "volume": 35239, "confidence": 0.747}, "local": {"median": 1286.5, "mean": 1556.1, "q1": 760.0, "q3": 2000.0, "volume": 2550, "confidence": 0.614}}, 
"31": {"tous": {"median": 2718.4, "mean": 2828.5, "q1": 1972.2, "q3": 3468.1, "volume": 105201, "confidence": 0.78}, "appartement": {"median": 2875.0, "mean": 3069.4, "q1": 2295.7, "q3": 3673.8, "volume": 54728, "confidence": 0.808}, "maison": {"median": 2543.9, "mean": 2585.7, "q1": 1588.2, "q3": 3263.7, "volume": 46518, "confidence": 0.737}, "local": {"median": 1882.0, "mean": 2351.1, "q1": 1153.8, "q3": 2925.0, "volume": 3955, "confidence": 0.624}}, "32": {"tous": {"median": 1294.0, "mean": 1412.3, "q1": 873.2, "q3": 1804.6, "volume": 14858, "confidence": 0.712}, "appartement": {"median": 1445.3, "mean": 1473.4, "q1": 1125.0, "q3": 1779.2, "volume": 2273, "confidence": 0.819}, "maison": {"median": 1275.2, "mean": 1419.0, "q1": 854.2, "q3": 1830.1, "volume": 12124, "confidence": 0.694}, "local": {"median": 714.3, "mean": 935.8, "q1": 435.7, "q3": 1134.0, "volume": 461, "confidence": 0.609}}, "33": {"tous": {"median": 3351.1, "mean": 3543.5, "q1": 2126.3, "q3": 4464.3, "volume": 126310, "confidence": 0.721}, "appartement": {"median": 3776.3, "mean": 4001.3, "q1": 2944.4, "q3": 4736.1, "volume": 47692, "confidence": 0.81}, "maison": {"median": 2976.2, "mean": 3286.0, "q1": 1717.6, "q3": 4268.3, "volume": 74176, "confidence": 0.657}, "local": {"median": 2299.4, "mean": 2929.6, "q1": 1321.2, "q3": 3788.4, "volume": 4442, "confidence": 0.6}}, "34": {"tous": {"median": 2940.7, "mean": 3074.1, "q1": 1944.4, "q3": 3948.3, "volume": 118740, "confidence": 0.727}, "appartement": {"median": 3205.9, "mean": 3315.5, "q1": 2232.1, "q3": 4166.7, "volume": 63508, "confidence": 0.759}, "maison": {"median": 2698.8, "mean": 2857.4, "q1": 1751.3, "q3": 3705.1, "volume": 49976, "confidence": 0.71}, "local": {"median": 1902.8, "mean": 2216.6, "q1": 1145.0, "q3": 2875.4, "volume": 5256, "confidence": 0.636}}, "35": {"tous": {"median": 2562.5, "mean": 2768.9, "q1": 1688.2, "q3": 3550.0, "volume": 80001, "confidence": 0.709}, "appartement": {"median": 3238.7, "mean": 3415.0, "q1": 2500.0, 
"q3": 4156.2, "volume": 31218, "confidence": 0.795}, "maison": {"median": 2117.6, "mean": 2379.2, "q1": 1321.8, "q3": 2961.5, "volume": 45505, "confidence": 0.69}, "local": {"median": 1598.7, "mean": 2026.1, "q1": 914.3, "q3": 2661.3, "volume": 3278, "confidence": 0.6}}, "36": {"tous": {"median": 895.5, "mean": 1003.7, "q1": 589.3, "q3": 1289.5, "volume": 17022, "confidence": 0.687}, "appartement": {"median": 1026.4, "mean": 1059.4, "q1": 771.9, "q3": 1278.5, "volume": 1613, "confidence": 0.803}, "maison": {"median": 884.1, "mean": 999.5, "q1": 584.0, "q3": 1294.1, "volume": 14964, "confidence": 0.679}, "local": {"median": 598.5, "mean": 945.6, "q1": 370.4, "q3": 1031.2, "volume": 445, "confidence": 0.6}}, "37": {"tous": {"median": 2069.5, "mean": 2170.6, "q1": 1291.4, "q3": 2844.4, "volume": 45530, "confidence": 0.7}, "appartement": {"median": 2579.9, "mean": 2586.9, "q1": 1899.0, "q3": 3179.5, "volume": 14294, "confidence": 0.801}, "maison": {"median": 1822.9, "mean": 1996.7, "q1": 1117.6, "q3": 2628.6, "volume": 29305, "confidence": 0.668}, "local": {"median": 1241.7, "mean": 1727.5, "q1": 681.2, "q3": 2173.9, "volume": 1931, "confidence": 0.6}}, "38": {"tous": {"median": 2420.1, "mean": 2571.7, "q1": 1724.5, "q3": 3197.7, "volume": 93703, "confidence": 0.757}, "appartement": {"median": 2443.8, "mean": 2628.5, "q1": 1839.5, "q3": 3166.4, "volume": 49243, "confidence": 0.783}, "maison": {"median": 2500.0, "mean": 2612.3, "q1": 1700.4, "q3": 3309.9, "volume": 39048, "confidence": 0.742}, "local": {"median": 1408.5, "mean": 1762.5, "q1": 882.4, "q3": 2255.7, "volume": 5412, "confidence": 0.61}}, "39": {"tous": {"median": 1342.8, "mean": 1485.6, "q1": 864.7, "q3": 1902.7, "volume": 17911, "confidence": 0.691}, "appartement": {"median": 1327.4, "mean": 1487.3, "q1": 911.9, "q3": 1834.6, "volume": 5888, "confidence": 0.722}, "maison": {"median": 1400.0, "mean": 1516.9, "q1": 884.6, "q3": 1959.6, "volume": 11266, "confidence": 0.693}, "local": {"median": 719.3, "mean": 
1006.4, "q1": 408.2, "q3": 1220.3, "volume": 757, "confidence": 0.6}}, "40": {"tous": {"median": 2359.1, "mean": 2836.0, "q1": 1553.8, "q3": 3565.2, "volume": 38674, "confidence": 0.659}, "appartement": {"median": 2851.9, "mean": 3379.5, "q1": 2083.3, "q3": 4266.7, "volume": 11301, "confidence": 0.694}, "maison": {"median": 2195.1, "mean": 2661.5, "q1": 1386.8, "q3": 3333.3, "volume": 25876, "confidence": 0.645}, "local": {"median": 1486.5, "mean": 1748.3, "q1": 803.1, "q3": 2410.0, "volume": 1497, "confidence": 0.6}}, "41": {"tous": {"median": 1250.0, "mean": 1360.6, "q1": 813.3, "q3": 1764.7, "volume": 25118, "confidence": 0.696}, "appartement": {"median": 1646.1, "mean": 1628.2, "q1": 1193.8, "q3": 2023.8, "volume": 3583, "confidence": 0.798}, "maison": {"median": 1194.4, "mean": 1320.2, "q1": 791.1, "q3": 1703.7, "volume": 20577, "confidence": 0.694}, "local": {"median": 844.8, "mean": 1228.8, "q1": 466.7, "q3": 1400.0, "volume": 958, "confidence": 0.6}}, "42": {"tous": {"median": 1333.3, "mean": 1509.5, "q1": 949.0, "q3": 1935.5, "volume": 55648, "confidence": 0.704}, "appartement": {"median": 1214.3, "mean": 1321.8, "q1": 925.0, "q3": 1592.1, "volume": 27539, "confidence": 0.78}, "maison": {"median": 1674.9, "mean": 1778.4, "q1": 1094.3, "q3": 2347.0, "volume": 25044, "confidence": 0.701}, "local": {"median": 795.5, "mean": 999.7, "q1": 483.9, "q3": 1220.3, "volume": 3065, "confidence": 0.63}}, "43": {"tous": {"median": 1159.1, "mean": 1282.4, "q1": 756.1, "q3": 1688.2, "volume": 15789, "confidence": 0.678}, "appartement": {"median": 1227.3, "mean": 1289.2, "q1": 898.9, "q3": 1636.4, "volume": 3591, "confidence": 0.76}, "maison": {"median": 1160.7, "mean": 1304.4, "q1": 750.0, "q3": 1740.0, "volume": 11553, "confidence": 0.659}, "local": {"median": 633.6, "mean": 850.0, "q1": 393.4, "q3": 1034.5, "volume": 645, "confidence": 0.6}}, "44": {"tous": {"median": 3030.3, "mean": 3188.0, "q1": 2086.5, "q3": 3962.3, "volume": 113530, "confidence": 0.752}, 
"appartement": {"median": 3475.7, "mean": 3679.6, "q1": 2693.9, "q3": 4328.4, "volume": 42694, "confidence": 0.812}, "maison": {"median": 2750.0, "mean": 2937.6, "q1": 1776.9, "q3": 3695.7, "volume": 66161, "confidence": 0.721}, "local": {"median": 1826.5, "mean": 2241.7, "q1": 1033.6, "q3": 2894.7, "volume": 4675, "confidence": 0.6}}, "45": {"tous": {"median": 1692.3, "mean": 1808.8, "q1": 1088.9, "q3": 2348.5, "volume": 48866, "confidence": 0.702}, "appartement": {"median": 2118.6, "mean": 2176.1, "q1": 1553.2, "q3": 2750.5, "volume": 13805, "confidence": 0.774}, "maison": {"median": 1524.3, "mean": 1654.4, "q1": 983.5, "q3": 2157.6, "volume": 33253, "confidence": 0.692}, "local": {"median": 1219.8, "mean": 1844.8, "q1": 648.0, "q3": 2164.3, "volume": 1808, "confidence": 0.6}}, "46": {"tous": {"median": 1234.5, "mean": 1361.3, "q1": 844.7, "q3": 1740.0, "volume": 14418, "confidence": 0.71}, "appartement": {"median": 1333.3, "mean": 1346.5, "q1": 997.1, "q3": 1666.7, "volume": 1923, "confidence": 0.799}, "maison": {"median": 1232.5, "mean": 1380.8, "q1": 846.4, "q3": 1775.7, "volume": 11956, "confidence": 0.698}, "local": {"median": 764.9, "mean": 980.6, "q1": 455.3, "q3": 1237.0, "volume": 539, "confidence": 0.6}}, "47": {"tous": {"median": 1196.1, "mean": 1322.2, "q1": 797.2, "q3": 1701.4, "volume": 27456, "confidence": 0.698}, "appartement": {"median": 1359.9, "mean": 1402.5, "q1": 1000.0, "q3": 1721.9, "volume": 4764, "confidence": 0.788}, "maison": {"median": 1166.7, "mean": 1299.4, "q1": 778.3, "q3": 1702.1, "volume": 21627, "confidence": 0.683}, "local": {"median": 784.3, "mean": 1425.4, "q1": 465.3, "q3": 1428.6, "volume": 1065, "confidence": 0.6}}, "48": {"tous": {"median": 1250.0, "mean": 1382.7, "q1": 810.0, "q3": 1820.4, "volume": 4885, "confidence": 0.677}, "appartement": {"median": 1195.1, "mean": 1270.7, "q1": 789.5, "q3": 1689.6, "volume": 1027, "confidence": 0.699}, "maison": {"median": 1301.4, "mean": 1437.7, "q1": 846.2, "q3": 1863.6, "volume": 
3691, "confidence": 0.687}, "local": {"median": 646.0, "mean": 855.3, "q1": 388.7, "q3": 1058.8, "volume": 167, "confidence": 0.6}}, "49": {"tous": {"median": 1936.9, "mean": 2076.8, "q1": 1242.4, "q3": 2678.6, "volume": 56014, "confidence": 0.703}, "appartement": {"median": 2625.6, "mean": 2705.0, "q1": 1895.2, "q3": 3421.1, "volume": 14293, "confidence": 0.768}, "maison": {"median": 1761.4, "mean": 1866.7, "q1": 1136.4, "q3": 2398.6, "volume": 39175, "confidence": 0.713}, "local": {"median": 1139.1, "mean": 1782.6, "q1": 596.2, "q3": 2227.3, "volume": 2546, "confidence": 0.6}}, "50": {"tous": {"median": 1500.0, "mean": 1714.5, "q1": 896.9, "q3": 2234.5, "volume": 39558, "confidence": 0.643}, "appartement": {"median": 1964.3, "mean": 2239.8, "q1": 1431.6, "q3": 2769.2, "volume": 5510, "confidence": 0.728}, "maison": {"median": 1422.8, "mean": 1643.6, "q1": 852.0, "q3": 2162.8, "volume": 32803, "confidence": 0.631}, "local": {"median": 942.6, "mean": 1259.3, "q1": 535.7, "q3": 1597.2, "volume": 1245, "confidence": 0.6}}, "51": {"tous": {"median": 1933.3, "mean": 2047.2, "q1": 1304.3, "q3": 2625.0, "volume": 37107, "confidence": 0.727}, "appartement": {"median": 2184.6, "mean": 2285.0, "q1": 1590.9, "q3": 2869.0, "volume": 15037, "confidence": 0.766}, "maison": {"median": 1779.5, "mean": 1887.6, "q1": 1162.8, "q3": 2422.7, "volume": 20414, "confidence": 0.717}, "local": {"median": 1272.2, "mean": 1856.1, "q1": 722.2, "q3": 2352.9, "volume": 1656, "confidence": 0.6}}, "52": {"tous": {"median": 887.0, "mean": 968.8, "q1": 535.7, "q3": 1312.5, "volume": 10684, "confidence": 0.65}, "appartement": {"median": 1010.8, "mean": 1010.0, "q1": 763.6, "q3": 1236.6, "volume": 1364, "confidence": 0.813}, "maison": {"median": 860.2, "mean": 970.4, "q1": 517.7, "q3": 1343.8, "volume": 8989, "confidence": 0.616}, "local": {"median": 625.0, "mean": 757.2, "q1": 353.7, "q3": 1000.0, "volume": 331, "confidence": 0.6}}, "53": {"tous": {"median": 1296.3, "mean": 1376.1, "q1": 790.0, 
"q3": 1851.5, "volume": 23269, "confidence": 0.672}, "appartement": {"median": 1698.1, "mean": 1724.4, "q1": 1242.4, "q3": 2150.5, "volume": 3026, "confidence": 0.786}, "maison": {"median": 1243.1, "mean": 1330.3, "q1": 759.5, "q3": 1796.6, "volume": 19550, "confidence": 0.666}, "local": {"median": 800.0, "mean": 1149.0, "q1": 463.9, "q3": 1320.0, "volume": 693, "confidence": 0.6}}, "54": {"tous": {"median": 1733.8, "mean": 1784.4, "q1": 1145.8, "q3": 2314.3, "volume": 52852, "confidence": 0.73}, "appartement": {"median": 1803.6, "mean": 1829.3, "q1": 1258.8, "q3": 2327.5, "volume": 22396, "confidence": 0.763}, "maison": {"median": 1708.8, "mean": 1770.6, "q1": 1100.0, "q3": 2321.0, "volume": 28474, "confidence": 0.714}, "local": {"median": 1129.3, "mean": 1475.6, "q1": 650.0, "q3": 1842.1, "volume": 1982, "confidence": 0.6}}, "55": {"tous": {"median": 905.3, "mean": 1005.5, "q1": 576.9, "q3": 1333.3, "volume": 11660, "confidence": 0.666}, "appartement": {"median": 949.7, "mean": 989.6, "q1": 656.6, "q3": 1240.7, "volume": 1636, "confidence": 0.754}, "maison": {"median": 900.0, "mean": 1010.3, "q1": 568.4, "q3": 1355.6, "volume": 9736, "confidence": 0.65}, "local": {"median": 690.2, "mean": 931.1, "q1": 404.2, "q3": 1142.9, "volume": 288, "confidence": 0.6}}, "56": {"tous": {"median": 2264.4, "mean": 2529.1, "q1": 1300.0, "q3": 3375.0, "volume": 66390, "confidence": 0.633}, "appartement": {"median": 3012.3, "mean": 3213.2, "q1": 2152.5, "q3": 3981.5, "volume": 16777, "confidence": 0.757}, "maison": {"median": 2024.2, "mean": 2336.4, "q1": 1128.7, "q3": 3113.4, "volume": 46761, "confidence": 0.608}, "local": {"median": 1300.6, "mean": 1665.7, "q1": 733.3, "q3": 2200.0, "volume": 2852, "confidence": 0.6}}, "58": {"tous": {"median": 888.9, "mean": 1003.4, "q1": 578.9, "q3": 1277.8, "volume": 18420, "confidence": 0.686}, "appartement": {"median": 896.3, "mean": 923.1, "q1": 629.1, "q3": 1155.2, "volume": 2858, "confidence": 0.765}, "maison": {"median": 892.9, "mean": 
1013.8, "q1": 579.7, "q3": 1318.0, "volume": 15040, "confidence": 0.669}, "local": {"median": 600.0, "mean": 1143.6, "q1": 359.0, "q3": 1060.6, "volume": 522, "confidence": 0.6}}, "59": {"tous": {"median": 1950.0, "mean": 2177.8, "q1": 1273.5, "q3": 2833.3, "volume": 168756, "confidence": 0.68}, "appartement": {"median": 2617.2, "mean": 2789.7, "q1": 1753.6, "q3": 3630.8, "volume": 44138, "confidence": 0.713}, "maison": {"median": 1788.1, "mean": 1962.1, "q1": 1184.2, "q3": 2531.9, "volume": 118578, "confidence": 0.699}, "local": {"median": 1305.8, "mean": 1940.1, "q1": 701.8, "q3": 2457.6, "volume": 6040, "confidence": 0.6}}, "60": {"tous": {"median": 2075.0, "mean": 2207.1, "q1": 1456.5, "q3": 2753.0, "volume": 52270, "confidence": 0.75}, "appartement": {"median": 2200.0, "mean": 2360.9, "q1": 1580.0, "q3": 2985.1, "volume": 12578, "confidence": 0.745}, "maison": {"median": 2058.8, "mean": 2162.1, "q1": 1437.5, "q3": 2691.5, "volume": 38044, "confidence": 0.756}, "local": {"median": 1475.4, "mean": 2070.4, "q1": 803.6, "q3": 2540.8, "volume": 1648, "confidence": 0.6}}, "61": {"tous": {"median": 1015.0, "mean": 1140.1, "q1": 655.0, "q3": 1470.0, "volume": 21482, "confidence": 0.679}, "appartement": {"median": 1155.6, "mean": 1250.9, "q1": 846.2, "q3": 1566.0, "volume": 2366, "confidence": 0.751}, "maison": {"median": 1000.0, "mean": 1136.1, "q1": 650.0, "q3": 1467.5, "volume": 18455, "confidence": 0.673}, "local": {"median": 587.8, "mean": 854.5, "q1": 357.1, "q3": 973.5, "volume": 661, "confidence": 0.6}}, "62": {"tous": {"median": 1569.3, "mean": 1862.4, "q1": 1098.9, "q3": 2125.2, "volume": 89090, "confidence": 0.738}, "appartement": {"median": 2074.1, "mean": 2842.0, "q1": 1410.7, "q3": 3333.3, "volume": 15170, "confidence": 0.629}, "maison": {"median": 1516.1, "mean": 1664.9, "q1": 1070.8, "q3": 2000.0, "volume": 71120, "confidence": 0.755}, "local": {"median": 978.6, "mean": 1569.8, "q1": 555.6, "q3": 1737.9, "volume": 2800, "confidence": 0.6}}, "63": 
{"tous": {"median": 1734.7, "mean": 1777.4, "q1": 1052.2, "q3": 2371.1, "volume": 48373, "confidence": 0.696}, "appartement": {"median": 2011.3, "mean": 2048.6, "q1": 1550.0, "q3": 2512.2, "volume": 17759, "confidence": 0.809}, "maison": {"median": 1484.5, "mean": 1639.8, "q1": 866.7, "q3": 2272.7, "volume": 28830, "confidence": 0.621}, "local": {"median": 1022.5, "mean": 1300.8, "q1": 591.1, "q3": 1696.4, "volume": 1784, "confidence": 0.6}}, "64": {"tous": {"median": 2444.4, "mean": 3210.6, "q1": 1574.1, "q3": 4230.8, "volume": 57491, "confidence": 0.6}, "appartement": {"median": 3068.0, "mean": 3726.5, "q1": 1942.9, "q3": 4863.9, "volume": 31645, "confidence": 0.619}, "maison": {"median": 2020.0, "mean": 2642.7, "q1": 1229.5, "q3": 3082.8, "volume": 22995, "confidence": 0.633}, "local": {"median": 1513.3, "mean": 2064.8, "q1": 819.0, "q3": 2546.5, "volume": 2851, "confidence": 0.6}}, "65": {"tous": {"median": 1495.3, "mean": 1704.3, "q1": 1036.6, "q3": 2122.0, "volume": 21988, "confidence": 0.71}, "appartement": {"median": 1550.0, "mean": 1860.7, "q1": 1111.1, "q3": 2450.0, "volume": 9970, "confidence": 0.654}, "maison": {"median": 1497.5, "mean": 1594.0, "q1": 1013.6, "q3": 2000.0, "volume": 11159, "confidence": 0.737}, "local": {"median": 763.6, "mean": 1322.8, "q1": 448.2, "q3": 1250.0, "volume": 859, "confidence": 0.6}}, "66": {"tous": {"median": 2168.7, "mean": 2349.5, "q1": 1421.9, "q3": 3019.9, "volume": 60224, "confidence": 0.705}, "appartement": {"median": 2073.2, "mean": 2341.5, "q1": 1358.8, "q3": 3095.2, "volume": 27223, "confidence": 0.665}, "maison": {"median": 2295.6, "mean": 2416.5, "q1": 1573.1, "q3": 3013.9, "volume": 30564, "confidence": 0.749}, "local": {"median": 1333.3, "mean": 1597.0, "q1": 791.7, "q3": 2076.9, "volume": 2437, "confidence": 0.614}}, "69": {"tous": {"median": 3651.7, "mean": 3743.9, "q1": 2547.6, "q3": 4733.3, "volume": 123788, "confidence": 0.761}, "appartement": {"median": 3846.2, "mean": 3933.0, "q1": 2839.2, "q3": 
4889.7, "volume": 84201, "confidence": 0.787}, "maison": {"median": 3333.3, "mean": 3488.7, "q1": 2154.8, "q3": 4436.6, "volume": 32288, "confidence": 0.726}, "local": {"median": 2077.5, "mean": 2692.4, "q1": 1210.7, "q3": 3481.2, "volume": 7299, "confidence": 0.6}}, "70": {"tous": {"median": 972.8, "mean": 1083.9, "q1": 634.9, "q3": 1401.6, "volume": 14482, "confidence": 0.685}, "appartement": {"median": 950.0, "mean": 989.4, "q1": 678.6, "q3": 1225.8, "volume": 2695, "confidence": 0.77}, "maison": {"median": 1000.0, "mean": 1119.1, "q1": 645.2, "q3": 1478.8, "volume": 11312, "confidence": 0.667}, "local": {"median": 615.4, "mean": 782.9, "q1": 382.7, "q3": 978.6, "volume": 475, "confidence": 0.613}}, "71": {"tous": {"median": 1180.6, "mean": 1316.5, "q1": 783.1, "q3": 1707.3, "volume": 40251, "confidence": 0.687}, "appartement": {"median": 1191.2, "mean": 1286.3, "q1": 857.1, "q3": 1616.6, "volume": 10310, "confidence": 0.745}, "maison": {"median": 1196.6, "mean": 1339.9, "q1": 781.2, "q3": 1763.7, "volume": 28356, "confidence": 0.672}, "local": {"median": 737.2, "mean": 1094.2, "q1": 434.8, "q3": 1266.7, "volume": 1585, "confidence": 0.6}}, "72": {"tous": {"median": 1366.3, "mean": 1443.3, "q1": 864.3, "q3": 1916.7, "volume": 42319, "confidence": 0.692}, "appartement": {"median": 1671.5, "mean": 1739.4, "q1": 1259.3, "q3": 2131.1, "volume": 7772, "confidence": 0.791}, "maison": {"median": 1290.9, "mean": 1386.7, "q1": 809.5, "q3": 1860.5, "volume": 33115, "confidence": 0.674}, "local": {"median": 862.9, "mean": 1145.1, "q1": 488.4, "q3": 1396.6, "volume": 1432, "confidence": 0.6}}, "73": {"tous": {"median": 3159.2, "mean": 3778.2, "q1": 2142.9, "q3": 4500.0, "volume": 47162, "confidence": 0.702}, "appartement": {"median": 3399.9, "mean": 4094.0, "q1": 2414.9, "q3": 4778.6, "volume": 31057, "confidence": 0.722}, "maison": {"median": 2745.0, "mean": 3178.9, "q1": 1787.1, "q3": 3951.9, "volume": 13000, "confidence": 0.685}, "local": {"median": 2138.9, "mean": 
3127.9, "q1": 1156.6, "q3": 4027.8, "volume": 3105, "confidence": 0.6}}, "74": {"tous": {"median": 4000.0, "mean": 4407.2, "q1": 2979.6, "q3": 5240.8, "volume": 77027, "confidence": 0.774}, "appartement": {"median": 4023.8, "mean": 4411.1, "q1": 3080.4, "q3": 5227.3, "volume": 51353, "confidence": 0.787}, "maison": {"median": 4137.0, "mean": 4624.3, "q1": 2981.5, "q3": 5429.3, "volume": 22456, "confidence": 0.763}, "local": {"median": 2307.7, "mean": 2830.7, "q1": 1351.7, "q3": 3571.4, "volume": 3218, "confidence": 0.615}}, "75": {"tous": {"median": 10284.5, "mean": 10351.6, "q1": 8541.6, "q3": 12083.3, "volume": 159360, "confidence": 0.862}, "appartement": {"median": 10362.8, "mean": 10477.6, "q1": 8709.5, "q3": 12106.8, "volume": 147088, "confidence": 0.869}, "maison": {"median": 13457.8, "mean": 13777.4, "q1": 10400.0, "q3": 17260.3, "volume": 652, "confidence": 0.796}, "local": {"median": 8255.6, "mean": 8564.9, "q1": 5411.0, "q3": 11215.0, "volume": 11620, "confidence": 0.719}}, "76": {"tous": {"median": 1900.0, "mean": 2007.4, "q1": 1269.8, "q3": 2555.3, "volume": 90674, "confidence": 0.729}, "appartement": {"median": 2153.8, "mean": 2257.1, "q1": 1534.0, "q3": 2864.8, "volume": 32914, "confidence": 0.753}, "maison": {"median": 1781.2, "mean": 1870.3, "q1": 1160.7, "q3": 2376.5, "volume": 54432, "confidence": 0.727}, "local": {"median": 1285.2, "mean": 1779.6, "q1": 732.9, "q3": 2232.0, "volume": 3328, "confidence": 0.6}}, "77": {"tous": {"median": 2867.2, "mean": 2913.1, "q1": 1954.5, "q3": 3692.3, "volume": 91321, "confidence": 0.758}, "appartement": {"median": 3228.3, "mean": 3259.3, "q1": 2473.4, "q3": 3979.6, "volume": 33952, "confidence": 0.813}, "maison": {"median": 2641.8, "mean": 2725.9, "q1": 1731.6, "q3": 3485.7, "volume": 53910, "confidence": 0.734}, "local": {"median": 1800.0, "mean": 2431.2, "q1": 1083.3, "q3": 2987.0, "volume": 3459, "confidence": 0.6}}, "78": {"tous": {"median": 3790.0, "mean": 4155.8, "q1": 2783.7, "q3": 5081.6, "volume": 
88078, "confidence": 0.757}, "appartement": {"median": 3966.7, "mean": 4284.9, "q1": 3000.0, "q3": 5204.1, "volume": 45616, "confidence": 0.778}, "maison": {"median": 3636.4, "mean": 4091.1, "q1": 2607.4, "q3": 4983.3, "volume": 39315, "confidence": 0.739}, "local": {"median": 2397.3, "mean": 3094.8, "q1": 1363.6, "q3": 4038.5, "volume": 3147, "confidence": 0.6}}, "79": {"tous": {"median": 1167.0, "mean": 1279.0, "q1": 750.0, "q3": 1700.0, "volume": 31041, "confidence": 0.674}, "appartement": {"median": 1604.7, "mean": 1644.8, "q1": 1149.1, "q3": 2055.6, "volume": 2840, "confidence": 0.774}, "maison": {"median": 1134.2, "mean": 1246.2, "q1": 729.9, "q3": 1658.3, "volume": 27200, "confidence": 0.673}, "local": {"median": 833.3, "mean": 1132.6, "q1": 458.3, "q3": 1428.6, "volume": 1001, "confidence": 0.6}}, "80": {"tous": {"median": 1600.0, "mean": 1784.1, "q1": 1029.0, "q3": 2307.7, "volume": 38817, "confidence": 0.68}, "appartement": {"median": 2326.5, "mean": 2454.1, "q1": 1698.7, "q3": 2996.2, "volume": 7619, "confidence": 0.777}, "maison": {"median": 1461.3, "mean": 1621.5, "q1": 952.8, "q3": 2083.3, "volume": 30105, "confidence": 0.691}, "local": {"median": 1191.4, "mean": 1592.6, "q1": 660.0, "q3": 2000.0, "volume": 1093, "confidence": 0.6}}, "81": {"tous": {"median": 1362.5, "mean": 1491.6, "q1": 906.0, "q3": 1961.5, "volume": 26542, "confidence": 0.69}, "appartement": {"median": 1710.9, "mean": 1748.7, "q1": 1203.7, "q3": 2187.5, "volume": 4300, "confidence": 0.77}, "maison": {"median": 1306.9, "mean": 1454.8, "q1": 888.8, "q3": 1910.7, "volume": 21376, "confidence": 0.687}, "local": {"median": 750.0, "mean": 1124.6, "q1": 436.0, "q3": 1352.9, "volume": 866, "confidence": 0.6}}, "82": {"tous": {"median": 1460.7, "mean": 1541.5, "q1": 975.6, "q3": 2002.5, "volume": 18714, "confidence": 0.719}, "appartement": {"median": 1636.4, "mean": 1647.4, "q1": 1303.6, "q3": 2000.0, "volume": 3733, "confidence": 0.83}, "maison": {"median": 1409.5, "mean": 1528.2, "q1": 
933.8, "q3": 2019.6, "volume": 14403, "confidence": 0.692}, "local": {"median": 938.6, "mean": 1189.5, "q1": 590.3, "q3": 1377.8, "volume": 578, "confidence": 0.664}}, "83": {"tous": {"median": 3352.3, "mean": 3779.0, "q1": 2185.2, "q3": 4816.7, "volume": 126979, "confidence": 0.686}, "appartement": {"median": 3529.4, "mean": 3905.7, "q1": 2411.5, "q3": 4991.4, "volume": 73026, "confidence": 0.708}, "maison": {"median": 3214.3, "mean": 3710.1, "q1": 1940.3, "q3": 4675.9, "volume": 49080, "confidence": 0.66}, "local": {"median": 2060.6, "mean": 2575.2, "q1": 1304.3, "q3": 3148.1, "volume": 4873, "confidence": 0.642}}, "84": {"tous": {"median": 2340.4, "mean": 2515.8, "q1": 1644.1, "q3": 3088.2, "volume": 41739, "confidence": 0.753}, "appartement": {"median": 2093.2, "mean": 2197.7, "q1": 1481.3, "q3": 2750.0, "volume": 14836, "confidence": 0.758}, "maison": {"median": 2551.3, "mean": 2760.6, "q1": 1857.1, "q3": 3319.0, "volume": 25053, "confidence": 0.771}, "local": {"median": 1360.6, "mean": 1751.8, "q1": 830.2, "q3": 2193.2, "volume": 1850, "confidence": 0.6}}, "85": {"tous": {"median": 2200.0, "mean": 2456.4, "q1": 1372.5, "q3": 3159.1, "volume": 66695, "confidence": 0.675}, "appartement": {"median": 3030.3, "mean": 3325.6, "q1": 2200.0, "q3": 4170.6, "volume": 11325, "confidence": 0.74}, "maison": {"median": 2080.6, "mean": 2306.9, "q1": 1295.3, "q3": 2945.2, "volume": 52440, "confidence": 0.683}, "local": {"median": 1333.1, "mean": 1773.0, "q1": 703.7, "q3": 2252.5, "volume": 2930, "confidence": 0.6}}, "86": {"tous": {"median": 1309.5, "mean": 1447.7, "q1": 806.5, "q3": 1930.6, "volume": 33289, "confidence": 0.657}, "appartement": {"median": 1963.4, "mean": 1992.8, "q1": 1472.0, "q3": 2496.3, "volume": 6618, "confidence": 0.791}, "maison": {"median": 1164.3, "mean": 1296.7, "q1": 750.0, "q3": 1750.0, "volume": 25387, "confidence": 0.656}, "local": {"median": 964.9, "mean": 1623.3, "q1": 509.5, "q3": 1642.9, "volume": 1284, "confidence": 0.6}}, "87": {"tous": 
{"median": 1250.0, "mean": 1331.7, "q1": 801.4, "q3": 1759.6, "volume": 28742, "confidence": 0.693}, "appartement": {"median": 1516.1, "mean": 1546.7, "q1": 1182.1, "q3": 1869.6, "volume": 8255, "confidence": 0.819}, "maison": {"median": 1088.5, "mean": 1250.9, "q1": 705.7, "q3": 1687.5, "volume": 19529, "confidence": 0.639}, "local": {"median": 933.3, "mean": 1126.0, "q1": 529.2, "q3": 1503.0, "volume": 958, "confidence": 0.6}}, "88": {"tous": {"median": 1071.0, "mean": 1240.1, "q1": 689.7, "q3": 1580.5, "volume": 25803, "confidence": 0.667}, "appartement": {"median": 1094.8, "mean": 1286.6, "q1": 743.4, "q3": 1568.3, "volume": 8141, "confidence": 0.699}, "maison": {"median": 1075.7, "mean": 1231.5, "q1": 687.5, "q3": 1603.3, "volume": 16612, "confidence": 0.659}, "local": {"median": 750.0, "mean": 1015.5, "q1": 444.4, "q3": 1201.9, "volume": 1050, "confidence": 0.6}}, "89": {"tous": {"median": 1197.9, "mean": 1291.7, "q1": 783.1, "q3": 1671.2, "volume": 26732, "confidence": 0.703}, "appartement": {"median": 1354.2, "mean": 1378.0, "q1": 994.7, "q3": 1723.9, "volume": 4285, "confidence": 0.785}, "maison": {"median": 1172.2, "mean": 1271.4, "q1": 763.6, "q3": 1665.4, "volume": 21553, "confidence": 0.692}, "local": {"median": 849.5, "mean": 1365.5, "q1": 506.8, "q3": 1448.3, "volume": 894, "confidence": 0.6}}, "90": {"tous": {"median": 1333.3, "mean": 1399.7, "q1": 962.3, "q3": 1776.2, "volume": 8960, "confidence": 0.756}, "appartement": {"median": 1214.3, "mean": 1263.9, "q1": 913.6, "q3": 1554.1, "volume": 4715, "confidence": 0.789}, "maison": {"median": 1602.7, "mean": 1605.5, "q1": 1114.8, "q3": 2026.7, "volume": 3850, "confidence": 0.772}, "local": {"median": 824.4, "mean": 1014.8, "q1": 500.0, "q3": 1272.7, "volume": 395, "confidence": 0.625}}, "91": {"tous": {"median": 3165.6, "mean": 3282.5, "q1": 2427.2, "q3": 3921.6, "volume": 77368, "confidence": 0.811}, "appartement": {"median": 2983.3, "mean": 3097.1, "q1": 2347.2, "q3": 3712.1, "volume": 36975, 
"confidence": 0.817}, "maison": {"median": 3378.4, "mean": 3503.6, "q1": 2674.4, "q3": 4147.5, "volume": 37594, "confidence": 0.826}, "local": {"median": 2027.0, "mean": 2760.2, "q1": 1205.7, "q3": 3333.3, "volume": 2799, "confidence": 0.6}}, "92": {"tous": {"median": 6862.9, "mean": 7100.3, "q1": 5420.9, "q3": 8500.0, "volume": 97813, "confidence": 0.821}, "appartement": {"median": 6823.3, "mean": 7038.5, "q1": 5448.7, "q3": 8375.0, "volume": 82181, "confidence": 0.828}, "maison": {"median": 7822.6, "mean": 8225.7, "q1": 6134.5, "q3": 9820.0, "volume": 11649, "confidence": 0.812}, "local": {"median": 4756.1, "mean": 5083.4, "q1": 2744.2, "q3": 6625.0, "volume": 3983, "confidence": 0.674}}, "93": {"tous": {"median": 3913.0, "mean": 4302.8, "q1": 2947.8, "q3": 5294.1, "volume": 70849, "confidence": 0.76}, "appartement": {"median": 3911.3, "mean": 4287.9, "q1": 2892.9, "q3": 5409.1, "volume": 46907, "confidence": 0.743}, "maison": {"median": 4000.0, "mean": 4430.4, "q1": 3181.8, "q3": 5147.1, "volume": 21420, "confidence": 0.803}, "local": {"median": 2717.4, "mean": 3496.7, "q1": 1625.0, "q3": 4359.0, "volume": 2522, "confidence": 0.6}}, "94": {"tous": {"median": 4900.0, "mean": 5305.3, "q1": 3653.8, "q3": 6466.7, "volume": 76197, "confidence": 0.77}, "appartement": {"median": 4880.9, "mean": 5241.3, "q1": 3658.5, "q3": 6363.6, "volume": 55606, "confidence": 0.778}, "maison": {"median": 5125.0, "mean": 5703.1, "q1": 3854.2, "q3": 7000.0, "volume": 17890, "confidence": 0.754}, "local": {"median": 3378.4, "mean": 3987.9, "q1": 2000.0, "q3": 5294.1, "volume": 2701, "confidence": 0.61}}, "95": {"tous": {"median": 3400.0, "mean": 3530.8, "q1": 2687.4, "q3": 4184.6, "volume": 68727, "confidence": 0.824}, "appartement": {"median": 3226.4, "mean": 3324.0, "q1": 2600.0, "q3": 3942.3, "volume": 33240, "confidence": 0.834}, "maison": {"median": 3611.1, "mean": 3782.1, "q1": 2905.6, "q3": 4447.8, "volume": 33220, "confidence": 0.829}, "local": {"median": 2101.6, "mean": 2881.0, 
"q1": 1174.7, "q3": 3500.0, "volume": 2267, "confidence": 0.6}}, "971": {"tous": {"median": 2826.1, "mean": 3033.2, "q1": 1743.6, "q3": 3912.1, "volume": 10721, "confidence": 0.693}, "appartement": {"median": 2951.7, "mean": 3120.8, "q1": 2000.0, "q3": 3960.4, "volume": 4988, "confidence": 0.734}, "maison": {"median": 2771.3, "mean": 3011.1, "q1": 1602.2, "q3": 3921.1, "volume": 4326, "confidence": 0.665}, "local": {"median": 2470.0, "mean": 2791.0, "q1": 1395.3, "q3": 3691.3, "volume": 1407, "confidence": 0.628}}, "972": {"tous": {"median": 2500.0, "mean": 2682.8, "q1": 1600.0, "q3": 3460.0, "volume": 9912, "confidence": 0.702}, "appartement": {"median": 2666.7, "mean": 2846.0, "q1": 1966.3, "q3": 3476.9, "volume": 4813, "confidence": 0.773}, "maison": {"median": 2348.7, "mean": 2604.3, "q1": 1322.4, "q3": 3584.4, "volume": 4142, "confidence": 0.615}, "local": {"median": 1925.9, "mean": 2201.6, "q1": 1198.0, "q3": 2872.4, "volume": 957, "confidence": 0.652}}, "973": {"tous": {"median": 2469.1, "mean": 2500.8, "q1": 1760.0, "q3": 3078.9, "volume": 4002, "confidence": 0.786}, "appartement": {"median": 2576.8, "mean": 2603.9, "q1": 2112.7, "q3": 3111.1, "volume": 1586, "confidence": 0.845}, "maison": {"median": 2401.3, "mean": 2478.9, "q1": 1619.8, "q3": 3088.2, "volume": 2260, "confidence": 0.755}, "local": {"median": 1382.9, "mean": 1769.8, "q1": 882.4, "q3": 2036.4, "volume": 156, "confidence": 0.666}}, "974": {"tous": {"median": 2492.1, "mean": 2800.9, "q1": 1833.3, "q3": 3402.8, "volume": 28084, "confidence": 0.748}, "appartement": {"median": 2473.7, "mean": 2781.1, "q1": 1966.7, "q3": 3214.3, "volume": 12306, "confidence": 0.798}, "maison": {"median": 2521.7, "mean": 2829.5, "q1": 1666.7, "q3": 3571.4, "volume": 14873, "confidence": 0.698}, "local": {"median": 2162.2, "mean": 2600.0, "q1": 1386.4, "q3": 3164.6, "volume": 905, "confidence": 0.671}}}
data/aggregated/prices_postcode.json ADDED
The diff for this file is too large to render. See raw diff
 
data/aggregated/prices_region.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"01": {"tous": {"median": 2826.1, "mean": 3033.2, "q1": 1743.6, "q3": 3912.1, "volume": 10721, "confidence": 0.693}, "appartement": {"median": 2951.7, "mean": 3120.8, "q1": 2000.0, "q3": 3960.4, "volume": 4988, "confidence": 0.734}, "maison": {"median": 2771.3, "mean": 3011.1, "q1": 1602.2, "q3": 3921.1, "volume": 4326, "confidence": 0.665}, "local": {"median": 2470.0, "mean": 2791.0, "q1": 1395.3, "q3": 3691.3, "volume": 1407, "confidence": 0.628}}, "02": {"tous": {"median": 2500.0, "mean": 2682.8, "q1": 1600.0, "q3": 3460.0, "volume": 9912, "confidence": 0.702}, "appartement": {"median": 2666.7, "mean": 2846.0, "q1": 1966.3, "q3": 3476.9, "volume": 4813, "confidence": 0.773}, "maison": {"median": 2348.7, "mean": 2604.3, "q1": 1322.4, "q3": 3584.4, "volume": 4142, "confidence": 0.615}, "local": {"median": 1925.9, "mean": 2201.6, "q1": 1198.0, "q3": 2872.4, "volume": 957, "confidence": 0.652}}, "03": {"tous": {"median": 2469.1, "mean": 2500.8, "q1": 1760.0, "q3": 3078.9, "volume": 4002, "confidence": 0.786}, "appartement": {"median": 2576.8, "mean": 2603.9, "q1": 2112.7, "q3": 3111.1, "volume": 1586, "confidence": 0.845}, "maison": {"median": 2401.3, "mean": 2478.9, "q1": 1619.8, "q3": 3088.2, "volume": 2260, "confidence": 0.755}, "local": {"median": 1382.9, "mean": 1769.8, "q1": 882.4, "q3": 2036.4, "volume": 156, "confidence": 0.666}}, "04": {"tous": {"median": 2492.1, "mean": 2800.9, "q1": 1833.3, "q3": 3402.8, "volume": 28084, "confidence": 0.748}, "appartement": {"median": 2473.7, "mean": 2781.1, "q1": 1966.7, "q3": 3214.3, "volume": 12306, "confidence": 0.798}, "maison": {"median": 2521.7, "mean": 2829.5, "q1": 1666.7, "q3": 3571.4, "volume": 14873, "confidence": 0.698}, "local": {"median": 2162.2, "mean": 2600.0, "q1": 1386.4, "q3": 3164.6, "volume": 905, "confidence": 0.671}}, "11": {"tous": {"median": 4545.5, "mean": 5730.9, "q1": 3077.3, "q3": 7896.2, "volume": 729713, "confidence": 0.6}, "appartement": {"median": 5606.1, "mean": 6527.2, "q1": 3481.5, 
"q3": 9145.5, "volume": 481565, "confidence": 0.6}, "maison": {"median": 3529.4, "mean": 4019.9, "q1": 2593.4, "q3": 4755.6, "volume": 215650, "confidence": 0.755}, "local": {"median": 3975.0, "mean": 5285.5, "q1": 1916.7, "q3": 7640.4, "volume": 32498, "confidence": 0.6}}, "24": {"tous": {"median": 1489.4, "mean": 1667.3, "q1": 916.7, "q3": 2209.6, "volume": 192214, "confidence": 0.653}, "appartement": {"median": 2045.5, "mean": 2126.8, "q1": 1428.6, "q3": 2750.0, "volume": 43364, "confidence": 0.742}, "maison": {"median": 1344.8, "mean": 1533.8, "q1": 845.1, "q3": 2015.4, "volume": 142096, "confidence": 0.652}, "local": {"median": 1000.0, "mean": 1524.8, "q1": 545.5, "q3": 1829.8, "volume": 6754, "confidence": 0.6}}, "27": {"tous": {"median": 1392.9, "mean": 1578.7, "q1": 869.0, "q3": 2099.0, "volume": 208712, "confidence": 0.647}, "appartement": {"median": 1659.7, "mean": 1797.3, "q1": 1062.5, "q3": 2411.8, "volume": 71719, "confidence": 0.675}, "maison": {"median": 1290.3, "mean": 1471.6, "q1": 807.7, "q3": 1935.5, "volume": 128640, "confidence": 0.65}, "local": {"median": 933.3, "mean": 1351.1, "q1": 515.4, "q3": 1637.9, "volume": 8353, "confidence": 0.6}}, "28": {"tous": {"median": 1768.4, "mean": 1984.7, "q1": 1094.1, "q3": 2543.3, "volume": 258639, "confidence": 0.672}, "appartement": {"median": 2305.6, "mean": 2561.8, "q1": 1612.9, "q3": 3192.4, "volume": 72471, "confidence": 0.726}, "maison": {"median": 1574.5, "mean": 1761.8, "q1": 982.1, "q3": 2288.6, "volume": 177132, "confidence": 0.668}, "local": {"median": 1215.5, "mean": 1726.8, "q1": 654.2, "q3": 2217.0, "volume": 9036, "confidence": 0.6}}, "32": {"tous": {"median": 1735.8, "mean": 1984.7, "q1": 1145.8, "q3": 2500.0, "volume": 382078, "confidence": 0.688}, "appartement": {"median": 2333.3, "mean": 2634.0, "q1": 1557.4, "q3": 3362.1, "volume": 83415, "confidence": 0.691}, "maison": {"median": 1625.0, "mean": 1802.6, "q1": 1082.6, "q3": 2285.7, "volume": 285981, "confidence": 0.704}, "local": 
{"median": 1190.5, "mean": 1820.0, "q1": 642.2, "q3": 2210.5, "volume": 12682, "confidence": 0.6}}, "44": {"tous": {"median": 1417.3, "mean": 1564.0, "q1": 884.6, "q3": 2071.4, "volume": 176812, "confidence": 0.665}, "appartement": {"median": 1631.1, "mean": 1763.5, "q1": 1101.7, "q3": 2286.3, "volume": 59150, "confidence": 0.709}, "maison": {"median": 1324.3, "mean": 1468.1, "q1": 809.3, "q3": 1961.9, "volume": 110836, "confidence": 0.652}, "local": {"median": 966.9, "mean": 1392.8, "q1": 550.8, "q3": 1649.5, "volume": 6826, "confidence": 0.6}}, "52": {"tous": {"median": 2160.0, "mean": 2435.8, "q1": 1319.6, "q3": 3219.2, "volume": 301827, "confidence": 0.648}, "appartement": {"median": 3005.7, "mean": 3187.4, "q1": 2125.0, "q3": 3958.3, "volume": 79110, "confidence": 0.756}, "maison": {"median": 1917.1, "mean": 2187.7, "q1": 1173.5, "q3": 2841.5, "volume": 210441, "confidence": 0.652}, "local": {"median": 1347.0, "mean": 1845.0, "q1": 699.3, "q3": 2381.0, "volume": 12276, "confidence": 0.6}}, "53": {"tous": {"median": 2033.0, "mean": 2283.4, "q1": 1261.7, "q3": 2948.7, "volume": 280003, "confidence": 0.668}, "appartement": {"median": 2614.4, "mean": 2856.4, "q1": 1868.3, "q3": 3613.6, "volume": 80626, "confidence": 0.733}, "maison": {"median": 1829.3, "mean": 2076.7, "q1": 1111.1, "q3": 2666.7, "volume": 187994, "confidence": 0.66}, "local": {"median": 1255.8, "mean": 1637.6, "q1": 712.5, "q3": 2142.9, "volume": 11383, "confidence": 0.6}}, "75": {"tous": {"median": 1853.8, "mean": 2415.6, "q1": 1072.1, "q3": 3248.8, "volume": 505650, "confidence": 0.6}, "appartement": {"median": 2859.1, "mean": 3258.9, "q1": 1756.7, "q3": 4290.3, "volume": 139810, "confidence": 0.646}, "maison": {"median": 1586.2, "mean": 2099.2, "q1": 936.6, "q3": 2652.9, "volume": 347220, "confidence": 0.6}, "local": {"median": 1343.4, "mean": 1984.4, "q1": 676.7, "q3": 2547.2, "volume": 18620, "confidence": 0.6}}, "76": {"tous": {"median": 2125.0, "mean": 2356.1, "q1": 1300.0, "q3": 3095.2, 
"volume": 523730, "confidence": 0.662}, "appartement": {"median": 2520.9, "mean": 2721.5, "q1": 1629.6, "q3": 3531.2, "volume": 213001, "confidence": 0.698}, "maison": {"median": 1900.0, "mean": 2129.7, "q1": 1166.7, "q3": 2804.6, "volume": 290513, "confidence": 0.655}, "local": {"median": 1353.2, "mean": 1760.7, "q1": 746.1, "q3": 2254.5, "volume": 20216, "confidence": 0.6}}, "84": {"tous": {"median": 2391.3, "mean": 2785.3, "q1": 1434.8, "q3": 3673.5, "volume": 606247, "confidence": 0.626}, "appartement": {"median": 2804.3, "mean": 3162.9, "q1": 1756.1, "q3": 4148.9, "volume": 310845, "confidence": 0.659}, "maison": {"median": 2086.6, "mean": 2429.4, "q1": 1238.7, "q3": 3126.3, "volume": 264552, "confidence": 0.638}, "local": {"median": 1414.4, "mean": 2033.1, "q1": 789.1, "q3": 2527.8, "volume": 30850, "confidence": 0.6}}, "93": {"tous": {"median": 3381.3, "mean": 3761.0, "q1": 2263.1, "q3": 4736.8, "volume": 493722, "confidence": 0.707}, "appartement": {"median": 3546.9, "mean": 3872.9, "q1": 2419.4, "q3": 4862.4, "volume": 314937, "confidence": 0.724}, "maison": {"median": 3220.2, "mean": 3693.3, "q1": 2140.2, "q3": 4588.2, "volume": 155197, "confidence": 0.696}, "local": {"median": 2155.5, "mean": 2711.4, "q1": 1305.6, "q3": 3366.7, "volume": 23588, "confidence": 0.618}}, "94": {"tous": {"median": 3166.7, "mean": 3486.4, "q1": 2169.2, "q3": 4375.0, "volume": 23090, "confidence": 0.721}, "appartement": {"median": 3200.1, "mean": 3410.3, "q1": 2333.3, "q3": 4317.2, "volume": 14176, "confidence": 0.752}, "maison": {"median": 3274.6, "mean": 3818.7, "q1": 2037.0, "q3": 4727.3, "volume": 7499, "confidence": 0.671}, "local": {"median": 2146.3, "mean": 2487.5, "q1": 1280.0, "q3": 3250.0, "volume": 1415, "confidence": 0.633}}}
data/aggregated/top_cities.json ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Bordeaux": {
3
+ "code": "33063",
4
+ "appartement": {
5
+ "median": 4439.2,
6
+ "mean": 4566.7,
7
+ "q1": 3653.8,
8
+ "q3": 5333.3,
9
+ "volume": 18352
10
+ },
11
+ "local": {
12
+ "median": 3487.2,
13
+ "mean": 4017.4,
14
+ "q1": 2343.5,
15
+ "q3": 4945.1,
16
+ "volume": 1218
17
+ },
18
+ "maison": {
19
+ "median": 5147.0,
20
+ "mean": 5317.0,
21
+ "q1": 4210.5,
22
+ "q3": 6153.8,
23
+ "volume": 6018
24
+ },
25
+ "tous": {
26
+ "median": 4566.4,
27
+ "mean": 4717.0,
28
+ "q1": 3697.8,
29
+ "q3": 5543.5,
30
+ "volume": 25588
31
+ }
32
+ },
33
+ "Lille": {
34
+ "code": "59350",
35
+ "appartement": {
36
+ "median": 3673.5,
37
+ "mean": 3774.2,
38
+ "q1": 2897.4,
39
+ "q3": 4517.9,
40
+ "volume": 14741
41
+ },
42
+ "local": {
43
+ "median": 2542.9,
44
+ "mean": 3167.6,
45
+ "q1": 1563.9,
46
+ "q3": 3812.5,
47
+ "volume": 1064
48
+ },
49
+ "maison": {
50
+ "median": 2785.8,
51
+ "mean": 2930.7,
52
+ "q1": 2172.4,
53
+ "q3": 3500.0,
54
+ "volume": 4911
55
+ },
56
+ "tous": {
57
+ "median": 3405.1,
58
+ "mean": 3543.1,
59
+ "q1": 2600.0,
60
+ "q3": 4312.5,
61
+ "volume": 20716
62
+ }
63
+ },
64
+ "Lyon": {
65
+ "code": "69123",
66
+ "appartement": {
67
+ "median": 4826.2,
68
+ "mean": 4830.9,
69
+ "q1": 3875.0,
70
+ "q3": 5711.9,
71
+ "volume": 35236
72
+ },
73
+ "local": {
74
+ "median": 3311.4,
75
+ "mean": 3753.1,
76
+ "q1": 2173.9,
77
+ "q3": 4703.4,
78
+ "volume": 2734
79
+ },
80
+ "maison": {
81
+ "median": 5990.0,
82
+ "mean": 6463.1,
83
+ "q1": 4662.2,
84
+ "q3": 7609.6,
85
+ "volume": 949
86
+ },
87
+ "tous": {
88
+ "median": 4777.8,
89
+ "mean": 4795.0,
90
+ "q1": 3756.5,
91
+ "q3": 5710.7,
92
+ "volume": 38919
93
+ }
94
+ },
95
+ "Marseille": {
96
+ "code": "13055",
97
+ "appartement": {
98
+ "median": 2935.5,
99
+ "mean": 3110.5,
100
+ "q1": 2089.6,
101
+ "q3": 3909.1,
102
+ "volume": 59629
103
+ },
104
+ "local": {
105
+ "median": 2062.5,
106
+ "mean": 2473.1,
107
+ "q1": 1279.1,
108
+ "q3": 3157.9,
109
+ "volume": 3731
110
+ },
111
+ "maison": {
112
+ "median": 4505.3,
113
+ "mean": 4990.0,
114
+ "q1": 3283.3,
115
+ "q3": 6014.7,
116
+ "volume": 7871
117
+ },
118
+ "tous": {
119
+ "median": 3023.8,
120
+ "mean": 3284.8,
121
+ "q1": 2111.1,
122
+ "q3": 4083.3,
123
+ "volume": 71231
124
+ }
125
+ },
126
+ "Montpellier": {
127
+ "code": "34172",
128
+ "appartement": {
129
+ "median": 3261.3,
130
+ "mean": 3280.2,
131
+ "q1": 2522.7,
132
+ "q3": 4000.0,
133
+ "volume": 22452
134
+ },
135
+ "local": {
136
+ "median": 2564.1,
137
+ "mean": 2766.4,
138
+ "q1": 1639.3,
139
+ "q3": 3466.8,
140
+ "volume": 1585
141
+ },
142
+ "maison": {
143
+ "median": 3819.5,
144
+ "mean": 4073.3,
145
+ "q1": 3070.7,
146
+ "q3": 4806.0,
147
+ "volume": 2510
148
+ },
149
+ "tous": {
150
+ "median": 3279.5,
151
+ "mean": 3324.5,
152
+ "q1": 2508.0,
153
+ "q3": 4047.6,
154
+ "volume": 26547
155
+ }
156
+ },
157
+ "Nantes": {
158
+ "code": "44109",
159
+ "appartement": {
160
+ "median": 3690.5,
161
+ "mean": 3724.2,
162
+ "q1": 3000.0,
163
+ "q3": 4393.4,
164
+ "volume": 21661
165
+ },
166
+ "local": {
167
+ "median": 2911.7,
168
+ "mean": 3252.4,
169
+ "q1": 1845.2,
170
+ "q3": 4227.3,
171
+ "volume": 1334
172
+ },
173
+ "maison": {
174
+ "median": 4433.6,
175
+ "mean": 4675.4,
176
+ "q1": 3636.4,
177
+ "q3": 5500.0,
178
+ "volume": 5544
179
+ },
180
+ "tous": {
181
+ "median": 3793.1,
182
+ "mean": 3886.9,
183
+ "q1": 3048.8,
184
+ "q3": 4575.8,
185
+ "volume": 28539
186
+ }
187
+ },
188
+ "Nice": {
189
+ "code": "06088",
190
+ "appartement": {
191
+ "median": 4444.4,
192
+ "mean": 4717.2,
193
+ "q1": 3478.3,
194
+ "q3": 5600.0,
195
+ "volume": 39206
196
+ },
197
+ "local": {
198
+ "median": 3214.3,
199
+ "mean": 3623.2,
200
+ "q1": 2054.8,
201
+ "q3": 4722.2,
202
+ "volume": 2412
203
+ },
204
+ "maison": {
205
+ "median": 5312.5,
206
+ "mean": 5688.8,
207
+ "q1": 3689.0,
208
+ "q3": 6964.6,
209
+ "volume": 1565
210
+ },
211
+ "tous": {
212
+ "median": 4416.7,
213
+ "mean": 4691.3,
214
+ "q1": 3409.1,
215
+ "q3": 5616.4,
216
+ "volume": 43183
217
+ }
218
+ },
219
+ "Paris": {
220
+ "code": "75056",
221
+ "appartement": {
222
+ "median": 10362.8,
223
+ "mean": 10477.6,
224
+ "q1": 8709.5,
225
+ "q3": 12106.8,
226
+ "volume": 147088
227
+ },
228
+ "local": {
229
+ "median": 8255.6,
230
+ "mean": 8564.9,
231
+ "q1": 5411.0,
232
+ "q3": 11215.0,
233
+ "volume": 11620
234
+ },
235
+ "maison": {
236
+ "median": 13457.8,
237
+ "mean": 13777.4,
238
+ "q1": 10400.0,
239
+ "q3": 17260.3,
240
+ "volume": 652
241
+ },
242
+ "tous": {
243
+ "median": 10284.5,
244
+ "mean": 10351.6,
245
+ "q1": 8541.6,
246
+ "q3": 12083.3,
247
+ "volume": 159360
248
+ }
249
+ },
250
+ "Toulouse": {
251
+ "code": "31555",
252
+ "appartement": {
253
+ "median": 3230.8,
254
+ "mean": 3420.9,
255
+ "q1": 2552.2,
256
+ "q3": 4124.3,
257
+ "volume": 35114
258
+ },
259
+ "local": {
260
+ "median": 2500.0,
261
+ "mean": 2990.2,
262
+ "q1": 1580.5,
263
+ "q3": 3766.8,
264
+ "volume": 1845
265
+ },
266
+ "maison": {
267
+ "median": 3750.0,
268
+ "mean": 4100.6,
269
+ "q1": 2881.3,
270
+ "q3": 4969.7,
271
+ "volume": 6119
272
+ },
273
+ "tous": {
274
+ "median": 3272.7,
275
+ "mean": 3499.0,
276
+ "q1": 2557.3,
277
+ "q3": 4224.7,
278
+ "volume": 43078
279
+ }
280
+ }
281
+ }
main.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def main():
    """Print the project greeting to standard output."""
    greeting = "Hello from realadvisor!"
    print(greeting)


# Run the greeting only when executed as a script, not on import.
if __name__ == "__main__":
    main()
ml_challenge.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MLE Challenge
2
+ Context
3
+ You are analysing residential property prices in France. You have at your disposal the recent public transactions published at https://www.data.gouv.fr/datasets/demandes-de-valeurs-foncieres/ to estimate the current market price as a price per square meter (€/m²).
4
+ Objective
5
+ You need to generate an interactive map visualization of the price data aggregated by:
6
+ Country
7
+ Region
8
+ Department
9
+ Neighborhood
10
+ Postcode
11
+ Building plots
12
+
13
+ You need to obtain open data from the French government for these geometries and take care to clean the data so that the aggregates are accurate.
14
+ You are asked to make the best estimate of the market price, taking into account transaction price volatility, transaction volume, data freshness and consistency. You can estimate a single number for the price or an interval.
15
+ You can use this one as a reference https://explore.data.gouv.fr/fr/immobilier
16
+
17
+ You need to render an interactive map that shows the price aggregate with colors; as the zoom level changes, the map must transition between aggregation levels. (Link to hosted app, optional but preferred)
18
+ If the volume of data is too large for the browser to handle in full, you can subset it, but a solution that avoids subsetting will be appreciated.
19
+ Produce a list of market price per square meter by property type for the top 10 biggest cities.
20
+ Submit your processing code. (Link to github repo)
21
+ What you will be evaluated on
22
+ Is the colored map loading ?
23
+ Is the map usable and not laggy ?
24
+ Is the map refreshing the aggregation level on zoom ?
25
+ Are all 6 aggregation levels present ?
26
+ Country
27
+ Region
28
+ Department
29
+ Neighborhood
30
+ Postcode
31
+ Building plots
32
+ Are the price estimates plausible ?
33
+ Is the data complete or was it subset ?
34
+ The processing code is clean, clear and reusable
35
+ The architecture is robust and logical
36
+ App is hosted and functional
notebooks/01_data_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "realadvisor-mle-challenge"
3
+ version = "0.1.0"
4
+ description = "French property price analysis and interactive map visualization"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "polars>=1.0.0",
9
+ "requests>=2.31.0",
10
+ "tqdm>=4.66.0",
11
+ "jupyter>=1.0.0",
12
+ "matplotlib>=3.8.0",
13
+ "seaborn>=0.13.0",
14
+ "geopandas>=0.14.0",
15
+ "folium>=0.15.0",
16
+ "lab>=8.8",
17
+ ]
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """RealAdvisor MLE Challenge - French property price analysis pipeline."""
src/aggregator.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Aggregate property prices at different geographic levels.
3
+
4
+ Responsibility: Given cleaned transaction data, compute summary statistics
5
+ (median, volume, IQR, confidence) grouped by any geographic column.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ from pathlib import Path
11
+
12
+ import polars as pl
13
+
14
+ from src.config import (
15
+ AGGREGATED_DIR,
16
+ AGGREGATION_LEVELS,
17
+ DEPT_TO_REGION,
18
+ NO_DVF_DEPARTMENTS,
19
+ REGION_NAMES,
20
+ TYPE_LOCAL_SHORT,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
# Grouping column for every aggregation level. "_country" is not a real
# column of the data: it is a synthetic constant added at aggregation time.
LEVEL_TO_COLUMN: dict[str, str] = dict(
    country="_country",
    region="code_region",
    department="code_departement",
    commune="code_commune",
    postcode="code_postal",
    section="code_section",
)
34
+
35
+
36
def aggregate_level(
    lf: pl.LazyFrame,
    group_col: str,
    *,
    property_type: str | None = None,
) -> pl.DataFrame:
    """
    Summarise price per m² for one grouping column and, optionally, one property type.

    Args:
        lf: Cleaned transaction LazyFrame.
        group_col: Column to group on (e.g. "code_departement"). The special
            value "_country" groups every row under the single code "FR".
        property_type: Keep only rows whose type_local equals this value;
            a falsy value (None or "") keeps every row.

    Returns:
        DataFrame sorted by code with columns: code, median_price_m2,
        mean_price_m2, q1, q3, std_dev, volume, latest_year, iqr.
    """
    source = lf.filter(pl.col("type_local") == property_type) if property_type else lf

    # The country level has no real column to group on: synthesise a constant key.
    if group_col == "_country":
        source = source.with_columns(pl.lit("FR").alias("_country"))

    price = pl.col("prix_m2")
    stats = [
        price.median().alias("median_price_m2"),
        price.mean().alias("mean_price_m2"),
        price.quantile(0.25).alias("q1"),
        price.quantile(0.75).alias("q3"),
        price.std().alias("std_dev"),
        price.count().alias("volume"),
        # Non-strict cast: a year that cannot be parsed becomes null instead of raising.
        pl.col("year").cast(pl.Int32, strict=False).max().alias("latest_year"),
    ]

    grouped = source.group_by(group_col).agg(stats).rename({group_col: "code"})
    # Interquartile range, used downstream as the dispersion measure.
    with_iqr = grouped.with_columns((pl.col("q3") - pl.col("q1")).alias("iqr"))
    return with_iqr.sort("code").collect()
81
+
82
+
83
def compute_confidence(df: pl.DataFrame) -> pl.DataFrame:
    """
    Attach a confidence score that combines sample size and price dispersion.

    Three columns are added to the input frame:
        conf_volume: log1p(volume) / log1p(100), clipped to [0, 1], so it
            reaches 1 at 100 transactions.
        conf_stability: 1 - iqr / median_price_m2, clipped to [0, 1] when the
            median is positive, otherwise 0.
        confidence: 0.6 * conf_volume + 0.4 * conf_stability, rounded to
            3 decimals.
    """
    volume_score = (
        pl.col("volume").cast(pl.Float64).log1p() / pl.lit(100.0).log1p()
    ).clip(0.0, 1.0)

    # Relative spread: the interquartile range expressed as a share of the median.
    relative_spread = pl.col("iqr") / pl.col("median_price_m2")
    stability_score = (
        pl.when(pl.col("median_price_m2") > 0)
        .then((1.0 - relative_spread).clip(0.0, 1.0))
        .otherwise(0.0)
    )

    scored = df.with_columns(
        volume_score.alias("conf_volume"),
        stability_score.alias("conf_stability"),
    )

    # Weighted average of the two components.
    overall = pl.col("conf_volume") * 0.6 + pl.col("conf_stability") * 0.4
    return scored.with_columns(overall.round(3).alias("confidence"))
108
+
109
+
110
def aggregate_all_types(
    lf: pl.LazyFrame,
    group_col: str,
) -> pl.DataFrame:
    """
    Build the statistics for one level: all types combined, then each property type.

    The combined rows are labelled "tous"; the per-type rows are labelled with
    the short names from TYPE_LOCAL_SHORT.

    Returns:
        One DataFrame stacking every slice, with the aggregate_level and
        compute_confidence columns plus a "type" column.
    """

    def _scored_slice(label: str, property_type: str | None = None) -> pl.DataFrame:
        # Aggregate one slice, score it, and tag it with its type label.
        stats = aggregate_level(lf, group_col, property_type=property_type)
        return compute_confidence(stats).with_columns(pl.lit(label).alias("type"))

    frames = [_scored_slice("tous")]
    frames.extend(
        _scored_slice(short_name, full_name)
        for full_name, short_name in TYPE_LOCAL_SHORT.items()
    )
    return pl.concat(frames, how="vertical_relaxed")
136
+
137
+
138
def aggregate_all_levels(lf: pl.LazyFrame) -> dict[str, pl.DataFrame]:
    """
    Aggregate the cleaned data at every level listed in AGGREGATION_LEVELS.

    Args:
        lf: Cleaned LazyFrame.

    Returns:
        Dict mapping each level name to its aggregated DataFrame.
    """
    per_level: dict[str, pl.DataFrame] = {}
    for level_name in AGGREGATION_LEVELS:
        group_col = LEVEL_TO_COLUMN[level_name]
        logger.info("Aggregating level: %s (group by %s)", level_name, group_col)

        frame = aggregate_all_types(lf, group_col)
        per_level[level_name] = frame

        logger.info(
            " → %d rows (%d unique codes)",
            len(frame),
            frame["code"].n_unique(),
        )
    return per_level
159
+
160
+
161
def export_json(aggregated: dict[str, pl.DataFrame], output_dir: Path | None = None) -> None:
    """
    Export aggregated DataFrames to JSON files for the frontend.

    Each level is written to ``prices_<level>.json`` with the structure:
        { "code1": { "tous": {...}, "appartement": {...}, ... }, ... }
    where every inner object holds median, mean, q1, q3 (rounded to one
    decimal, 0 when missing), volume and confidence.

    Args:
        aggregated: Dict from aggregate_all_levels().
        output_dir: Directory to write JSONs. Defaults to config.AGGREGATED_DIR.
            It is created, parents included, if it does not exist.
    """
    output_dir = output_dir or AGGREGATED_DIR
    output_dir.mkdir(parents=True, exist_ok=True)

    for level, df in aggregated.items():
        data: dict[str, dict[str, dict]] = {}
        for row in df.iter_rows(named=True):
            # NOTE(review): a null code would be exported under the key "None";
            # confirm upstream cleaning guarantees non-null grouping codes.
            code = str(row["code"])
            data.setdefault(code, {})[row["type"]] = {
                "median": round(row["median_price_m2"] or 0, 1),
                "mean": round(row["mean_price_m2"] or 0, 1),
                "q1": round(row["q1"] or 0, 1),
                "q3": round(row["q3"] or 0, 1),
                "volume": row["volume"],
                "confidence": row["confidence"],
            }

        path = output_dir / f"prices_{level}.json"
        # ensure_ascii=False writes non-ASCII characters verbatim, so the file
        # is opened explicitly as UTF-8 instead of the platform default
        # encoding, which may be unable to encode them.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
        logger.info("Exported: %s (%d entries)", path.name, len(data))
src/cleaner.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Clean and filter raw DVF transaction data.
3
+
4
+ Responsibility: Load raw CSVs, apply quality filters, deduplicate
5
+ multi-row mutations, and produce a clean dataset ready for price calculation.
6
+ """
7
+
8
+ import logging
9
+ from pathlib import Path
10
+
11
+ import polars as pl
12
+
13
+ from src.config import (
14
+ ARRONDISSEMENT_MAPPING,
15
+ DVF_COLUMNS,
16
+ PRICE_M2_MAX,
17
+ PRICE_M2_MIN,
18
+ REFERENCE_DATE,
19
+ SURFACE_MAX,
20
+ SURFACE_MIN,
21
+ TEMPORAL_LAMBDA,
22
+ VALID_NATURE_MUTATION,
23
+ VALID_TYPE_LOCAL,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
def load_raw_csv(path: Path) -> pl.LazyFrame:
    """
    Lazily scan one raw DVF CSV, keeping only the configured columns.

    Args:
        path: Path to the CSV file (decompressed).

    Returns:
        LazyFrame restricted to DVF_COLUMNS.
    """
    # Identifier-like and textual columns are read as strings so that codes
    # are never parsed as numbers (Corsica has 2A/2B prefixes).
    text_columns = (
        "id_mutation",
        "code_postal",
        "code_commune",
        "code_departement",
        "id_parcelle",
        "code_type_local",
        "date_mutation",
        "nom_commune",
        "nature_mutation",
        "type_local",
    )
    forced_dtypes = {name: pl.Utf8 for name in text_columns}

    scanned = pl.scan_csv(
        path,
        separator=",",
        infer_schema_length=10_000,
        null_values=["", "NA", "null"],
        schema_overrides=forced_dtypes,
    )
    return scanned.select(DVF_COLUMNS)
59
+
60
+
61
def load_multiple_csvs(paths: list[Path]) -> pl.LazyFrame:
    """
    Scan several DVF CSV files and stack them into one LazyFrame.

    Args:
        paths: List of paths to CSV files.

    Returns:
        Single LazyFrame with the rows of every file, concatenated
        vertically with relaxed schema matching.
    """
    yearly_frames = list(map(load_raw_csv, paths))
    return pl.concat(yearly_frames, how="vertical_relaxed")
73
+
74
+
75
def filter_sales(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Keep only standard property sales.

    A row is kept when all of the following hold:
    - nature_mutation equals VALID_NATURE_MUTATION
    - type_local is one of VALID_TYPE_LOCAL
    - valeur_fonciere is present and > 0
    - surface_reelle_bati is present and > 0
    """
    price = pl.col("valeur_fonciere")
    built_surface = pl.col("surface_reelle_bati")

    is_sale = pl.col("nature_mutation") == VALID_NATURE_MUTATION
    is_kept_type = pl.col("type_local").is_in(VALID_TYPE_LOCAL)
    has_price = price.is_not_null() & (price > 0)
    has_surface = built_surface.is_not_null() & (built_surface > 0)

    return lf.filter(is_sale & is_kept_type & has_price & has_surface)
93
+
94
+
95
def deduplicate_mutations(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Collapse multi-row mutations into one row per id_mutation.

    A single sale (id_mutation) can span several rows when it covers
    several lots. This function:
    1. Drops every mutation whose rows carry more than one distinct
       type_local (mixed-type sales have ambiguous price attribution).
    2. Groups the remaining rows by id_mutation, summing the built surface
       and room count and taking the first value of every other column.

    NOTE(review): in clean() this runs after filter_sales(), so rows of
    property types outside VALID_TYPE_LOCAL are already gone when types are
    counted — confirm that is the intended definition of "mixed".

    Returns:
        One row per mutation with aggregated surface.
    """
    mutation_key = "id_mutation"

    # Ids of mutations whose rows all share a single property type.
    homogeneous_ids = (
        lf.group_by(mutation_key)
        .agg(pl.col("type_local").n_unique().alias("n_types"))
        .filter(pl.col("n_types") == 1)
        .select(mutation_key)
    )
    homogeneous_rows = lf.join(homogeneous_ids, on=mutation_key, how="inner")

    def first_of(*names: str) -> list[pl.Expr]:
        return [pl.col(name).first() for name in names]

    # Output column order matters to downstream consumers; keep it stable.
    per_mutation = [
        *first_of(
            "date_mutation",
            "nature_mutation",
            "valeur_fonciere",
            "code_postal",
            "code_commune",
            "nom_commune",
            "code_departement",
            "id_parcelle",
            "type_local",
        ),
        pl.col("surface_reelle_bati").sum(),
        pl.col("nombre_pieces_principales").sum(),
        *first_of("nombre_lots", "longitude", "latitude"),
    ]
    return homogeneous_rows.group_by(mutation_key).agg(per_mutation)
133
+
134
+
135
def normalize_commune_codes(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Map arrondissement codes back to parent city codes.

    Paris (75101-75120 → 75056), Lyon (69381-69389 → 69123),
    Marseille (13201-13216 → 13055). Codes that are not keys of
    ARRONDISSEMENT_MAPPING are passed through unchanged.

    Adds:
        - code_commune_city: commune code with arrondissements folded
          into their parent city.
        - code_commune_original: untouched copy of code_commune.
    """
    commune = pl.col("code_commune").cast(pl.Utf8)
    return lf.with_columns(
        # A single mapping lookup instead of one nested when/then per
        # arrondissement; values missing from the mapping are kept as-is.
        commune.replace(ARRONDISSEMENT_MAPPING).alias("code_commune_city"),
        # Keep original for section-level work
        pl.col("code_commune").alias("code_commune_original"),
    )
155
+
156
+
157
def remove_outliers(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Drop transactions whose surface or price/m² is implausible.

    Both bounds of each range are inclusive and come from config
    (SURFACE_MIN/SURFACE_MAX, PRICE_M2_MIN/PRICE_M2_MAX). The thresholds
    are absolute and intentionally conservative, to avoid removing valid
    luxury or rural transactions.
    """
    plausible_surface = pl.col("surface_reelle_bati").is_between(SURFACE_MIN, SURFACE_MAX)
    plausible_price = pl.col("prix_m2").is_between(PRICE_M2_MIN, PRICE_M2_MAX)
    return lf.filter(plausible_surface & plausible_price)
170
+
171
+
172
def add_derived_columns(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Add computed columns needed downstream.

    Adds:
        - prix_m2: valeur_fonciere / surface_reelle_bati
        - code_section: first 10 chars of id_parcelle
        - code_region: looked up from code_departement via DEPT_TO_REGION;
          "unknown" when the department is missing or not in the mapping
        - year: first 4 chars of date_mutation
        - months_since: days between REFERENCE_DATE and the transaction,
          divided by 30.44 (negative for transactions after the reference)
        - temporal_weight: TEMPORAL_LAMBDA ** months_since, with
          months_since clipped at 0 so later transactions get weight 1
    """
    from src.config import DEPT_TO_REGION

    # One mapping lookup instead of a nested when/then per department.
    # fill_null keeps a null department mapped to "unknown" as well.
    region_expr = (
        pl.col("code_departement")
        .cast(pl.Utf8)
        .replace_strict(DEPT_TO_REGION, default="unknown")
        .fill_null("unknown")
    )

    ref_date = pl.lit(REFERENCE_DATE).str.to_date("%Y-%m-%d")
    transaction_date = pl.col("date_mutation").cast(pl.Utf8).str.to_date("%Y-%m-%d")

    return lf.with_columns(
        (pl.col("valeur_fonciere") / pl.col("surface_reelle_bati")).alias("prix_m2"),
        pl.col("id_parcelle").cast(pl.Utf8).str.slice(0, 10).alias("code_section"),
        region_expr.alias("code_region"),
        pl.col("date_mutation").cast(pl.Utf8).str.slice(0, 4).alias("year"),
        # Temporal weighting: months since reference date
        (
            (ref_date - transaction_date).dt.total_days().cast(pl.Float64)
            / 30.44  # average days per month
        ).alias("months_since"),
    ).with_columns(
        # Exponential decay weight
        (pl.lit(TEMPORAL_LAMBDA) ** pl.col("months_since").clip(0.0, None)).alias(
            "temporal_weight"
        ),
    )
215
+
216
+
217
def clean(paths: list[Path]) -> pl.LazyFrame:
    """
    Run every cleaning stage, in order, on the raw DVF files.

    Stage order matters: prix_m2 is created by add_derived_columns() and
    consumed by remove_outliers().

    Args:
        paths: List of raw CSV file paths.

    Returns:
        Cleaned LazyFrame ready for aggregation.
    """
    logger.info("Loading %d raw files...", len(paths))
    frame = load_multiple_csvs(paths)

    stages = (
        ("Filtering sales...", filter_sales),
        ("Deduplicating mutations...", deduplicate_mutations),
        ("Adding derived columns...", add_derived_columns),
        ("Removing outliers...", remove_outliers),
        ("Normalizing commune codes...", normalize_commune_codes),
    )
    for announcement, stage in stages:
        logger.info(announcement)
        frame = stage(frame)

    return frame
src/config.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration constants for the DVF data pipeline.
3
+
4
+ Single source of truth for paths, URLs, thresholds, and mappings.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ # ---------------------------------------------------------------------------
10
+ # Paths
11
+ # ---------------------------------------------------------------------------
12
# Parent of the directory that contains this file.
ROOT_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = ROOT_DIR / "data"
RAW_DIR = DATA_DIR / "raw"  # downloaded archives and decompressed CSVs
PROCESSED_DIR = DATA_DIR / "processed"  # cleaned parquet written by the pipeline
AGGREGATED_DIR = DATA_DIR / "aggregated"  # JSON exports
SECTIONS_DIR = AGGREGATED_DIR / "sections"

# ---------------------------------------------------------------------------
# DVF data source
# ---------------------------------------------------------------------------
DVF_BASE_URL = "https://files.data.gouv.fr/geo-dvf/latest/csv"
# range() excludes its upper bound, hence 2026 for a last year of 2025.
DVF_YEARS = list(range(2014, 2026))  # 2014-2025: full dataset per Carlos's feedback
24
+
25
def dvf_url(year: int) -> str:
    """Build the URL of the national geolocalized DVF archive for ``year``."""
    return "/".join((DVF_BASE_URL, str(year), "full.csv.gz"))
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Columns we actually need (saves memory on load)
31
+ # ---------------------------------------------------------------------------
32
# Columns selected from every raw CSV at load time.
DVF_COLUMNS = [
    "id_mutation",
    "date_mutation",
    "nature_mutation",
    "valeur_fonciere",
    "code_postal",
    "code_commune",
    "nom_commune",
    "code_departement",
    "id_parcelle",
    "code_type_local",
    "type_local",
    "surface_reelle_bati",
    "nombre_pieces_principales",
    "nombre_lots",
    "longitude",
    "latitude",
]

# ---------------------------------------------------------------------------
# Filtering thresholds
# ---------------------------------------------------------------------------
VALID_NATURE_MUTATION = "Vente"

VALID_TYPE_LOCAL = ["Appartement", "Maison"]  # Residential only per Carlos's feedback

# Full type_local value → short key used for the "type" field of the exports.
TYPE_LOCAL_SHORT = {
    "Appartement": "appartement",
    "Maison": "maison",
}

# ---------------------------------------------------------------------------
# Temporal weighting parameters
# ---------------------------------------------------------------------------
REFERENCE_DATE = "2025-01-01"  # Anchor date for temporal decay
TEMPORAL_LAMBDA = 0.97  # Monthly decay factor (half-life ~23 months)
TRIM_FRACTION = 0.20  # Trim 20% from each tail for trimmed mean

# Price per m² bounds for outlier removal (both ends are kept by the filter)
PRICE_M2_MIN = 200  # €/m² — below this is almost certainly an error
PRICE_M2_MAX = 25_000  # €/m² — above this is extreme luxury / error
SURFACE_MIN = 9  # m² — below 9m² is legally not habitable in France
SURFACE_MAX = 1000  # m² — above this for a single unit is suspect
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # Department → Region mapping (2016 reform)
78
+ # ---------------------------------------------------------------------------
79
# Keys are "<region code>-<region name>"; values are comma-separated
# department codes belonging to that region.
_REGION_DEPTS = {
    "84-Auvergne-Rhône-Alpes": "01,03,07,15,26,38,42,43,63,69,73,74",
    "27-Bourgogne-Franche-Comté": "21,25,39,58,70,71,89,90",
    "53-Bretagne": "22,29,35,56",
    "24-Centre-Val de Loire": "18,28,36,37,41,45",
    "94-Corse": "2A,2B",
    "44-Grand Est": "08,10,51,52,54,55,57,67,68,88",
    "32-Hauts-de-France": "02,59,60,62,80",
    "11-Île-de-France": "75,77,78,91,92,93,94,95",
    "28-Normandie": "14,27,50,61,76",
    "75-Nouvelle-Aquitaine": "16,17,19,23,24,33,40,47,64,79,86,87",
    "76-Occitanie": "09,11,12,30,31,32,34,46,48,65,66,81,82",
    "52-Pays de la Loire": "44,49,53,72,85",
    "93-Provence-Alpes-Côte d'Azur": "04,05,06,13,83,84",
    "01-Guadeloupe": "971",
    "02-Martinique": "972",
    "03-Guyane": "973",
    "04-La Réunion": "974",
    "06-Mayotte": "976",
}


def _build_region_maps(region_depts: dict[str, str]) -> tuple[dict[str, str], dict[str, str]]:
    """Return (department code → region code, region code → region name)."""
    dept_to_region: dict[str, str] = {}
    region_names: dict[str, str] = {}
    for key, depts_str in region_depts.items():
        # Split on the first hyphen only: region names contain hyphens too.
        code, name = key.split("-", 1)
        region_names[code] = name
        for dept in depts_str.split(","):
            dept_to_region[dept.strip()] = code
    return dept_to_region, region_names


# Built inside a helper so the loop variables do not leak into the module
# namespace.
DEPT_TO_REGION: dict[str, str]
REGION_NAMES: dict[str, str]
DEPT_TO_REGION, REGION_NAMES = _build_region_maps(_REGION_DEPTS)

# Departments with no DVF data (Alsace-Moselle + Mayotte)
NO_DVF_DEPARTMENTS = {"57", "67", "68", "976"}
110
+
111
+ # ---------------------------------------------------------------------------
112
+ # Top 10 cities by population (INSEE code → name)
113
+ # ---------------------------------------------------------------------------
114
# NOTE(review): Strasbourg's code starts with "67", a department listed in
# NO_DVF_DEPARTMENTS — it will presumably never appear in the output; confirm.
TOP_10_CITIES: dict[str, str] = {
    "75056": "Paris",
    "13055": "Marseille",
    "69123": "Lyon",
    "31555": "Toulouse",
    "06088": "Nice",
    "44109": "Nantes",
    "34172": "Montpellier",
    "67482": "Strasbourg",
    "33063": "Bordeaux",
    "59350": "Lille",
}

# Paris, Lyon, Marseille have arrondissements — we need to map them back.
# Comprehensions keep the loop index out of the module namespace.
ARRONDISSEMENT_MAPPING: dict[str, str] = {
    # Paris: 75101-75120 → 75056
    **{f"751{n:02d}": "75056" for n in range(1, 21)},
    # Lyon: 69381-69389 → 69123
    **{f"6938{n}": "69123" for n in range(1, 10)},
    # Marseille: 13201-13216 → 13055
    **{f"132{n:02d}": "13055" for n in range(1, 17)},
}
138
+
139
+ # ---------------------------------------------------------------------------
140
+ # Aggregation levels
141
+ # ---------------------------------------------------------------------------
142
# Level names, coarsest to finest. Each name is also used in the exported
# file name (prices_<level>.json).
AGGREGATION_LEVELS = [
    "country",
    "region",
    "department",
    "commune",
    "postcode",
    "section",
]
src/downloader.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Download DVF geolocalized data from data.gouv.fr.
3
+
4
+ Responsibility: Fetch raw CSV files and store them locally.
5
+ Handles caching — won't re-download files that already exist.
6
+ """
7
+
8
+ import gzip
9
+ import logging
10
+ from pathlib import Path
11
+
12
+ import requests
13
+ from tqdm import tqdm
14
+
15
+ from src.config import DVF_YEARS, RAW_DIR, dvf_url
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ CHUNK_SIZE = 8192 # 8 KB chunks for streaming download
20
+
21
+
22
def download_file(url: str, dest: Path, *, force: bool = False) -> Path:
    """
    Download a single file with progress bar and caching.

    The payload is streamed into a sibling ``<name>.part`` file and renamed
    onto ``dest`` only once the transfer has completed, so an interrupted
    download never leaves a truncated file that a later run would mistake
    for a cached one.

    Args:
        url: Remote URL to download.
        dest: Local path to save the file.
        force: If True, re-download even if file exists.

    Returns:
        Path to the downloaded file.

    Raises:
        requests.HTTPError: If the server returns a non-2xx status.
    """
    if dest.exists() and not force:
        logger.info("Cached: %s", dest.name)
        return dest

    dest.parent.mkdir(parents=True, exist_ok=True)
    logger.info("Downloading: %s", url)

    partial = dest.with_name(dest.name + ".part")
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=300) as response:
            response.raise_for_status()

            total = int(response.headers.get("content-length", 0))
            with (
                open(partial, "wb") as f,
                tqdm(total=total, unit="B", unit_scale=True, desc=dest.name) as bar,
            ):
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
                    bar.update(len(chunk))
        partial.replace(dest)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        partial.unlink(missing_ok=True)

    logger.info("Saved: %s (%.1f MB)", dest.name, dest.stat().st_size / 1e6)
    return dest
58
+
59
+
60
def decompress_gz(gz_path: Path) -> Path:
    """
    Decompress a .gz file to .csv in the same directory.

    If the target already exists it is returned untouched. Otherwise the
    data is written to a sibling ``<name>.part`` file and renamed into
    place once complete, so an interrupted run cannot leave a truncated
    CSV that would later be reported as already decompressed.

    Args:
        gz_path: Path to the .gz file.

    Returns:
        Path to the decompressed .csv file.
    """
    csv_path = gz_path.with_suffix("")  # removes .gz
    if csv_path.exists():
        logger.info("Already decompressed: %s", csv_path.name)
        return csv_path

    logger.info("Decompressing: %s", gz_path.name)
    partial = csv_path.with_name(csv_path.name + ".part")
    try:
        with gzip.open(gz_path, "rb") as f_in, open(partial, "wb") as f_out:
            while chunk := f_in.read(CHUNK_SIZE * 128):
                f_out.write(chunk)
        partial.replace(csv_path)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        partial.unlink(missing_ok=True)

    logger.info("Decompressed: %s (%.1f MB)", csv_path.name, csv_path.stat().st_size / 1e6)
    return csv_path
82
+
83
+
84
def download_dvf_year(year: int, *, force: bool = False) -> Path:
    """
    Download and decompress DVF data for a single year.

    Args:
        year: Year to download (e.g. 2024).
        force: Re-download even if cached. The previously decompressed CSV
            is discarded too, so the result reflects the fresh archive.

    Returns:
        Path to the decompressed CSV file.
    """
    url = dvf_url(year)
    gz_path = RAW_DIR / f"dvf_{year}.csv.gz"
    download_file(url, gz_path, force=force)
    if force:
        # decompress_gz() returns an existing CSV as-is; drop the stale one.
        gz_path.with_suffix("").unlink(missing_ok=True)
    return decompress_gz(gz_path)
99
+
100
+
101
def download_all(years: list[int] | None = None, *, force: bool = False) -> list[Path]:
    """
    Download DVF data for all configured years.

    A year whose download fails (HTTP error status, timeout, connection
    error) is logged and skipped; the remaining years are still processed.

    Args:
        years: List of years to download. None means config.DVF_YEARS;
            an empty list downloads nothing.
        force: Re-download even if cached.

    Returns:
        List of paths to decompressed CSV files, one per year that succeeded.
    """
    if years is None:
        years = DVF_YEARS
    paths = []
    for year in years:
        try:
            paths.append(download_dvf_year(year, force=force))
        except requests.RequestException as e:
            # RequestException is the base class of HTTPError, so HTTP status
            # failures are still handled here alongside network failures.
            logger.error("Failed to download year %d: %s", year, e)
    return paths
121
+
122
+
123
# Script entry point: download every configured year with INFO-level logging.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    download_all()
src/pipeline.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Orchestrate the full DVF data pipeline.
3
+
4
+ Responsibility: Wire together download → clean → aggregate → export.
5
+ This is the main entry point for running the complete pipeline.
6
+ """
7
+
8
+ import logging
9
+ import time
10
+
11
+ from src.config import AGGREGATED_DIR, PROCESSED_DIR, RAW_DIR
12
+ from src.downloader import download_all
13
+ from src.cleaner import clean
14
+ from src.aggregator import aggregate_all_levels, export_json
15
+ from src.top_cities import compute_top_cities, export_top_cities
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def run(
    *,
    years: list[int] | None = None,
    skip_download: bool = False,
    skip_section: bool = False,
) -> None:
    """
    Run the complete pipeline: download → clean → aggregate → export.

    Args:
        years: Years to process. None = all configured years (or, with
            skip_download, every raw CSV already on disk).
        skip_download: Skip download step (use existing raw files).
        skip_section: Skip section-level aggregation (slow, large output).
    """
    t0 = time.time()

    # Step 1: Download
    if not skip_download:
        logger.info("=" * 60)
        logger.info("STEP 1: Downloading DVF data")
        logger.info("=" * 60)
        csv_paths = download_all(years)
    else:
        logger.info("Skipping download, using existing files...")
        # The pattern must match the whole file name, so "*.csv.gz" archives
        # are not picked up here.
        csv_paths = sorted(RAW_DIR.glob("dvf_*.csv"))
        if years is not None:
            # Honor the requested years; files are named dvf_<year>.csv.
            wanted = {f"dvf_{year}.csv" for year in years}
            csv_paths = [p for p in csv_paths if p.name in wanted]
        logger.info("Found %d raw CSV files", len(csv_paths))

    if not csv_paths:
        logger.error("No CSV files found. Run without --skip-download first.")
        return

    # Step 2: Clean
    logger.info("=" * 60)
    logger.info("STEP 2: Cleaning data")
    logger.info("=" * 60)
    lf = clean(csv_paths)

    # Materialize once and save as parquet for reuse
    logger.info("Materializing cleaned data...")
    PROCESSED_DIR.mkdir(parents=True, exist_ok=True)
    df_clean = lf.collect()
    parquet_path = PROCESSED_DIR / "dvf_clean.parquet"
    df_clean.write_parquet(parquet_path)
    logger.info(
        "Saved: %s (%d rows, %.1f MB)",
        parquet_path.name,
        len(df_clean),
        parquet_path.stat().st_size / 1e6,
    )

    # Step 3: Aggregate
    logger.info("=" * 60)
    logger.info("STEP 3: Aggregating prices")
    logger.info("=" * 60)
    lf_clean = df_clean.lazy()

    if skip_section:
        from src.aggregator import LEVEL_TO_COLUMN, aggregate_all_types
        from src.config import AGGREGATION_LEVELS

        aggregated = {}
        for level in AGGREGATION_LEVELS:
            if level == "section":
                continue
            logger.info("Aggregating: %s", level)
            aggregated[level] = aggregate_all_types(lf_clean, LEVEL_TO_COLUMN[level])
    else:
        aggregated = aggregate_all_levels(lf_clean)

    export_json(aggregated)

    # Step 4: Top cities
    logger.info("=" * 60)
    logger.info("STEP 4: Top 10 cities")
    logger.info("=" * 60)
    df_cities = compute_top_cities(lf_clean)
    export_top_cities(df_cities)

    elapsed = time.time() - t0
    logger.info("=" * 60)
    logger.info("Pipeline complete in %.1f seconds", elapsed)
    logger.info("Output: %s", AGGREGATED_DIR)
    logger.info("=" * 60)
103
+
104
+
105
# Script entry point: run the whole pipeline with default options and
# timestamped INFO-level logging.
if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )
    run()
src/top_cities.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Compute price/m² breakdown for the top 10 French cities.
3
+
4
+ Responsibility: Produce a clean table of median price per m²
5
+ by property type for the largest cities.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ from pathlib import Path
11
+
12
+ import polars as pl
13
+
14
+ from src.config import AGGREGATED_DIR, TOP_10_CITIES, TYPE_LOCAL_SHORT
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def compute_top_cities(lf: pl.LazyFrame) -> pl.DataFrame:
    """
    Summarise price/m² for the cities in TOP_10_CITIES, per property type.

    Rows are matched on code_commune_city (arrondissements already mapped
    to their parent city), so Paris, Lyon and Marseille are each
    aggregated as a single city.

    Args:
        lf: Cleaned LazyFrame with code_commune_city column.

    Returns:
        DataFrame with columns: city_code, city_name, type, median_price_m2,
        mean_price_m2, volume, q1, q3. "type" is "tous" for all property
        types combined, otherwise a short name from TYPE_LOCAL_SHORT.
    """
    top_city_rows = lf.filter(
        pl.col("code_commune_city").is_in(list(TOP_10_CITIES.keys()))
    )

    def summarise(subset: pl.LazyFrame, label: str) -> pl.DataFrame:
        # One row of statistics per city, tagged with the type label.
        price = pl.col("prix_m2")
        return (
            subset.group_by("code_commune_city")
            .agg(
                price.median().alias("median_price_m2"),
                price.mean().alias("mean_price_m2"),
                price.quantile(0.25).alias("q1"),
                price.quantile(0.75).alias("q3"),
                price.count().alias("volume"),
            )
            .with_columns(pl.lit(label).alias("type"))
            .collect()
        )

    # All types combined first, then one frame per property type.
    frames = [summarise(top_city_rows, "tous")]
    frames.extend(
        summarise(top_city_rows.filter(pl.col("type_local") == full_name), short_name)
        for full_name, short_name in TYPE_LOCAL_SHORT.items()
    )
    stacked = pl.concat(frames, how="vertical_relaxed")

    # Attach the human-readable city name, then expose the code as city_code.
    named = stacked.with_columns(
        pl.col("code_commune_city")
        .replace_strict(dict(TOP_10_CITIES), default="Unknown")
        .alias("city_name")
    ).rename({"code_commune_city": "city_code"})

    return named.sort(["city_name", "type"])
85
+
86
+
87
def export_top_cities(df: pl.DataFrame, output_dir: Path | None = None) -> None:
    """
    Export top cities data to JSON (``top_cities.json``).

    Output format:
        {
          "Paris": {
            "code": "75056",
            "tous": {"median": 10500, "volume": 45000, ...},
            "appartement": {...},
            ...
          },
          ...
        }

    Price statistics are rounded to one decimal; a null statistic is
    written as 0.

    Args:
        df: DataFrame from compute_top_cities().
        output_dir: Directory to write the JSON. Defaults to
            config.AGGREGATED_DIR.
    """
    output_dir = output_dir or AGGREGATED_DIR
    output_dir.mkdir(parents=True, exist_ok=True)

    data: dict[str, dict] = {}
    for row in df.iter_rows(named=True):
        city = data.setdefault(row["city_name"], {"code": row["city_code"]})
        city[row["type"]] = {
            "median": round(row["median_price_m2"] or 0, 1),
            "mean": round(row["mean_price_m2"] or 0, 1),
            "q1": round(row["q1"] or 0, 1),
            "q3": round(row["q3"] or 0, 1),
            "volume": row["volume"],
        }

    path = output_dir / "top_cities.json"
    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # encoding must not depend on the platform's locale default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    logger.info("Exported: %s (%d cities)", path.name, len(data))