Spaces:
Running
Running
Commit
·
ba08c19
0
Parent(s):
feat: initial DVF data pipeline with 6-level price aggregation
Browse files- .gitignore +48 -0
- README.md +0 -0
- data/aggregated/prices_commune.json +0 -0
- data/aggregated/prices_country.json +1 -0
- data/aggregated/prices_department.json +1 -0
- data/aggregated/prices_postcode.json +0 -0
- data/aggregated/prices_region.json +1 -0
- data/aggregated/top_cities.json +281 -0
- main.py +6 -0
- ml_challenge.txt +36 -0
- notebooks/01_data_exploration.ipynb +0 -0
- pyproject.toml +17 -0
- src/__init__.py +1 -0
- src/aggregator.py +194 -0
- src/cleaner.py +245 -0
- src/config.py +149 -0
- src/downloader.py +125 -0
- src/pipeline.py +110 -0
- src/top_cities.py +121 -0
.gitignore
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ---- Data (raw and intermediate - too large to commit) ----
|
| 2 |
+
data/raw/
|
| 3 |
+
data/processed/
|
| 4 |
+
# NOTE: data/aggregated/ is NOT ignored - those JSON files are needed by the app
|
| 5 |
+
|
| 6 |
+
# ---- Python ----
|
| 7 |
+
__pycache__/
|
| 8 |
+
*.py[cod]
|
| 9 |
+
*.egg-info/
|
| 10 |
+
dist/
|
| 11 |
+
build/
|
| 12 |
+
wheels/
|
| 13 |
+
.eggs/
|
| 14 |
+
*.egg
|
| 15 |
+
.venv/
|
| 16 |
+
venv/
|
| 17 |
+
|
| 18 |
+
# ---- Jupyter ----
|
| 19 |
+
.ipynb_checkpoints/
|
| 20 |
+
|
| 21 |
+
# ---- Environment / secrets ----
|
| 22 |
+
.env
|
| 23 |
+
.env.*
|
| 24 |
+
|
| 25 |
+
# ---- Claude Code / MCP / IDE ----
|
| 26 |
+
.claude/
|
| 27 |
+
.mcp/
|
| 28 |
+
.vscode/
|
| 29 |
+
.idea/
|
| 30 |
+
*.code-workspace
|
| 31 |
+
|
| 32 |
+
# ---- Working notes (not part of the deliverable) ----
|
| 33 |
+
updates/
|
| 34 |
+
|
| 35 |
+
# ---- Reference repos (downloaded for research, not part of project) ----
|
| 36 |
+
explore.data.gouv.fr/
|
| 37 |
+
data-gouv-skill/
|
| 38 |
+
datagouv-mcp/
|
| 39 |
+
stats-explorer-datagouv/
|
| 40 |
+
|
| 41 |
+
# ---- OS ----
|
| 42 |
+
.DS_Store
|
| 43 |
+
Thumbs.db
|
| 44 |
+
|
| 45 |
+
# ---- Misc ----
|
| 46 |
+
*.log
|
| 47 |
+
uv.lock
|
| 48 |
+
.python-version
|
README.md
ADDED
|
File without changes
|
data/aggregated/prices_commune.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/aggregated/prices_country.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"FR": {"tous": {"median": 2307.7, "mean": 2953.2, "q1": 1361.4, "q3": 3690.4, "volume": 4735156, "confidence": 0.6}, "appartement": {"median": 3106.7, "mean": 3878.9, "q1": 1985.3, "q3": 4742.4, "volume": 1987882, "confidence": 0.645}, "maison": {"median": 1858.8, "mean": 2266.8, "q1": 1112.6, "q3": 2900.8, "volume": 2549352, "confidence": 0.615}, "local": {"median": 1597.9, "mean": 2497.1, "q1": 827.0, "q3": 3000.0, "volume": 197922, "confidence": 0.6}}}
|
data/aggregated/prices_department.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"01": {"tous": {"median": 2250.0, "mean": 2545.7, "q1": 1500.0, "q3": 3278.7, "volume": 44534, "confidence": 0.684}, "appartement": {"median": 2380.0, "mean": 2771.5, "q1": 1630.9, "q3": 3823.5, "volume": 15712, "confidence": 0.631}, "maison": {"median": 2250.0, "mean": 2488.5, "q1": 1500.0, "q3": 3125.0, "volume": 26914, "confidence": 0.711}, "local": {"median": 1098.4, "mean": 1493.8, "q1": 645.6, "q3": 1839.5, "volume": 1908, "confidence": 0.6}}, "02": {"tous": {"median": 1105.3, "mean": 1215.1, "q1": 749.5, "q3": 1545.5, "volume": 33145, "confidence": 0.712}, "appartement": {"median": 1197.9, "mean": 1298.5, "q1": 921.9, "q3": 1588.8, "volume": 3910, "confidence": 0.777}, "maison": {"median": 1092.1, "mean": 1186.5, "q1": 737.8, "q3": 1535.7, "volume": 28134, "confidence": 0.708}, "local": {"median": 877.2, "mean": 1648.9, "q1": 459.8, "q3": 1777.8, "volume": 1101, "confidence": 0.6}}, "03": {"tous": {"median": 971.7, "mean": 1111.3, "q1": 613.2, "q3": 1437.5, "volume": 27890, "confidence": 0.661}, "appartement": {"median": 1121.5, "mean": 1249.1, "q1": 724.6, "q3": 1578.9, "volume": 7543, "confidence": 0.695}, "maison": {"median": 926.1, "mean": 1058.1, "q1": 588.2, "q3": 1383.5, "volume": 19418, "confidence": 0.657}, "local": {"median": 735.3, "mean": 1105.8, "q1": 416.7, "q3": 1270.5, "volume": 929, "confidence": 0.6}}, "04": {"tous": {"median": 1987.2, "mean": 2106.6, "q1": 1363.6, "q3": 2681.7, "volume": 17062, "confidence": 0.735}, "appartement": {"median": 1890.6, "mean": 1987.9, "q1": 1367.1, "q3": 2493.3, "volume": 7888, "confidence": 0.762}, "maison": {"median": 2170.0, "mean": 2270.8, "q1": 1443.8, "q3": 2891.6, "volume": 8453, "confidence": 0.733}, "local": {"median": 1118.9, "mean": 1480.4, "q1": 687.5, "q3": 1880.0, "volume": 721, "confidence": 0.6}}, "05": {"tous": {"median": 2418.4, "mean": 2596.6, "q1": 1750.0, "q3": 3222.2, "volume": 19480, "confidence": 0.756}, "appartement": {"median": 2473.2, "mean": 2661.5, "q1": 1875.0, "q3": 3254.0, 
"volume": 12155, "confidence": 0.777}, "maison": {"median": 2433.3, "mean": 2618.8, "q1": 1536.0, "q3": 3333.3, "volume": 5287, "confidence": 0.705}, "local": {"median": 2049.6, "mean": 2151.6, "q1": 1324.8, "q3": 2677.1, "volume": 2038, "confidence": 0.736}}, "06": {"tous": {"median": 4347.8, "mean": 4709.7, "q1": 3220.9, "q3": 5659.1, "volume": 138875, "confidence": 0.776}, "appartement": {"median": 4444.4, "mean": 4801.3, "q1": 3423.9, "q3": 5684.2, "volume": 110545, "confidence": 0.797}, "maison": {"median": 4000.0, "mean": 4622.5, "q1": 2520.2, "q3": 5769.2, "volume": 21801, "confidence": 0.675}, "local": {"median": 2900.0, "mean": 3449.3, "q1": 1829.3, "q3": 4400.0, "volume": 6529, "confidence": 0.645}}, "07": {"tous": {"median": 1634.1, "mean": 1786.4, "q1": 1067.6, "q3": 2338.2, "volume": 23768, "confidence": 0.689}, "appartement": {"median": 1384.6, "mean": 1510.9, "q1": 959.0, "q3": 1916.3, "volume": 6004, "confidence": 0.723}, "maison": {"median": 1804.0, "mean": 1921.8, "q1": 1178.6, "q3": 2481.8, "volume": 16726, "confidence": 0.711}, "local": {"median": 918.6, "mean": 1198.0, "q1": 533.4, "q3": 1500.0, "volume": 1038, "confidence": 0.6}}, "08": {"tous": {"median": 1045.5, "mean": 1154.5, "q1": 684.4, "q3": 1506.8, "volume": 16526, "confidence": 0.685}, "appartement": {"median": 964.3, "mean": 1022.0, "q1": 684.8, "q3": 1288.7, "volume": 3070, "confidence": 0.75}, "maison": {"median": 1089.1, "mean": 1185.5, "q1": 702.5, "q3": 1581.2, "volume": 12847, "confidence": 0.677}, "local": {"median": 692.3, "mean": 1169.0, "q1": 432.4, "q3": 1155.2, "volume": 609, "confidence": 0.6}}, "09": {"tous": {"median": 1277.1, "mean": 1384.5, "q1": 833.3, "q3": 1785.7, "volume": 14126, "confidence": 0.702}, "appartement": {"median": 1296.7, "mean": 1383.2, "q1": 893.8, "q3": 1704.5, "volume": 2343, "confidence": 0.75}, "maison": {"median": 1283.2, "mean": 1394.6, "q1": 833.3, "q3": 1808.8, "volume": 11335, "confidence": 0.696}, "local": {"median": 855.7, "mean": 
1136.5, "q1": 500.0, "q3": 1649.5, "volume": 448, "confidence": 0.6}}, "10": {"tous": {"median": 1403.4, "mean": 1492.9, "q1": 944.7, "q3": 1904.8, "volume": 22180, "confidence": 0.726}, "appartement": {"median": 1529.7, "mean": 1649.0, "q1": 1189.2, "q3": 1948.3, "volume": 7506, "confidence": 0.802}, "maison": {"median": 1315.8, "mean": 1418.5, "q1": 844.4, "q3": 1891.0, "volume": 13764, "confidence": 0.682}, "local": {"median": 969.0, "mean": 1331.4, "q1": 559.4, "q3": 1509.4, "volume": 910, "confidence": 0.608}}, "11": {"tous": {"median": 1700.0, "mean": 1939.3, "q1": 1070.9, "q3": 2520.8, "volume": 41904, "confidence": 0.659}, "appartement": {"median": 2031.0, "mean": 2207.6, "q1": 1287.0, "q3": 2910.4, "volume": 12311, "confidence": 0.68}, "maison": {"median": 1608.6, "mean": 1852.0, "q1": 1011.9, "q3": 2355.6, "volume": 28398, "confidence": 0.666}, "local": {"median": 950.0, "mean": 1251.1, "q1": 547.8, "q3": 1571.4, "volume": 1195, "confidence": 0.6}}, "12": {"tous": {"median": 1331.3, "mean": 1435.8, "q1": 821.3, "q3": 1943.5, "volume": 21024, "confidence": 0.663}, "appartement": {"median": 1641.0, "mean": 1641.7, "q1": 1122.4, "q3": 2145.3, "volume": 6345, "confidence": 0.751}, "maison": {"median": 1223.7, "mean": 1371.3, "q1": 760.0, "q3": 1848.8, "volume": 13774, "confidence": 0.644}, "local": {"median": 755.4, "mean": 973.0, "q1": 429.4, "q3": 1235.3, "volume": 905, "confidence": 0.6}}, "13": {"tous": {"median": 3283.6, "mean": 3552.7, "q1": 2302.6, "q3": 4406.8, "volume": 149587, "confidence": 0.744}, "appartement": {"median": 3112.2, "mean": 3348.8, "q1": 2220.0, "q3": 4184.6, "volume": 96487, "confidence": 0.748}, "maison": {"median": 3798.1, "mean": 4132.5, "q1": 2830.9, "q3": 4955.2, "volume": 45523, "confidence": 0.776}, "local": {"median": 2064.3, "mean": 2665.1, "q1": 1269.2, "q3": 3271.0, "volume": 7577, "confidence": 0.612}}, "14": {"tous": {"median": 2400.0, "mean": 2644.1, "q1": 1481.5, "q3": 3381.0, "volume": 64499, "confidence": 0.683}, 
"appartement": {"median": 3015.9, "mean": 3312.5, "q1": 2191.0, "q3": 4105.3, "volume": 25762, "confidence": 0.746}, "maison": {"median": 1988.8, "mean": 2193.3, "q1": 1137.2, "q3": 2830.2, "volume": 36108, "confidence": 0.659}, "local": {"median": 1785.7, "mean": 2286.8, "q1": 894.6, "q3": 2947.4, "volume": 2629, "confidence": 0.6}}, "15": {"tous": {"median": 1098.6, "mean": 1216.9, "q1": 700.0, "q3": 1586.2, "volume": 10635, "confidence": 0.677}, "appartement": {"median": 1210.9, "mean": 1292.1, "q1": 883.9, "q3": 1575.5, "volume": 2845, "confidence": 0.772}, "maison": {"median": 1060.7, "mean": 1196.2, "q1": 664.7, "q3": 1612.1, "volume": 7399, "confidence": 0.643}, "local": {"median": 633.3, "mean": 1060.6, "q1": 388.3, "q3": 1044.8, "volume": 391, "confidence": 0.6}}, "16": {"tous": {"median": 1170.0, "mean": 1268.0, "q1": 757.5, "q3": 1663.0, "volume": 28466, "confidence": 0.69}, "appartement": {"median": 1486.5, "mean": 1529.6, "q1": 1156.2, "q3": 1807.0, "volume": 2841, "confidence": 0.825}, "maison": {"median": 1131.6, "mean": 1241.9, "q1": 739.6, "q3": 1641.8, "volume": 24709, "confidence": 0.681}, "local": {"median": 779.0, "mean": 1162.0, "q1": 439.7, "q3": 1294.0, "volume": 916, "confidence": 0.6}}, "17": {"tous": {"median": 2538.5, "mean": 2917.4, "q1": 1501.8, "q3": 3898.6, "volume": 68098, "confidence": 0.622}, "appartement": {"median": 3692.3, "mean": 3784.1, "q1": 2398.4, "q3": 4963.0, "volume": 14112, "confidence": 0.722}, "maison": {"median": 2307.7, "mean": 2706.4, "q1": 1358.0, "q3": 3542.2, "volume": 51383, "confidence": 0.621}, "local": {"median": 1817.2, "mean": 2383.2, "q1": 945.9, "q3": 3302.3, "volume": 2603, "confidence": 0.6}}, "18": {"tous": {"median": 1000.0, "mean": 1128.5, "q1": 647.5, "q3": 1489.8, "volume": 24469, "confidence": 0.663}, "appartement": {"median": 1282.0, "mean": 1312.1, "q1": 865.4, "q3": 1698.1, "volume": 3982, "confidence": 0.74}, "maison": {"median": 958.3, "mean": 1098.8, "q1": 626.9, "q3": 1433.7, "volume": 
19736, "confidence": 0.663}, "local": {"median": 700.0, "mean": 937.1, "q1": 400.0, "q3": 1150.4, "volume": 751, "confidence": 0.6}}, "19": {"tous": {"median": 1132.4, "mean": 1261.2, "q1": 721.2, "q3": 1666.7, "volume": 18577, "confidence": 0.666}, "appartement": {"median": 1300.0, "mean": 1360.8, "q1": 873.0, "q3": 1755.6, "volume": 4063, "confidence": 0.728}, "maison": {"median": 1094.6, "mean": 1248.8, "q1": 714.3, "q3": 1650.9, "volume": 13885, "confidence": 0.658}, "local": {"median": 666.7, "mean": 892.0, "q1": 407.6, "q3": 1100.0, "volume": 629, "confidence": 0.6}}, "21": {"tous": {"median": 2166.7, "mean": 2181.1, "q1": 1371.8, "q3": 2833.3, "volume": 43798, "confidence": 0.73}, "appartement": {"median": 2416.7, "mean": 2417.0, "q1": 1831.3, "q3": 2971.4, "volume": 20913, "confidence": 0.811}, "maison": {"median": 1852.6, "mean": 1970.9, "q1": 1076.4, "q3": 2666.7, "volume": 20961, "confidence": 0.657}, "local": {"median": 1500.0, "mean": 1906.1, "q1": 774.2, "q3": 2368.4, "volume": 1924, "confidence": 0.6}}, "22": {"tous": {"median": 1574.6, "mean": 1793.6, "q1": 967.7, "q3": 2311.8, "volume": 55291, "confidence": 0.659}, "appartement": {"median": 1958.3, "mean": 2244.5, "q1": 1426.8, "q3": 2758.6, "volume": 9533, "confidence": 0.728}, "maison": {"median": 1500.0, "mean": 1711.2, "q1": 911.5, "q3": 2226.1, "volume": 43708, "confidence": 0.649}, "local": {"median": 1027.4, "mean": 1453.6, "q1": 575.5, "q3": 1866.7, "volume": 2050, "confidence": 0.6}}, "23": {"tous": {"median": 710.9, "mean": 831.5, "q1": 461.5, "q3": 1050.0, "volume": 10623, "confidence": 0.669}, "appartement": {"median": 714.3, "mean": 761.4, "q1": 486.9, "q3": 971.8, "volume": 752, "confidence": 0.728}, "maison": {"median": 714.3, "mean": 838.7, "q1": 463.9, "q3": 1060.6, "volume": 9687, "confidence": 0.666}, "local": {"median": 473.1, "mean": 740.0, "q1": 326.1, "q3": 833.3, "volume": 184, "confidence": 0.6}}, "24": {"tous": {"median": 1214.3, "mean": 1359.1, "q1": 808.1, "q3": 1727.3, 
"volume": 36883, "confidence": 0.697}, "appartement": {"median": 1478.4, "mean": 1527.9, "q1": 1162.2, "q3": 1822.6, "volume": 4927, "confidence": 0.821}, "maison": {"median": 1166.7, "mean": 1330.9, "q1": 784.7, "q3": 1705.7, "volume": 30766, "confidence": 0.684}, "local": {"median": 854.5, "mean": 1389.3, "q1": 488.9, "q3": 1486.5, "volume": 1190, "confidence": 0.6}}, "25": {"tous": {"median": 1830.2, "mean": 1916.2, "q1": 1194.6, "q3": 2500.0, "volume": 38158, "confidence": 0.715}, "appartement": {"median": 1906.2, "mean": 1953.0, "q1": 1290.9, "q3": 2528.6, "volume": 20055, "confidence": 0.74}, "maison": {"median": 1800.0, "mean": 1927.4, "q1": 1180.8, "q3": 2509.3, "volume": 16302, "confidence": 0.705}, "local": {"median": 1136.4, "mean": 1406.0, "q1": 615.4, "q3": 1796.4, "volume": 1801, "confidence": 0.6}}, "26": {"tous": {"median": 1927.5, "mean": 2016.8, "q1": 1366.3, "q3": 2551.0, "volume": 37930, "confidence": 0.754}, "appartement": {"median": 1760.0, "mean": 1841.8, "q1": 1338.7, "q3": 2263.2, "volume": 13998, "confidence": 0.79}, "maison": {"median": 2133.3, "mean": 2170.1, "q1": 1478.7, "q3": 2714.3, "volume": 21876, "confidence": 0.768}, "local": {"median": 1192.6, "mean": 1576.9, "q1": 703.1, "q3": 1921.8, "volume": 2056, "confidence": 0.6}}, "27": {"tous": {"median": 1515.5, "mean": 1613.4, "q1": 1006.4, "q3": 2084.3, "volume": 42426, "confidence": 0.716}, "appartement": {"median": 1746.3, "mean": 1812.5, "q1": 1372.5, "q3": 2197.4, "volume": 5919, "confidence": 0.811}, "maison": {"median": 1470.2, "mean": 1590.2, "q1": 977.3, "q3": 2071.4, "volume": 35334, "confidence": 0.702}, "local": {"median": 960.7, "mean": 1309.6, "q1": 535.7, "q3": 1625.0, "volume": 1173, "confidence": 0.6}}, "28": {"tous": {"median": 1655.6, "mean": 1742.6, "q1": 1075.9, "q3": 2275.6, "volume": 31209, "confidence": 0.71}, "appartement": {"median": 2059.5, "mean": 2043.8, "q1": 1436.4, "q3": 2600.0, "volume": 6087, "confidence": 0.774}, "maison": {"median": 1575.8, "mean": 
1674.2, "q1": 1026.5, "q3": 2182.7, "volume": 24261, "confidence": 0.707}, "local": {"median": 994.9, "mean": 1539.4, "q1": 555.6, "q3": 1750.0, "volume": 861, "confidence": 0.6}}, "29": {"tous": {"median": 1818.2, "mean": 1924.9, "q1": 1231.9, "q3": 2432.4, "volume": 78321, "confidence": 0.736}, "appartement": {"median": 1971.8, "mean": 2094.8, "q1": 1529.8, "q3": 2515.0, "volume": 23098, "confidence": 0.8}, "maison": {"median": 1758.9, "mean": 1885.9, "q1": 1148.5, "q3": 2420.0, "volume": 52020, "confidence": 0.711}, "local": {"median": 1103.4, "mean": 1332.8, "q1": 631.6, "q3": 1797.9, "volume": 3203, "confidence": 0.6}}, "2A": {"tous": {"median": 3580.2, "mean": 3935.7, "q1": 2414.2, "q3": 4993.6, "volume": 11405, "confidence": 0.712}, "appartement": {"median": 3623.1, "mean": 3796.1, "q1": 2608.7, "q3": 4821.4, "volume": 6834, "confidence": 0.756}, "maison": {"median": 3812.6, "mean": 4459.6, "q1": 2271.9, "q3": 5700.0, "volume": 3746, "confidence": 0.64}, "local": {"median": 2411.8, "mean": 2713.4, "q1": 1428.6, "q3": 3555.6, "volume": 825, "confidence": 0.647}}, "2B": {"tous": {"median": 2894.7, "mean": 3047.9, "q1": 2000.0, "q3": 3800.0, "volume": 11685, "confidence": 0.751}, "appartement": {"median": 2924.5, "mean": 3051.2, "q1": 2131.9, "q3": 3777.8, "volume": 7342, "confidence": 0.775}, "maison": {"median": 2963.2, "mean": 3179.1, "q1": 1859.0, "q3": 3966.5, "volume": 3753, "confidence": 0.716}, "local": {"median": 1916.7, "mean": 2171.6, "q1": 1048.2, "q3": 2777.8, "volume": 590, "confidence": 0.639}}, "30": {"tous": {"median": 2176.1, "mean": 2382.8, "q1": 1415.4, "q3": 2994.0, "volume": 61106, "confidence": 0.71}, "appartement": {"median": 2066.7, "mean": 2448.3, "q1": 1325.0, "q3": 3075.0, "volume": 23317, "confidence": 0.661}, "maison": {"median": 2297.3, "mean": 2399.3, "q1": 1548.7, "q3": 3000.0, "volume": 35239, "confidence": 0.747}, "local": {"median": 1286.5, "mean": 1556.1, "q1": 760.0, "q3": 2000.0, "volume": 2550, "confidence": 0.614}}, 
"31": {"tous": {"median": 2718.4, "mean": 2828.5, "q1": 1972.2, "q3": 3468.1, "volume": 105201, "confidence": 0.78}, "appartement": {"median": 2875.0, "mean": 3069.4, "q1": 2295.7, "q3": 3673.8, "volume": 54728, "confidence": 0.808}, "maison": {"median": 2543.9, "mean": 2585.7, "q1": 1588.2, "q3": 3263.7, "volume": 46518, "confidence": 0.737}, "local": {"median": 1882.0, "mean": 2351.1, "q1": 1153.8, "q3": 2925.0, "volume": 3955, "confidence": 0.624}}, "32": {"tous": {"median": 1294.0, "mean": 1412.3, "q1": 873.2, "q3": 1804.6, "volume": 14858, "confidence": 0.712}, "appartement": {"median": 1445.3, "mean": 1473.4, "q1": 1125.0, "q3": 1779.2, "volume": 2273, "confidence": 0.819}, "maison": {"median": 1275.2, "mean": 1419.0, "q1": 854.2, "q3": 1830.1, "volume": 12124, "confidence": 0.694}, "local": {"median": 714.3, "mean": 935.8, "q1": 435.7, "q3": 1134.0, "volume": 461, "confidence": 0.609}}, "33": {"tous": {"median": 3351.1, "mean": 3543.5, "q1": 2126.3, "q3": 4464.3, "volume": 126310, "confidence": 0.721}, "appartement": {"median": 3776.3, "mean": 4001.3, "q1": 2944.4, "q3": 4736.1, "volume": 47692, "confidence": 0.81}, "maison": {"median": 2976.2, "mean": 3286.0, "q1": 1717.6, "q3": 4268.3, "volume": 74176, "confidence": 0.657}, "local": {"median": 2299.4, "mean": 2929.6, "q1": 1321.2, "q3": 3788.4, "volume": 4442, "confidence": 0.6}}, "34": {"tous": {"median": 2940.7, "mean": 3074.1, "q1": 1944.4, "q3": 3948.3, "volume": 118740, "confidence": 0.727}, "appartement": {"median": 3205.9, "mean": 3315.5, "q1": 2232.1, "q3": 4166.7, "volume": 63508, "confidence": 0.759}, "maison": {"median": 2698.8, "mean": 2857.4, "q1": 1751.3, "q3": 3705.1, "volume": 49976, "confidence": 0.71}, "local": {"median": 1902.8, "mean": 2216.6, "q1": 1145.0, "q3": 2875.4, "volume": 5256, "confidence": 0.636}}, "35": {"tous": {"median": 2562.5, "mean": 2768.9, "q1": 1688.2, "q3": 3550.0, "volume": 80001, "confidence": 0.709}, "appartement": {"median": 3238.7, "mean": 3415.0, "q1": 2500.0, 
"q3": 4156.2, "volume": 31218, "confidence": 0.795}, "maison": {"median": 2117.6, "mean": 2379.2, "q1": 1321.8, "q3": 2961.5, "volume": 45505, "confidence": 0.69}, "local": {"median": 1598.7, "mean": 2026.1, "q1": 914.3, "q3": 2661.3, "volume": 3278, "confidence": 0.6}}, "36": {"tous": {"median": 895.5, "mean": 1003.7, "q1": 589.3, "q3": 1289.5, "volume": 17022, "confidence": 0.687}, "appartement": {"median": 1026.4, "mean": 1059.4, "q1": 771.9, "q3": 1278.5, "volume": 1613, "confidence": 0.803}, "maison": {"median": 884.1, "mean": 999.5, "q1": 584.0, "q3": 1294.1, "volume": 14964, "confidence": 0.679}, "local": {"median": 598.5, "mean": 945.6, "q1": 370.4, "q3": 1031.2, "volume": 445, "confidence": 0.6}}, "37": {"tous": {"median": 2069.5, "mean": 2170.6, "q1": 1291.4, "q3": 2844.4, "volume": 45530, "confidence": 0.7}, "appartement": {"median": 2579.9, "mean": 2586.9, "q1": 1899.0, "q3": 3179.5, "volume": 14294, "confidence": 0.801}, "maison": {"median": 1822.9, "mean": 1996.7, "q1": 1117.6, "q3": 2628.6, "volume": 29305, "confidence": 0.668}, "local": {"median": 1241.7, "mean": 1727.5, "q1": 681.2, "q3": 2173.9, "volume": 1931, "confidence": 0.6}}, "38": {"tous": {"median": 2420.1, "mean": 2571.7, "q1": 1724.5, "q3": 3197.7, "volume": 93703, "confidence": 0.757}, "appartement": {"median": 2443.8, "mean": 2628.5, "q1": 1839.5, "q3": 3166.4, "volume": 49243, "confidence": 0.783}, "maison": {"median": 2500.0, "mean": 2612.3, "q1": 1700.4, "q3": 3309.9, "volume": 39048, "confidence": 0.742}, "local": {"median": 1408.5, "mean": 1762.5, "q1": 882.4, "q3": 2255.7, "volume": 5412, "confidence": 0.61}}, "39": {"tous": {"median": 1342.8, "mean": 1485.6, "q1": 864.7, "q3": 1902.7, "volume": 17911, "confidence": 0.691}, "appartement": {"median": 1327.4, "mean": 1487.3, "q1": 911.9, "q3": 1834.6, "volume": 5888, "confidence": 0.722}, "maison": {"median": 1400.0, "mean": 1516.9, "q1": 884.6, "q3": 1959.6, "volume": 11266, "confidence": 0.693}, "local": {"median": 719.3, "mean": 
1006.4, "q1": 408.2, "q3": 1220.3, "volume": 757, "confidence": 0.6}}, "40": {"tous": {"median": 2359.1, "mean": 2836.0, "q1": 1553.8, "q3": 3565.2, "volume": 38674, "confidence": 0.659}, "appartement": {"median": 2851.9, "mean": 3379.5, "q1": 2083.3, "q3": 4266.7, "volume": 11301, "confidence": 0.694}, "maison": {"median": 2195.1, "mean": 2661.5, "q1": 1386.8, "q3": 3333.3, "volume": 25876, "confidence": 0.645}, "local": {"median": 1486.5, "mean": 1748.3, "q1": 803.1, "q3": 2410.0, "volume": 1497, "confidence": 0.6}}, "41": {"tous": {"median": 1250.0, "mean": 1360.6, "q1": 813.3, "q3": 1764.7, "volume": 25118, "confidence": 0.696}, "appartement": {"median": 1646.1, "mean": 1628.2, "q1": 1193.8, "q3": 2023.8, "volume": 3583, "confidence": 0.798}, "maison": {"median": 1194.4, "mean": 1320.2, "q1": 791.1, "q3": 1703.7, "volume": 20577, "confidence": 0.694}, "local": {"median": 844.8, "mean": 1228.8, "q1": 466.7, "q3": 1400.0, "volume": 958, "confidence": 0.6}}, "42": {"tous": {"median": 1333.3, "mean": 1509.5, "q1": 949.0, "q3": 1935.5, "volume": 55648, "confidence": 0.704}, "appartement": {"median": 1214.3, "mean": 1321.8, "q1": 925.0, "q3": 1592.1, "volume": 27539, "confidence": 0.78}, "maison": {"median": 1674.9, "mean": 1778.4, "q1": 1094.3, "q3": 2347.0, "volume": 25044, "confidence": 0.701}, "local": {"median": 795.5, "mean": 999.7, "q1": 483.9, "q3": 1220.3, "volume": 3065, "confidence": 0.63}}, "43": {"tous": {"median": 1159.1, "mean": 1282.4, "q1": 756.1, "q3": 1688.2, "volume": 15789, "confidence": 0.678}, "appartement": {"median": 1227.3, "mean": 1289.2, "q1": 898.9, "q3": 1636.4, "volume": 3591, "confidence": 0.76}, "maison": {"median": 1160.7, "mean": 1304.4, "q1": 750.0, "q3": 1740.0, "volume": 11553, "confidence": 0.659}, "local": {"median": 633.6, "mean": 850.0, "q1": 393.4, "q3": 1034.5, "volume": 645, "confidence": 0.6}}, "44": {"tous": {"median": 3030.3, "mean": 3188.0, "q1": 2086.5, "q3": 3962.3, "volume": 113530, "confidence": 0.752}, 
"appartement": {"median": 3475.7, "mean": 3679.6, "q1": 2693.9, "q3": 4328.4, "volume": 42694, "confidence": 0.812}, "maison": {"median": 2750.0, "mean": 2937.6, "q1": 1776.9, "q3": 3695.7, "volume": 66161, "confidence": 0.721}, "local": {"median": 1826.5, "mean": 2241.7, "q1": 1033.6, "q3": 2894.7, "volume": 4675, "confidence": 0.6}}, "45": {"tous": {"median": 1692.3, "mean": 1808.8, "q1": 1088.9, "q3": 2348.5, "volume": 48866, "confidence": 0.702}, "appartement": {"median": 2118.6, "mean": 2176.1, "q1": 1553.2, "q3": 2750.5, "volume": 13805, "confidence": 0.774}, "maison": {"median": 1524.3, "mean": 1654.4, "q1": 983.5, "q3": 2157.6, "volume": 33253, "confidence": 0.692}, "local": {"median": 1219.8, "mean": 1844.8, "q1": 648.0, "q3": 2164.3, "volume": 1808, "confidence": 0.6}}, "46": {"tous": {"median": 1234.5, "mean": 1361.3, "q1": 844.7, "q3": 1740.0, "volume": 14418, "confidence": 0.71}, "appartement": {"median": 1333.3, "mean": 1346.5, "q1": 997.1, "q3": 1666.7, "volume": 1923, "confidence": 0.799}, "maison": {"median": 1232.5, "mean": 1380.8, "q1": 846.4, "q3": 1775.7, "volume": 11956, "confidence": 0.698}, "local": {"median": 764.9, "mean": 980.6, "q1": 455.3, "q3": 1237.0, "volume": 539, "confidence": 0.6}}, "47": {"tous": {"median": 1196.1, "mean": 1322.2, "q1": 797.2, "q3": 1701.4, "volume": 27456, "confidence": 0.698}, "appartement": {"median": 1359.9, "mean": 1402.5, "q1": 1000.0, "q3": 1721.9, "volume": 4764, "confidence": 0.788}, "maison": {"median": 1166.7, "mean": 1299.4, "q1": 778.3, "q3": 1702.1, "volume": 21627, "confidence": 0.683}, "local": {"median": 784.3, "mean": 1425.4, "q1": 465.3, "q3": 1428.6, "volume": 1065, "confidence": 0.6}}, "48": {"tous": {"median": 1250.0, "mean": 1382.7, "q1": 810.0, "q3": 1820.4, "volume": 4885, "confidence": 0.677}, "appartement": {"median": 1195.1, "mean": 1270.7, "q1": 789.5, "q3": 1689.6, "volume": 1027, "confidence": 0.699}, "maison": {"median": 1301.4, "mean": 1437.7, "q1": 846.2, "q3": 1863.6, "volume": 
3691, "confidence": 0.687}, "local": {"median": 646.0, "mean": 855.3, "q1": 388.7, "q3": 1058.8, "volume": 167, "confidence": 0.6}}, "49": {"tous": {"median": 1936.9, "mean": 2076.8, "q1": 1242.4, "q3": 2678.6, "volume": 56014, "confidence": 0.703}, "appartement": {"median": 2625.6, "mean": 2705.0, "q1": 1895.2, "q3": 3421.1, "volume": 14293, "confidence": 0.768}, "maison": {"median": 1761.4, "mean": 1866.7, "q1": 1136.4, "q3": 2398.6, "volume": 39175, "confidence": 0.713}, "local": {"median": 1139.1, "mean": 1782.6, "q1": 596.2, "q3": 2227.3, "volume": 2546, "confidence": 0.6}}, "50": {"tous": {"median": 1500.0, "mean": 1714.5, "q1": 896.9, "q3": 2234.5, "volume": 39558, "confidence": 0.643}, "appartement": {"median": 1964.3, "mean": 2239.8, "q1": 1431.6, "q3": 2769.2, "volume": 5510, "confidence": 0.728}, "maison": {"median": 1422.8, "mean": 1643.6, "q1": 852.0, "q3": 2162.8, "volume": 32803, "confidence": 0.631}, "local": {"median": 942.6, "mean": 1259.3, "q1": 535.7, "q3": 1597.2, "volume": 1245, "confidence": 0.6}}, "51": {"tous": {"median": 1933.3, "mean": 2047.2, "q1": 1304.3, "q3": 2625.0, "volume": 37107, "confidence": 0.727}, "appartement": {"median": 2184.6, "mean": 2285.0, "q1": 1590.9, "q3": 2869.0, "volume": 15037, "confidence": 0.766}, "maison": {"median": 1779.5, "mean": 1887.6, "q1": 1162.8, "q3": 2422.7, "volume": 20414, "confidence": 0.717}, "local": {"median": 1272.2, "mean": 1856.1, "q1": 722.2, "q3": 2352.9, "volume": 1656, "confidence": 0.6}}, "52": {"tous": {"median": 887.0, "mean": 968.8, "q1": 535.7, "q3": 1312.5, "volume": 10684, "confidence": 0.65}, "appartement": {"median": 1010.8, "mean": 1010.0, "q1": 763.6, "q3": 1236.6, "volume": 1364, "confidence": 0.813}, "maison": {"median": 860.2, "mean": 970.4, "q1": 517.7, "q3": 1343.8, "volume": 8989, "confidence": 0.616}, "local": {"median": 625.0, "mean": 757.2, "q1": 353.7, "q3": 1000.0, "volume": 331, "confidence": 0.6}}, "53": {"tous": {"median": 1296.3, "mean": 1376.1, "q1": 790.0, 
"q3": 1851.5, "volume": 23269, "confidence": 0.672}, "appartement": {"median": 1698.1, "mean": 1724.4, "q1": 1242.4, "q3": 2150.5, "volume": 3026, "confidence": 0.786}, "maison": {"median": 1243.1, "mean": 1330.3, "q1": 759.5, "q3": 1796.6, "volume": 19550, "confidence": 0.666}, "local": {"median": 800.0, "mean": 1149.0, "q1": 463.9, "q3": 1320.0, "volume": 693, "confidence": 0.6}}, "54": {"tous": {"median": 1733.8, "mean": 1784.4, "q1": 1145.8, "q3": 2314.3, "volume": 52852, "confidence": 0.73}, "appartement": {"median": 1803.6, "mean": 1829.3, "q1": 1258.8, "q3": 2327.5, "volume": 22396, "confidence": 0.763}, "maison": {"median": 1708.8, "mean": 1770.6, "q1": 1100.0, "q3": 2321.0, "volume": 28474, "confidence": 0.714}, "local": {"median": 1129.3, "mean": 1475.6, "q1": 650.0, "q3": 1842.1, "volume": 1982, "confidence": 0.6}}, "55": {"tous": {"median": 905.3, "mean": 1005.5, "q1": 576.9, "q3": 1333.3, "volume": 11660, "confidence": 0.666}, "appartement": {"median": 949.7, "mean": 989.6, "q1": 656.6, "q3": 1240.7, "volume": 1636, "confidence": 0.754}, "maison": {"median": 900.0, "mean": 1010.3, "q1": 568.4, "q3": 1355.6, "volume": 9736, "confidence": 0.65}, "local": {"median": 690.2, "mean": 931.1, "q1": 404.2, "q3": 1142.9, "volume": 288, "confidence": 0.6}}, "56": {"tous": {"median": 2264.4, "mean": 2529.1, "q1": 1300.0, "q3": 3375.0, "volume": 66390, "confidence": 0.633}, "appartement": {"median": 3012.3, "mean": 3213.2, "q1": 2152.5, "q3": 3981.5, "volume": 16777, "confidence": 0.757}, "maison": {"median": 2024.2, "mean": 2336.4, "q1": 1128.7, "q3": 3113.4, "volume": 46761, "confidence": 0.608}, "local": {"median": 1300.6, "mean": 1665.7, "q1": 733.3, "q3": 2200.0, "volume": 2852, "confidence": 0.6}}, "58": {"tous": {"median": 888.9, "mean": 1003.4, "q1": 578.9, "q3": 1277.8, "volume": 18420, "confidence": 0.686}, "appartement": {"median": 896.3, "mean": 923.1, "q1": 629.1, "q3": 1155.2, "volume": 2858, "confidence": 0.765}, "maison": {"median": 892.9, "mean": 
1013.8, "q1": 579.7, "q3": 1318.0, "volume": 15040, "confidence": 0.669}, "local": {"median": 600.0, "mean": 1143.6, "q1": 359.0, "q3": 1060.6, "volume": 522, "confidence": 0.6}}, "59": {"tous": {"median": 1950.0, "mean": 2177.8, "q1": 1273.5, "q3": 2833.3, "volume": 168756, "confidence": 0.68}, "appartement": {"median": 2617.2, "mean": 2789.7, "q1": 1753.6, "q3": 3630.8, "volume": 44138, "confidence": 0.713}, "maison": {"median": 1788.1, "mean": 1962.1, "q1": 1184.2, "q3": 2531.9, "volume": 118578, "confidence": 0.699}, "local": {"median": 1305.8, "mean": 1940.1, "q1": 701.8, "q3": 2457.6, "volume": 6040, "confidence": 0.6}}, "60": {"tous": {"median": 2075.0, "mean": 2207.1, "q1": 1456.5, "q3": 2753.0, "volume": 52270, "confidence": 0.75}, "appartement": {"median": 2200.0, "mean": 2360.9, "q1": 1580.0, "q3": 2985.1, "volume": 12578, "confidence": 0.745}, "maison": {"median": 2058.8, "mean": 2162.1, "q1": 1437.5, "q3": 2691.5, "volume": 38044, "confidence": 0.756}, "local": {"median": 1475.4, "mean": 2070.4, "q1": 803.6, "q3": 2540.8, "volume": 1648, "confidence": 0.6}}, "61": {"tous": {"median": 1015.0, "mean": 1140.1, "q1": 655.0, "q3": 1470.0, "volume": 21482, "confidence": 0.679}, "appartement": {"median": 1155.6, "mean": 1250.9, "q1": 846.2, "q3": 1566.0, "volume": 2366, "confidence": 0.751}, "maison": {"median": 1000.0, "mean": 1136.1, "q1": 650.0, "q3": 1467.5, "volume": 18455, "confidence": 0.673}, "local": {"median": 587.8, "mean": 854.5, "q1": 357.1, "q3": 973.5, "volume": 661, "confidence": 0.6}}, "62": {"tous": {"median": 1569.3, "mean": 1862.4, "q1": 1098.9, "q3": 2125.2, "volume": 89090, "confidence": 0.738}, "appartement": {"median": 2074.1, "mean": 2842.0, "q1": 1410.7, "q3": 3333.3, "volume": 15170, "confidence": 0.629}, "maison": {"median": 1516.1, "mean": 1664.9, "q1": 1070.8, "q3": 2000.0, "volume": 71120, "confidence": 0.755}, "local": {"median": 978.6, "mean": 1569.8, "q1": 555.6, "q3": 1737.9, "volume": 2800, "confidence": 0.6}}, "63": 
{"tous": {"median": 1734.7, "mean": 1777.4, "q1": 1052.2, "q3": 2371.1, "volume": 48373, "confidence": 0.696}, "appartement": {"median": 2011.3, "mean": 2048.6, "q1": 1550.0, "q3": 2512.2, "volume": 17759, "confidence": 0.809}, "maison": {"median": 1484.5, "mean": 1639.8, "q1": 866.7, "q3": 2272.7, "volume": 28830, "confidence": 0.621}, "local": {"median": 1022.5, "mean": 1300.8, "q1": 591.1, "q3": 1696.4, "volume": 1784, "confidence": 0.6}}, "64": {"tous": {"median": 2444.4, "mean": 3210.6, "q1": 1574.1, "q3": 4230.8, "volume": 57491, "confidence": 0.6}, "appartement": {"median": 3068.0, "mean": 3726.5, "q1": 1942.9, "q3": 4863.9, "volume": 31645, "confidence": 0.619}, "maison": {"median": 2020.0, "mean": 2642.7, "q1": 1229.5, "q3": 3082.8, "volume": 22995, "confidence": 0.633}, "local": {"median": 1513.3, "mean": 2064.8, "q1": 819.0, "q3": 2546.5, "volume": 2851, "confidence": 0.6}}, "65": {"tous": {"median": 1495.3, "mean": 1704.3, "q1": 1036.6, "q3": 2122.0, "volume": 21988, "confidence": 0.71}, "appartement": {"median": 1550.0, "mean": 1860.7, "q1": 1111.1, "q3": 2450.0, "volume": 9970, "confidence": 0.654}, "maison": {"median": 1497.5, "mean": 1594.0, "q1": 1013.6, "q3": 2000.0, "volume": 11159, "confidence": 0.737}, "local": {"median": 763.6, "mean": 1322.8, "q1": 448.2, "q3": 1250.0, "volume": 859, "confidence": 0.6}}, "66": {"tous": {"median": 2168.7, "mean": 2349.5, "q1": 1421.9, "q3": 3019.9, "volume": 60224, "confidence": 0.705}, "appartement": {"median": 2073.2, "mean": 2341.5, "q1": 1358.8, "q3": 3095.2, "volume": 27223, "confidence": 0.665}, "maison": {"median": 2295.6, "mean": 2416.5, "q1": 1573.1, "q3": 3013.9, "volume": 30564, "confidence": 0.749}, "local": {"median": 1333.3, "mean": 1597.0, "q1": 791.7, "q3": 2076.9, "volume": 2437, "confidence": 0.614}}, "69": {"tous": {"median": 3651.7, "mean": 3743.9, "q1": 2547.6, "q3": 4733.3, "volume": 123788, "confidence": 0.761}, "appartement": {"median": 3846.2, "mean": 3933.0, "q1": 2839.2, "q3": 
4889.7, "volume": 84201, "confidence": 0.787}, "maison": {"median": 3333.3, "mean": 3488.7, "q1": 2154.8, "q3": 4436.6, "volume": 32288, "confidence": 0.726}, "local": {"median": 2077.5, "mean": 2692.4, "q1": 1210.7, "q3": 3481.2, "volume": 7299, "confidence": 0.6}}, "70": {"tous": {"median": 972.8, "mean": 1083.9, "q1": 634.9, "q3": 1401.6, "volume": 14482, "confidence": 0.685}, "appartement": {"median": 950.0, "mean": 989.4, "q1": 678.6, "q3": 1225.8, "volume": 2695, "confidence": 0.77}, "maison": {"median": 1000.0, "mean": 1119.1, "q1": 645.2, "q3": 1478.8, "volume": 11312, "confidence": 0.667}, "local": {"median": 615.4, "mean": 782.9, "q1": 382.7, "q3": 978.6, "volume": 475, "confidence": 0.613}}, "71": {"tous": {"median": 1180.6, "mean": 1316.5, "q1": 783.1, "q3": 1707.3, "volume": 40251, "confidence": 0.687}, "appartement": {"median": 1191.2, "mean": 1286.3, "q1": 857.1, "q3": 1616.6, "volume": 10310, "confidence": 0.745}, "maison": {"median": 1196.6, "mean": 1339.9, "q1": 781.2, "q3": 1763.7, "volume": 28356, "confidence": 0.672}, "local": {"median": 737.2, "mean": 1094.2, "q1": 434.8, "q3": 1266.7, "volume": 1585, "confidence": 0.6}}, "72": {"tous": {"median": 1366.3, "mean": 1443.3, "q1": 864.3, "q3": 1916.7, "volume": 42319, "confidence": 0.692}, "appartement": {"median": 1671.5, "mean": 1739.4, "q1": 1259.3, "q3": 2131.1, "volume": 7772, "confidence": 0.791}, "maison": {"median": 1290.9, "mean": 1386.7, "q1": 809.5, "q3": 1860.5, "volume": 33115, "confidence": 0.674}, "local": {"median": 862.9, "mean": 1145.1, "q1": 488.4, "q3": 1396.6, "volume": 1432, "confidence": 0.6}}, "73": {"tous": {"median": 3159.2, "mean": 3778.2, "q1": 2142.9, "q3": 4500.0, "volume": 47162, "confidence": 0.702}, "appartement": {"median": 3399.9, "mean": 4094.0, "q1": 2414.9, "q3": 4778.6, "volume": 31057, "confidence": 0.722}, "maison": {"median": 2745.0, "mean": 3178.9, "q1": 1787.1, "q3": 3951.9, "volume": 13000, "confidence": 0.685}, "local": {"median": 2138.9, "mean": 
3127.9, "q1": 1156.6, "q3": 4027.8, "volume": 3105, "confidence": 0.6}}, "74": {"tous": {"median": 4000.0, "mean": 4407.2, "q1": 2979.6, "q3": 5240.8, "volume": 77027, "confidence": 0.774}, "appartement": {"median": 4023.8, "mean": 4411.1, "q1": 3080.4, "q3": 5227.3, "volume": 51353, "confidence": 0.787}, "maison": {"median": 4137.0, "mean": 4624.3, "q1": 2981.5, "q3": 5429.3, "volume": 22456, "confidence": 0.763}, "local": {"median": 2307.7, "mean": 2830.7, "q1": 1351.7, "q3": 3571.4, "volume": 3218, "confidence": 0.615}}, "75": {"tous": {"median": 10284.5, "mean": 10351.6, "q1": 8541.6, "q3": 12083.3, "volume": 159360, "confidence": 0.862}, "appartement": {"median": 10362.8, "mean": 10477.6, "q1": 8709.5, "q3": 12106.8, "volume": 147088, "confidence": 0.869}, "maison": {"median": 13457.8, "mean": 13777.4, "q1": 10400.0, "q3": 17260.3, "volume": 652, "confidence": 0.796}, "local": {"median": 8255.6, "mean": 8564.9, "q1": 5411.0, "q3": 11215.0, "volume": 11620, "confidence": 0.719}}, "76": {"tous": {"median": 1900.0, "mean": 2007.4, "q1": 1269.8, "q3": 2555.3, "volume": 90674, "confidence": 0.729}, "appartement": {"median": 2153.8, "mean": 2257.1, "q1": 1534.0, "q3": 2864.8, "volume": 32914, "confidence": 0.753}, "maison": {"median": 1781.2, "mean": 1870.3, "q1": 1160.7, "q3": 2376.5, "volume": 54432, "confidence": 0.727}, "local": {"median": 1285.2, "mean": 1779.6, "q1": 732.9, "q3": 2232.0, "volume": 3328, "confidence": 0.6}}, "77": {"tous": {"median": 2867.2, "mean": 2913.1, "q1": 1954.5, "q3": 3692.3, "volume": 91321, "confidence": 0.758}, "appartement": {"median": 3228.3, "mean": 3259.3, "q1": 2473.4, "q3": 3979.6, "volume": 33952, "confidence": 0.813}, "maison": {"median": 2641.8, "mean": 2725.9, "q1": 1731.6, "q3": 3485.7, "volume": 53910, "confidence": 0.734}, "local": {"median": 1800.0, "mean": 2431.2, "q1": 1083.3, "q3": 2987.0, "volume": 3459, "confidence": 0.6}}, "78": {"tous": {"median": 3790.0, "mean": 4155.8, "q1": 2783.7, "q3": 5081.6, "volume": 
88078, "confidence": 0.757}, "appartement": {"median": 3966.7, "mean": 4284.9, "q1": 3000.0, "q3": 5204.1, "volume": 45616, "confidence": 0.778}, "maison": {"median": 3636.4, "mean": 4091.1, "q1": 2607.4, "q3": 4983.3, "volume": 39315, "confidence": 0.739}, "local": {"median": 2397.3, "mean": 3094.8, "q1": 1363.6, "q3": 4038.5, "volume": 3147, "confidence": 0.6}}, "79": {"tous": {"median": 1167.0, "mean": 1279.0, "q1": 750.0, "q3": 1700.0, "volume": 31041, "confidence": 0.674}, "appartement": {"median": 1604.7, "mean": 1644.8, "q1": 1149.1, "q3": 2055.6, "volume": 2840, "confidence": 0.774}, "maison": {"median": 1134.2, "mean": 1246.2, "q1": 729.9, "q3": 1658.3, "volume": 27200, "confidence": 0.673}, "local": {"median": 833.3, "mean": 1132.6, "q1": 458.3, "q3": 1428.6, "volume": 1001, "confidence": 0.6}}, "80": {"tous": {"median": 1600.0, "mean": 1784.1, "q1": 1029.0, "q3": 2307.7, "volume": 38817, "confidence": 0.68}, "appartement": {"median": 2326.5, "mean": 2454.1, "q1": 1698.7, "q3": 2996.2, "volume": 7619, "confidence": 0.777}, "maison": {"median": 1461.3, "mean": 1621.5, "q1": 952.8, "q3": 2083.3, "volume": 30105, "confidence": 0.691}, "local": {"median": 1191.4, "mean": 1592.6, "q1": 660.0, "q3": 2000.0, "volume": 1093, "confidence": 0.6}}, "81": {"tous": {"median": 1362.5, "mean": 1491.6, "q1": 906.0, "q3": 1961.5, "volume": 26542, "confidence": 0.69}, "appartement": {"median": 1710.9, "mean": 1748.7, "q1": 1203.7, "q3": 2187.5, "volume": 4300, "confidence": 0.77}, "maison": {"median": 1306.9, "mean": 1454.8, "q1": 888.8, "q3": 1910.7, "volume": 21376, "confidence": 0.687}, "local": {"median": 750.0, "mean": 1124.6, "q1": 436.0, "q3": 1352.9, "volume": 866, "confidence": 0.6}}, "82": {"tous": {"median": 1460.7, "mean": 1541.5, "q1": 975.6, "q3": 2002.5, "volume": 18714, "confidence": 0.719}, "appartement": {"median": 1636.4, "mean": 1647.4, "q1": 1303.6, "q3": 2000.0, "volume": 3733, "confidence": 0.83}, "maison": {"median": 1409.5, "mean": 1528.2, "q1": 
933.8, "q3": 2019.6, "volume": 14403, "confidence": 0.692}, "local": {"median": 938.6, "mean": 1189.5, "q1": 590.3, "q3": 1377.8, "volume": 578, "confidence": 0.664}}, "83": {"tous": {"median": 3352.3, "mean": 3779.0, "q1": 2185.2, "q3": 4816.7, "volume": 126979, "confidence": 0.686}, "appartement": {"median": 3529.4, "mean": 3905.7, "q1": 2411.5, "q3": 4991.4, "volume": 73026, "confidence": 0.708}, "maison": {"median": 3214.3, "mean": 3710.1, "q1": 1940.3, "q3": 4675.9, "volume": 49080, "confidence": 0.66}, "local": {"median": 2060.6, "mean": 2575.2, "q1": 1304.3, "q3": 3148.1, "volume": 4873, "confidence": 0.642}}, "84": {"tous": {"median": 2340.4, "mean": 2515.8, "q1": 1644.1, "q3": 3088.2, "volume": 41739, "confidence": 0.753}, "appartement": {"median": 2093.2, "mean": 2197.7, "q1": 1481.3, "q3": 2750.0, "volume": 14836, "confidence": 0.758}, "maison": {"median": 2551.3, "mean": 2760.6, "q1": 1857.1, "q3": 3319.0, "volume": 25053, "confidence": 0.771}, "local": {"median": 1360.6, "mean": 1751.8, "q1": 830.2, "q3": 2193.2, "volume": 1850, "confidence": 0.6}}, "85": {"tous": {"median": 2200.0, "mean": 2456.4, "q1": 1372.5, "q3": 3159.1, "volume": 66695, "confidence": 0.675}, "appartement": {"median": 3030.3, "mean": 3325.6, "q1": 2200.0, "q3": 4170.6, "volume": 11325, "confidence": 0.74}, "maison": {"median": 2080.6, "mean": 2306.9, "q1": 1295.3, "q3": 2945.2, "volume": 52440, "confidence": 0.683}, "local": {"median": 1333.1, "mean": 1773.0, "q1": 703.7, "q3": 2252.5, "volume": 2930, "confidence": 0.6}}, "86": {"tous": {"median": 1309.5, "mean": 1447.7, "q1": 806.5, "q3": 1930.6, "volume": 33289, "confidence": 0.657}, "appartement": {"median": 1963.4, "mean": 1992.8, "q1": 1472.0, "q3": 2496.3, "volume": 6618, "confidence": 0.791}, "maison": {"median": 1164.3, "mean": 1296.7, "q1": 750.0, "q3": 1750.0, "volume": 25387, "confidence": 0.656}, "local": {"median": 964.9, "mean": 1623.3, "q1": 509.5, "q3": 1642.9, "volume": 1284, "confidence": 0.6}}, "87": {"tous": 
{"median": 1250.0, "mean": 1331.7, "q1": 801.4, "q3": 1759.6, "volume": 28742, "confidence": 0.693}, "appartement": {"median": 1516.1, "mean": 1546.7, "q1": 1182.1, "q3": 1869.6, "volume": 8255, "confidence": 0.819}, "maison": {"median": 1088.5, "mean": 1250.9, "q1": 705.7, "q3": 1687.5, "volume": 19529, "confidence": 0.639}, "local": {"median": 933.3, "mean": 1126.0, "q1": 529.2, "q3": 1503.0, "volume": 958, "confidence": 0.6}}, "88": {"tous": {"median": 1071.0, "mean": 1240.1, "q1": 689.7, "q3": 1580.5, "volume": 25803, "confidence": 0.667}, "appartement": {"median": 1094.8, "mean": 1286.6, "q1": 743.4, "q3": 1568.3, "volume": 8141, "confidence": 0.699}, "maison": {"median": 1075.7, "mean": 1231.5, "q1": 687.5, "q3": 1603.3, "volume": 16612, "confidence": 0.659}, "local": {"median": 750.0, "mean": 1015.5, "q1": 444.4, "q3": 1201.9, "volume": 1050, "confidence": 0.6}}, "89": {"tous": {"median": 1197.9, "mean": 1291.7, "q1": 783.1, "q3": 1671.2, "volume": 26732, "confidence": 0.703}, "appartement": {"median": 1354.2, "mean": 1378.0, "q1": 994.7, "q3": 1723.9, "volume": 4285, "confidence": 0.785}, "maison": {"median": 1172.2, "mean": 1271.4, "q1": 763.6, "q3": 1665.4, "volume": 21553, "confidence": 0.692}, "local": {"median": 849.5, "mean": 1365.5, "q1": 506.8, "q3": 1448.3, "volume": 894, "confidence": 0.6}}, "90": {"tous": {"median": 1333.3, "mean": 1399.7, "q1": 962.3, "q3": 1776.2, "volume": 8960, "confidence": 0.756}, "appartement": {"median": 1214.3, "mean": 1263.9, "q1": 913.6, "q3": 1554.1, "volume": 4715, "confidence": 0.789}, "maison": {"median": 1602.7, "mean": 1605.5, "q1": 1114.8, "q3": 2026.7, "volume": 3850, "confidence": 0.772}, "local": {"median": 824.4, "mean": 1014.8, "q1": 500.0, "q3": 1272.7, "volume": 395, "confidence": 0.625}}, "91": {"tous": {"median": 3165.6, "mean": 3282.5, "q1": 2427.2, "q3": 3921.6, "volume": 77368, "confidence": 0.811}, "appartement": {"median": 2983.3, "mean": 3097.1, "q1": 2347.2, "q3": 3712.1, "volume": 36975, 
"confidence": 0.817}, "maison": {"median": 3378.4, "mean": 3503.6, "q1": 2674.4, "q3": 4147.5, "volume": 37594, "confidence": 0.826}, "local": {"median": 2027.0, "mean": 2760.2, "q1": 1205.7, "q3": 3333.3, "volume": 2799, "confidence": 0.6}}, "92": {"tous": {"median": 6862.9, "mean": 7100.3, "q1": 5420.9, "q3": 8500.0, "volume": 97813, "confidence": 0.821}, "appartement": {"median": 6823.3, "mean": 7038.5, "q1": 5448.7, "q3": 8375.0, "volume": 82181, "confidence": 0.828}, "maison": {"median": 7822.6, "mean": 8225.7, "q1": 6134.5, "q3": 9820.0, "volume": 11649, "confidence": 0.812}, "local": {"median": 4756.1, "mean": 5083.4, "q1": 2744.2, "q3": 6625.0, "volume": 3983, "confidence": 0.674}}, "93": {"tous": {"median": 3913.0, "mean": 4302.8, "q1": 2947.8, "q3": 5294.1, "volume": 70849, "confidence": 0.76}, "appartement": {"median": 3911.3, "mean": 4287.9, "q1": 2892.9, "q3": 5409.1, "volume": 46907, "confidence": 0.743}, "maison": {"median": 4000.0, "mean": 4430.4, "q1": 3181.8, "q3": 5147.1, "volume": 21420, "confidence": 0.803}, "local": {"median": 2717.4, "mean": 3496.7, "q1": 1625.0, "q3": 4359.0, "volume": 2522, "confidence": 0.6}}, "94": {"tous": {"median": 4900.0, "mean": 5305.3, "q1": 3653.8, "q3": 6466.7, "volume": 76197, "confidence": 0.77}, "appartement": {"median": 4880.9, "mean": 5241.3, "q1": 3658.5, "q3": 6363.6, "volume": 55606, "confidence": 0.778}, "maison": {"median": 5125.0, "mean": 5703.1, "q1": 3854.2, "q3": 7000.0, "volume": 17890, "confidence": 0.754}, "local": {"median": 3378.4, "mean": 3987.9, "q1": 2000.0, "q3": 5294.1, "volume": 2701, "confidence": 0.61}}, "95": {"tous": {"median": 3400.0, "mean": 3530.8, "q1": 2687.4, "q3": 4184.6, "volume": 68727, "confidence": 0.824}, "appartement": {"median": 3226.4, "mean": 3324.0, "q1": 2600.0, "q3": 3942.3, "volume": 33240, "confidence": 0.834}, "maison": {"median": 3611.1, "mean": 3782.1, "q1": 2905.6, "q3": 4447.8, "volume": 33220, "confidence": 0.829}, "local": {"median": 2101.6, "mean": 2881.0, 
"q1": 1174.7, "q3": 3500.0, "volume": 2267, "confidence": 0.6}}, "971": {"tous": {"median": 2826.1, "mean": 3033.2, "q1": 1743.6, "q3": 3912.1, "volume": 10721, "confidence": 0.693}, "appartement": {"median": 2951.7, "mean": 3120.8, "q1": 2000.0, "q3": 3960.4, "volume": 4988, "confidence": 0.734}, "maison": {"median": 2771.3, "mean": 3011.1, "q1": 1602.2, "q3": 3921.1, "volume": 4326, "confidence": 0.665}, "local": {"median": 2470.0, "mean": 2791.0, "q1": 1395.3, "q3": 3691.3, "volume": 1407, "confidence": 0.628}}, "972": {"tous": {"median": 2500.0, "mean": 2682.8, "q1": 1600.0, "q3": 3460.0, "volume": 9912, "confidence": 0.702}, "appartement": {"median": 2666.7, "mean": 2846.0, "q1": 1966.3, "q3": 3476.9, "volume": 4813, "confidence": 0.773}, "maison": {"median": 2348.7, "mean": 2604.3, "q1": 1322.4, "q3": 3584.4, "volume": 4142, "confidence": 0.615}, "local": {"median": 1925.9, "mean": 2201.6, "q1": 1198.0, "q3": 2872.4, "volume": 957, "confidence": 0.652}}, "973": {"tous": {"median": 2469.1, "mean": 2500.8, "q1": 1760.0, "q3": 3078.9, "volume": 4002, "confidence": 0.786}, "appartement": {"median": 2576.8, "mean": 2603.9, "q1": 2112.7, "q3": 3111.1, "volume": 1586, "confidence": 0.845}, "maison": {"median": 2401.3, "mean": 2478.9, "q1": 1619.8, "q3": 3088.2, "volume": 2260, "confidence": 0.755}, "local": {"median": 1382.9, "mean": 1769.8, "q1": 882.4, "q3": 2036.4, "volume": 156, "confidence": 0.666}}, "974": {"tous": {"median": 2492.1, "mean": 2800.9, "q1": 1833.3, "q3": 3402.8, "volume": 28084, "confidence": 0.748}, "appartement": {"median": 2473.7, "mean": 2781.1, "q1": 1966.7, "q3": 3214.3, "volume": 12306, "confidence": 0.798}, "maison": {"median": 2521.7, "mean": 2829.5, "q1": 1666.7, "q3": 3571.4, "volume": 14873, "confidence": 0.698}, "local": {"median": 2162.2, "mean": 2600.0, "q1": 1386.4, "q3": 3164.6, "volume": 905, "confidence": 0.671}}}
|
data/aggregated/prices_postcode.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/aggregated/prices_region.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"01": {"tous": {"median": 2826.1, "mean": 3033.2, "q1": 1743.6, "q3": 3912.1, "volume": 10721, "confidence": 0.693}, "appartement": {"median": 2951.7, "mean": 3120.8, "q1": 2000.0, "q3": 3960.4, "volume": 4988, "confidence": 0.734}, "maison": {"median": 2771.3, "mean": 3011.1, "q1": 1602.2, "q3": 3921.1, "volume": 4326, "confidence": 0.665}, "local": {"median": 2470.0, "mean": 2791.0, "q1": 1395.3, "q3": 3691.3, "volume": 1407, "confidence": 0.628}}, "02": {"tous": {"median": 2500.0, "mean": 2682.8, "q1": 1600.0, "q3": 3460.0, "volume": 9912, "confidence": 0.702}, "appartement": {"median": 2666.7, "mean": 2846.0, "q1": 1966.3, "q3": 3476.9, "volume": 4813, "confidence": 0.773}, "maison": {"median": 2348.7, "mean": 2604.3, "q1": 1322.4, "q3": 3584.4, "volume": 4142, "confidence": 0.615}, "local": {"median": 1925.9, "mean": 2201.6, "q1": 1198.0, "q3": 2872.4, "volume": 957, "confidence": 0.652}}, "03": {"tous": {"median": 2469.1, "mean": 2500.8, "q1": 1760.0, "q3": 3078.9, "volume": 4002, "confidence": 0.786}, "appartement": {"median": 2576.8, "mean": 2603.9, "q1": 2112.7, "q3": 3111.1, "volume": 1586, "confidence": 0.845}, "maison": {"median": 2401.3, "mean": 2478.9, "q1": 1619.8, "q3": 3088.2, "volume": 2260, "confidence": 0.755}, "local": {"median": 1382.9, "mean": 1769.8, "q1": 882.4, "q3": 2036.4, "volume": 156, "confidence": 0.666}}, "04": {"tous": {"median": 2492.1, "mean": 2800.9, "q1": 1833.3, "q3": 3402.8, "volume": 28084, "confidence": 0.748}, "appartement": {"median": 2473.7, "mean": 2781.1, "q1": 1966.7, "q3": 3214.3, "volume": 12306, "confidence": 0.798}, "maison": {"median": 2521.7, "mean": 2829.5, "q1": 1666.7, "q3": 3571.4, "volume": 14873, "confidence": 0.698}, "local": {"median": 2162.2, "mean": 2600.0, "q1": 1386.4, "q3": 3164.6, "volume": 905, "confidence": 0.671}}, "11": {"tous": {"median": 4545.5, "mean": 5730.9, "q1": 3077.3, "q3": 7896.2, "volume": 729713, "confidence": 0.6}, "appartement": {"median": 5606.1, "mean": 6527.2, "q1": 3481.5, 
"q3": 9145.5, "volume": 481565, "confidence": 0.6}, "maison": {"median": 3529.4, "mean": 4019.9, "q1": 2593.4, "q3": 4755.6, "volume": 215650, "confidence": 0.755}, "local": {"median": 3975.0, "mean": 5285.5, "q1": 1916.7, "q3": 7640.4, "volume": 32498, "confidence": 0.6}}, "24": {"tous": {"median": 1489.4, "mean": 1667.3, "q1": 916.7, "q3": 2209.6, "volume": 192214, "confidence": 0.653}, "appartement": {"median": 2045.5, "mean": 2126.8, "q1": 1428.6, "q3": 2750.0, "volume": 43364, "confidence": 0.742}, "maison": {"median": 1344.8, "mean": 1533.8, "q1": 845.1, "q3": 2015.4, "volume": 142096, "confidence": 0.652}, "local": {"median": 1000.0, "mean": 1524.8, "q1": 545.5, "q3": 1829.8, "volume": 6754, "confidence": 0.6}}, "27": {"tous": {"median": 1392.9, "mean": 1578.7, "q1": 869.0, "q3": 2099.0, "volume": 208712, "confidence": 0.647}, "appartement": {"median": 1659.7, "mean": 1797.3, "q1": 1062.5, "q3": 2411.8, "volume": 71719, "confidence": 0.675}, "maison": {"median": 1290.3, "mean": 1471.6, "q1": 807.7, "q3": 1935.5, "volume": 128640, "confidence": 0.65}, "local": {"median": 933.3, "mean": 1351.1, "q1": 515.4, "q3": 1637.9, "volume": 8353, "confidence": 0.6}}, "28": {"tous": {"median": 1768.4, "mean": 1984.7, "q1": 1094.1, "q3": 2543.3, "volume": 258639, "confidence": 0.672}, "appartement": {"median": 2305.6, "mean": 2561.8, "q1": 1612.9, "q3": 3192.4, "volume": 72471, "confidence": 0.726}, "maison": {"median": 1574.5, "mean": 1761.8, "q1": 982.1, "q3": 2288.6, "volume": 177132, "confidence": 0.668}, "local": {"median": 1215.5, "mean": 1726.8, "q1": 654.2, "q3": 2217.0, "volume": 9036, "confidence": 0.6}}, "32": {"tous": {"median": 1735.8, "mean": 1984.7, "q1": 1145.8, "q3": 2500.0, "volume": 382078, "confidence": 0.688}, "appartement": {"median": 2333.3, "mean": 2634.0, "q1": 1557.4, "q3": 3362.1, "volume": 83415, "confidence": 0.691}, "maison": {"median": 1625.0, "mean": 1802.6, "q1": 1082.6, "q3": 2285.7, "volume": 285981, "confidence": 0.704}, "local": 
{"median": 1190.5, "mean": 1820.0, "q1": 642.2, "q3": 2210.5, "volume": 12682, "confidence": 0.6}}, "44": {"tous": {"median": 1417.3, "mean": 1564.0, "q1": 884.6, "q3": 2071.4, "volume": 176812, "confidence": 0.665}, "appartement": {"median": 1631.1, "mean": 1763.5, "q1": 1101.7, "q3": 2286.3, "volume": 59150, "confidence": 0.709}, "maison": {"median": 1324.3, "mean": 1468.1, "q1": 809.3, "q3": 1961.9, "volume": 110836, "confidence": 0.652}, "local": {"median": 966.9, "mean": 1392.8, "q1": 550.8, "q3": 1649.5, "volume": 6826, "confidence": 0.6}}, "52": {"tous": {"median": 2160.0, "mean": 2435.8, "q1": 1319.6, "q3": 3219.2, "volume": 301827, "confidence": 0.648}, "appartement": {"median": 3005.7, "mean": 3187.4, "q1": 2125.0, "q3": 3958.3, "volume": 79110, "confidence": 0.756}, "maison": {"median": 1917.1, "mean": 2187.7, "q1": 1173.5, "q3": 2841.5, "volume": 210441, "confidence": 0.652}, "local": {"median": 1347.0, "mean": 1845.0, "q1": 699.3, "q3": 2381.0, "volume": 12276, "confidence": 0.6}}, "53": {"tous": {"median": 2033.0, "mean": 2283.4, "q1": 1261.7, "q3": 2948.7, "volume": 280003, "confidence": 0.668}, "appartement": {"median": 2614.4, "mean": 2856.4, "q1": 1868.3, "q3": 3613.6, "volume": 80626, "confidence": 0.733}, "maison": {"median": 1829.3, "mean": 2076.7, "q1": 1111.1, "q3": 2666.7, "volume": 187994, "confidence": 0.66}, "local": {"median": 1255.8, "mean": 1637.6, "q1": 712.5, "q3": 2142.9, "volume": 11383, "confidence": 0.6}}, "75": {"tous": {"median": 1853.8, "mean": 2415.6, "q1": 1072.1, "q3": 3248.8, "volume": 505650, "confidence": 0.6}, "appartement": {"median": 2859.1, "mean": 3258.9, "q1": 1756.7, "q3": 4290.3, "volume": 139810, "confidence": 0.646}, "maison": {"median": 1586.2, "mean": 2099.2, "q1": 936.6, "q3": 2652.9, "volume": 347220, "confidence": 0.6}, "local": {"median": 1343.4, "mean": 1984.4, "q1": 676.7, "q3": 2547.2, "volume": 18620, "confidence": 0.6}}, "76": {"tous": {"median": 2125.0, "mean": 2356.1, "q1": 1300.0, "q3": 3095.2, 
"volume": 523730, "confidence": 0.662}, "appartement": {"median": 2520.9, "mean": 2721.5, "q1": 1629.6, "q3": 3531.2, "volume": 213001, "confidence": 0.698}, "maison": {"median": 1900.0, "mean": 2129.7, "q1": 1166.7, "q3": 2804.6, "volume": 290513, "confidence": 0.655}, "local": {"median": 1353.2, "mean": 1760.7, "q1": 746.1, "q3": 2254.5, "volume": 20216, "confidence": 0.6}}, "84": {"tous": {"median": 2391.3, "mean": 2785.3, "q1": 1434.8, "q3": 3673.5, "volume": 606247, "confidence": 0.626}, "appartement": {"median": 2804.3, "mean": 3162.9, "q1": 1756.1, "q3": 4148.9, "volume": 310845, "confidence": 0.659}, "maison": {"median": 2086.6, "mean": 2429.4, "q1": 1238.7, "q3": 3126.3, "volume": 264552, "confidence": 0.638}, "local": {"median": 1414.4, "mean": 2033.1, "q1": 789.1, "q3": 2527.8, "volume": 30850, "confidence": 0.6}}, "93": {"tous": {"median": 3381.3, "mean": 3761.0, "q1": 2263.1, "q3": 4736.8, "volume": 493722, "confidence": 0.707}, "appartement": {"median": 3546.9, "mean": 3872.9, "q1": 2419.4, "q3": 4862.4, "volume": 314937, "confidence": 0.724}, "maison": {"median": 3220.2, "mean": 3693.3, "q1": 2140.2, "q3": 4588.2, "volume": 155197, "confidence": 0.696}, "local": {"median": 2155.5, "mean": 2711.4, "q1": 1305.6, "q3": 3366.7, "volume": 23588, "confidence": 0.618}}, "94": {"tous": {"median": 3166.7, "mean": 3486.4, "q1": 2169.2, "q3": 4375.0, "volume": 23090, "confidence": 0.721}, "appartement": {"median": 3200.1, "mean": 3410.3, "q1": 2333.3, "q3": 4317.2, "volume": 14176, "confidence": 0.752}, "maison": {"median": 3274.6, "mean": 3818.7, "q1": 2037.0, "q3": 4727.3, "volume": 7499, "confidence": 0.671}, "local": {"median": 2146.3, "mean": 2487.5, "q1": 1280.0, "q3": 3250.0, "volume": 1415, "confidence": 0.633}}}
|
data/aggregated/top_cities.json
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Bordeaux": {
|
| 3 |
+
"code": "33063",
|
| 4 |
+
"appartement": {
|
| 5 |
+
"median": 4439.2,
|
| 6 |
+
"mean": 4566.7,
|
| 7 |
+
"q1": 3653.8,
|
| 8 |
+
"q3": 5333.3,
|
| 9 |
+
"volume": 18352
|
| 10 |
+
},
|
| 11 |
+
"local": {
|
| 12 |
+
"median": 3487.2,
|
| 13 |
+
"mean": 4017.4,
|
| 14 |
+
"q1": 2343.5,
|
| 15 |
+
"q3": 4945.1,
|
| 16 |
+
"volume": 1218
|
| 17 |
+
},
|
| 18 |
+
"maison": {
|
| 19 |
+
"median": 5147.0,
|
| 20 |
+
"mean": 5317.0,
|
| 21 |
+
"q1": 4210.5,
|
| 22 |
+
"q3": 6153.8,
|
| 23 |
+
"volume": 6018
|
| 24 |
+
},
|
| 25 |
+
"tous": {
|
| 26 |
+
"median": 4566.4,
|
| 27 |
+
"mean": 4717.0,
|
| 28 |
+
"q1": 3697.8,
|
| 29 |
+
"q3": 5543.5,
|
| 30 |
+
"volume": 25588
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"Lille": {
|
| 34 |
+
"code": "59350",
|
| 35 |
+
"appartement": {
|
| 36 |
+
"median": 3673.5,
|
| 37 |
+
"mean": 3774.2,
|
| 38 |
+
"q1": 2897.4,
|
| 39 |
+
"q3": 4517.9,
|
| 40 |
+
"volume": 14741
|
| 41 |
+
},
|
| 42 |
+
"local": {
|
| 43 |
+
"median": 2542.9,
|
| 44 |
+
"mean": 3167.6,
|
| 45 |
+
"q1": 1563.9,
|
| 46 |
+
"q3": 3812.5,
|
| 47 |
+
"volume": 1064
|
| 48 |
+
},
|
| 49 |
+
"maison": {
|
| 50 |
+
"median": 2785.8,
|
| 51 |
+
"mean": 2930.7,
|
| 52 |
+
"q1": 2172.4,
|
| 53 |
+
"q3": 3500.0,
|
| 54 |
+
"volume": 4911
|
| 55 |
+
},
|
| 56 |
+
"tous": {
|
| 57 |
+
"median": 3405.1,
|
| 58 |
+
"mean": 3543.1,
|
| 59 |
+
"q1": 2600.0,
|
| 60 |
+
"q3": 4312.5,
|
| 61 |
+
"volume": 20716
|
| 62 |
+
}
|
| 63 |
+
},
|
| 64 |
+
"Lyon": {
|
| 65 |
+
"code": "69123",
|
| 66 |
+
"appartement": {
|
| 67 |
+
"median": 4826.2,
|
| 68 |
+
"mean": 4830.9,
|
| 69 |
+
"q1": 3875.0,
|
| 70 |
+
"q3": 5711.9,
|
| 71 |
+
"volume": 35236
|
| 72 |
+
},
|
| 73 |
+
"local": {
|
| 74 |
+
"median": 3311.4,
|
| 75 |
+
"mean": 3753.1,
|
| 76 |
+
"q1": 2173.9,
|
| 77 |
+
"q3": 4703.4,
|
| 78 |
+
"volume": 2734
|
| 79 |
+
},
|
| 80 |
+
"maison": {
|
| 81 |
+
"median": 5990.0,
|
| 82 |
+
"mean": 6463.1,
|
| 83 |
+
"q1": 4662.2,
|
| 84 |
+
"q3": 7609.6,
|
| 85 |
+
"volume": 949
|
| 86 |
+
},
|
| 87 |
+
"tous": {
|
| 88 |
+
"median": 4777.8,
|
| 89 |
+
"mean": 4795.0,
|
| 90 |
+
"q1": 3756.5,
|
| 91 |
+
"q3": 5710.7,
|
| 92 |
+
"volume": 38919
|
| 93 |
+
}
|
| 94 |
+
},
|
| 95 |
+
"Marseille": {
|
| 96 |
+
"code": "13055",
|
| 97 |
+
"appartement": {
|
| 98 |
+
"median": 2935.5,
|
| 99 |
+
"mean": 3110.5,
|
| 100 |
+
"q1": 2089.6,
|
| 101 |
+
"q3": 3909.1,
|
| 102 |
+
"volume": 59629
|
| 103 |
+
},
|
| 104 |
+
"local": {
|
| 105 |
+
"median": 2062.5,
|
| 106 |
+
"mean": 2473.1,
|
| 107 |
+
"q1": 1279.1,
|
| 108 |
+
"q3": 3157.9,
|
| 109 |
+
"volume": 3731
|
| 110 |
+
},
|
| 111 |
+
"maison": {
|
| 112 |
+
"median": 4505.3,
|
| 113 |
+
"mean": 4990.0,
|
| 114 |
+
"q1": 3283.3,
|
| 115 |
+
"q3": 6014.7,
|
| 116 |
+
"volume": 7871
|
| 117 |
+
},
|
| 118 |
+
"tous": {
|
| 119 |
+
"median": 3023.8,
|
| 120 |
+
"mean": 3284.8,
|
| 121 |
+
"q1": 2111.1,
|
| 122 |
+
"q3": 4083.3,
|
| 123 |
+
"volume": 71231
|
| 124 |
+
}
|
| 125 |
+
},
|
| 126 |
+
"Montpellier": {
|
| 127 |
+
"code": "34172",
|
| 128 |
+
"appartement": {
|
| 129 |
+
"median": 3261.3,
|
| 130 |
+
"mean": 3280.2,
|
| 131 |
+
"q1": 2522.7,
|
| 132 |
+
"q3": 4000.0,
|
| 133 |
+
"volume": 22452
|
| 134 |
+
},
|
| 135 |
+
"local": {
|
| 136 |
+
"median": 2564.1,
|
| 137 |
+
"mean": 2766.4,
|
| 138 |
+
"q1": 1639.3,
|
| 139 |
+
"q3": 3466.8,
|
| 140 |
+
"volume": 1585
|
| 141 |
+
},
|
| 142 |
+
"maison": {
|
| 143 |
+
"median": 3819.5,
|
| 144 |
+
"mean": 4073.3,
|
| 145 |
+
"q1": 3070.7,
|
| 146 |
+
"q3": 4806.0,
|
| 147 |
+
"volume": 2510
|
| 148 |
+
},
|
| 149 |
+
"tous": {
|
| 150 |
+
"median": 3279.5,
|
| 151 |
+
"mean": 3324.5,
|
| 152 |
+
"q1": 2508.0,
|
| 153 |
+
"q3": 4047.6,
|
| 154 |
+
"volume": 26547
|
| 155 |
+
}
|
| 156 |
+
},
|
| 157 |
+
"Nantes": {
|
| 158 |
+
"code": "44109",
|
| 159 |
+
"appartement": {
|
| 160 |
+
"median": 3690.5,
|
| 161 |
+
"mean": 3724.2,
|
| 162 |
+
"q1": 3000.0,
|
| 163 |
+
"q3": 4393.4,
|
| 164 |
+
"volume": 21661
|
| 165 |
+
},
|
| 166 |
+
"local": {
|
| 167 |
+
"median": 2911.7,
|
| 168 |
+
"mean": 3252.4,
|
| 169 |
+
"q1": 1845.2,
|
| 170 |
+
"q3": 4227.3,
|
| 171 |
+
"volume": 1334
|
| 172 |
+
},
|
| 173 |
+
"maison": {
|
| 174 |
+
"median": 4433.6,
|
| 175 |
+
"mean": 4675.4,
|
| 176 |
+
"q1": 3636.4,
|
| 177 |
+
"q3": 5500.0,
|
| 178 |
+
"volume": 5544
|
| 179 |
+
},
|
| 180 |
+
"tous": {
|
| 181 |
+
"median": 3793.1,
|
| 182 |
+
"mean": 3886.9,
|
| 183 |
+
"q1": 3048.8,
|
| 184 |
+
"q3": 4575.8,
|
| 185 |
+
"volume": 28539
|
| 186 |
+
}
|
| 187 |
+
},
|
| 188 |
+
"Nice": {
|
| 189 |
+
"code": "06088",
|
| 190 |
+
"appartement": {
|
| 191 |
+
"median": 4444.4,
|
| 192 |
+
"mean": 4717.2,
|
| 193 |
+
"q1": 3478.3,
|
| 194 |
+
"q3": 5600.0,
|
| 195 |
+
"volume": 39206
|
| 196 |
+
},
|
| 197 |
+
"local": {
|
| 198 |
+
"median": 3214.3,
|
| 199 |
+
"mean": 3623.2,
|
| 200 |
+
"q1": 2054.8,
|
| 201 |
+
"q3": 4722.2,
|
| 202 |
+
"volume": 2412
|
| 203 |
+
},
|
| 204 |
+
"maison": {
|
| 205 |
+
"median": 5312.5,
|
| 206 |
+
"mean": 5688.8,
|
| 207 |
+
"q1": 3689.0,
|
| 208 |
+
"q3": 6964.6,
|
| 209 |
+
"volume": 1565
|
| 210 |
+
},
|
| 211 |
+
"tous": {
|
| 212 |
+
"median": 4416.7,
|
| 213 |
+
"mean": 4691.3,
|
| 214 |
+
"q1": 3409.1,
|
| 215 |
+
"q3": 5616.4,
|
| 216 |
+
"volume": 43183
|
| 217 |
+
}
|
| 218 |
+
},
|
| 219 |
+
"Paris": {
|
| 220 |
+
"code": "75056",
|
| 221 |
+
"appartement": {
|
| 222 |
+
"median": 10362.8,
|
| 223 |
+
"mean": 10477.6,
|
| 224 |
+
"q1": 8709.5,
|
| 225 |
+
"q3": 12106.8,
|
| 226 |
+
"volume": 147088
|
| 227 |
+
},
|
| 228 |
+
"local": {
|
| 229 |
+
"median": 8255.6,
|
| 230 |
+
"mean": 8564.9,
|
| 231 |
+
"q1": 5411.0,
|
| 232 |
+
"q3": 11215.0,
|
| 233 |
+
"volume": 11620
|
| 234 |
+
},
|
| 235 |
+
"maison": {
|
| 236 |
+
"median": 13457.8,
|
| 237 |
+
"mean": 13777.4,
|
| 238 |
+
"q1": 10400.0,
|
| 239 |
+
"q3": 17260.3,
|
| 240 |
+
"volume": 652
|
| 241 |
+
},
|
| 242 |
+
"tous": {
|
| 243 |
+
"median": 10284.5,
|
| 244 |
+
"mean": 10351.6,
|
| 245 |
+
"q1": 8541.6,
|
| 246 |
+
"q3": 12083.3,
|
| 247 |
+
"volume": 159360
|
| 248 |
+
}
|
| 249 |
+
},
|
| 250 |
+
"Toulouse": {
|
| 251 |
+
"code": "31555",
|
| 252 |
+
"appartement": {
|
| 253 |
+
"median": 3230.8,
|
| 254 |
+
"mean": 3420.9,
|
| 255 |
+
"q1": 2552.2,
|
| 256 |
+
"q3": 4124.3,
|
| 257 |
+
"volume": 35114
|
| 258 |
+
},
|
| 259 |
+
"local": {
|
| 260 |
+
"median": 2500.0,
|
| 261 |
+
"mean": 2990.2,
|
| 262 |
+
"q1": 1580.5,
|
| 263 |
+
"q3": 3766.8,
|
| 264 |
+
"volume": 1845
|
| 265 |
+
},
|
| 266 |
+
"maison": {
|
| 267 |
+
"median": 3750.0,
|
| 268 |
+
"mean": 4100.6,
|
| 269 |
+
"q1": 2881.3,
|
| 270 |
+
"q3": 4969.7,
|
| 271 |
+
"volume": 6119
|
| 272 |
+
},
|
| 273 |
+
"tous": {
|
| 274 |
+
"median": 3272.7,
|
| 275 |
+
"mean": 3499.0,
|
| 276 |
+
"q1": 2557.3,
|
| 277 |
+
"q3": 4224.7,
|
| 278 |
+
"volume": 43078
|
| 279 |
+
}
|
| 280 |
+
}
|
| 281 |
+
}
|
main.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def main():
|
| 2 |
+
print("Hello from realadvisor!")
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
if __name__ == "__main__":
|
| 6 |
+
main()
|
ml_challenge.txt
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MLE Challenge
|
| 2 |
+
Context
|
| 3 |
+
You are analysing residential property prices in France. You have at your disposal the recent public transactions (https://www.data.gouv.fr/datasets/demandes-de-valeurs-foncieres/) to estimate the current market price as a price per square meter (€/m²).
|
| 4 |
+
Objective
|
| 5 |
+
You need to generate an interactive map visualization of the price data aggregated by:
|
| 6 |
+
Country
|
| 7 |
+
Region
|
| 8 |
+
Department
|
| 9 |
+
Neighborhood
|
| 10 |
+
Postcode
|
| 11 |
+
Building plots
|
| 12 |
+
|
| 13 |
+
You need to obtain open data from the French government for these geometries and take care of cleaning the data to produce accurate aggregates.
|
| 14 |
+
You are asked to make the best estimate of the market price, taking into account transaction price volatility, transaction volume, data freshness and consistency. You can estimate either a single number for the price or an interval.
|
| 15 |
+
You can use this one as a reference https://explore.data.gouv.fr/fr/immobilier
|
| 16 |
+
|
| 17 |
+
You need to render an interactive map that shows the price aggregates with colors; as the user zooms, the map needs to transition between aggregation levels. (Link to hosted app: optional but preferred.)
|
| 18 |
+
If the volume of data is too large for the browser to handle all of it, you can subset it, but a solution to this limitation will be appreciated.
|
| 19 |
+
Produce a list of market prices per square meter, by property type, for the 10 biggest cities.
|
| 20 |
+
Submit your processing code. (Link to github repo)
|
| 21 |
+
What you will be evaluated on
|
| 22 |
+
Is the colored map loading ?
|
| 23 |
+
Is the map usable and not laggy ?
|
| 24 |
+
Is the map refreshing the aggregation level on zoom ?
|
| 25 |
+
Are all 6 aggregation levels present ?
|
| 26 |
+
Country
|
| 27 |
+
Region
|
| 28 |
+
Department
|
| 29 |
+
Neighborhood
|
| 30 |
+
Postcode
|
| 31 |
+
Building plots
|
| 32 |
+
Are the price estimates plausible ?
|
| 33 |
+
Is the data complete or was it subset ?
|
| 34 |
+
The processing code is clean, clear and reusable
|
| 35 |
+
The architecture is robust and logical
|
| 36 |
+
App is hosted and functional
|
notebooks/01_data_exploration.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "realadvisor-mle-challenge"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "French property price analysis and interactive map visualization"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.11"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"polars>=1.0.0",
|
| 9 |
+
"requests>=2.31.0",
|
| 10 |
+
"tqdm>=4.66.0",
|
| 11 |
+
"jupyter>=1.0.0",
|
| 12 |
+
"matplotlib>=3.8.0",
|
| 13 |
+
"seaborn>=0.13.0",
|
| 14 |
+
"geopandas>=0.14.0",
|
| 15 |
+
"folium>=0.15.0",
|
| 16 |
+
"lab>=8.8",
|
| 17 |
+
]
|
src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""RealAdvisor MLE Challenge - French property price analysis pipeline."""
|
src/aggregator.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Aggregate property prices at different geographic levels.
|
| 3 |
+
|
| 4 |
+
Responsibility: Given cleaned transaction data, compute summary statistics
|
| 5 |
+
(median, volume, IQR, confidence) grouped by any geographic column.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
import logging
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
import polars as pl
|
| 13 |
+
|
| 14 |
+
from src.config import (
|
| 15 |
+
AGGREGATED_DIR,
|
| 16 |
+
AGGREGATION_LEVELS,
|
| 17 |
+
DEPT_TO_REGION,
|
| 18 |
+
NO_DVF_DEPARTMENTS,
|
| 19 |
+
REGION_NAMES,
|
| 20 |
+
TYPE_LOCAL_SHORT,
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
# Map each aggregation level to the column used for grouping.
# aggregate_all_levels() looks up every name in config.AGGREGATION_LEVELS here.
# NOTE(review): "commune" groups on the raw code_commune, not on the
# code_commune_city column added by cleaner.normalize_commune_codes, so
# Paris/Lyon/Marseille presumably appear per arrondissement at this level —
# confirm this matches the geometries used by the frontend.
LEVEL_TO_COLUMN: dict[str, str] = {
    "country": "_country",  # synthetic constant column
    "region": "code_region",
    "department": "code_departement",
    "commune": "code_commune",
    "postcode": "code_postal",
    "section": "code_section",
}
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def aggregate_level(
    lf: pl.LazyFrame,
    group_col: str,
    *,
    property_type: str | None = None,
) -> pl.DataFrame:
    """
    Summarise prix_m2 for every distinct value of one grouping column.

    Args:
        lf: Cleaned transaction LazyFrame; the columns prix_m2, year and
            type_local are read here.
        group_col: Column to group by (e.g. "code_departement"). The special
            name "_country" is materialised as a constant "FR" column first.
        property_type: Keep only rows whose type_local equals this value.
            A falsy value (None or "") keeps every row.

    Returns:
        Collected DataFrame sorted by code, with columns: code,
        median_price_m2, mean_price_m2, q1, q3, std_dev, volume,
        latest_year, iqr.
    """
    price = pl.col("prix_m2")
    statistics = [
        price.median().alias("median_price_m2"),
        price.mean().alias("mean_price_m2"),
        price.quantile(0.25).alias("q1"),
        price.quantile(0.75).alias("q3"),
        price.std().alias("std_dev"),
        price.count().alias("volume"),
        # year is stored as text; non-numeric values become null instead of raising
        pl.col("year").cast(pl.Int32, strict=False).max().alias("latest_year"),
    ]

    source = lf
    if property_type:
        source = source.filter(pl.col("type_local") == property_type)
    if group_col == "_country":
        # Country level has no real column: group on a constant instead
        source = source.with_columns(pl.lit("FR").alias("_country"))

    per_code = source.group_by(group_col).agg(*statistics).rename({group_col: "code"})
    # IQR = Q3 - Q1
    interquartile = (pl.col("q3") - pl.col("q1")).alias("iqr")
    return per_code.with_columns(interquartile).sort("code").collect()
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def compute_confidence(df: pl.DataFrame) -> pl.DataFrame:
    """
    Append conf_volume, conf_stability and an overall confidence column.

    Args:
        df: Aggregated statistics with volume, iqr and median_price_m2 columns.

    Returns:
        The input frame plus three columns, each in the range 0-1:
        conf_volume (log-scaled volume, reaching 1.0 at 100 transactions),
        conf_stability (1 - IQR / median, 0.0 when the median is not positive)
        and confidence (0.6 * conf_volume + 0.4 * conf_stability, rounded to
        3 decimals).
    """
    median = pl.col("median_price_m2")

    # Volume component: log1p(volume) / log1p(100), clamped to [0, 1]
    volume_score = (
        pl.col("volume").cast(pl.Float64).log1p() / pl.lit(100.0).log1p()
    ).clip(0.0, 1.0)

    # Stability component: a tight IQR relative to the median scores high
    relative_spread = pl.col("iqr") / median
    stability_score = (
        pl.when(median > 0)
        .then((1.0 - relative_spread).clip(0.0, 1.0))
        .otherwise(0.0)
    )

    scored = df.with_columns(
        volume_score.alias("conf_volume"),
        stability_score.alias("conf_stability"),
    )

    # Overall confidence = weighted average of the two components
    blended = pl.col("conf_volume") * 0.6 + pl.col("conf_stability") * 0.4
    return scored.with_columns(blended.round(3).alias("confidence"))
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def aggregate_all_types(
    lf: pl.LazyFrame,
    group_col: str,
) -> pl.DataFrame:
    """
    Build one stacked table: all types combined ("tous") plus each property type.

    Args:
        lf: Cleaned transaction LazyFrame.
        group_col: Column to group by for this geographic level.

    Returns:
        DataFrame with the aggregate_level() and compute_confidence() columns
        plus a "type" column holding "tous" or a short name from
        TYPE_LOCAL_SHORT.
    """

    def _labelled(label: str, property_type: str | None = None) -> pl.DataFrame:
        # Aggregate, score, then tag every row with the property-type label
        stats = aggregate_level(lf, group_col, property_type=property_type)
        return compute_confidence(stats).with_columns(pl.lit(label).alias("type"))

    # The combined frame comes first, followed by one frame per property type
    frames = [_labelled("tous")]
    frames.extend(
        _labelled(short_name, full_name)
        for full_name, short_name in TYPE_LOCAL_SHORT.items()
    )
    return pl.concat(frames, how="vertical_relaxed")
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def aggregate_all_levels(lf: pl.LazyFrame) -> dict[str, pl.DataFrame]:
    """
    Aggregate the cleaned data once per entry of AGGREGATION_LEVELS.

    Args:
        lf: Cleaned LazyFrame.

    Returns:
        Dict mapping level name → aggregated DataFrame, in the order of
        AGGREGATION_LEVELS.
    """
    per_level: dict[str, pl.DataFrame] = {}
    for level_name in AGGREGATION_LEVELS:
        group_col = LEVEL_TO_COLUMN[level_name]
        logger.info("Aggregating level: %s (group by %s)", level_name, group_col)
        frame = aggregate_all_types(lf, group_col)
        per_level[level_name] = frame
        logger.info(
            " → %d rows (%d unique codes)",
            len(frame),
            frame["code"].n_unique(),
        )
    return per_level
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def export_json(aggregated: dict[str, pl.DataFrame], output_dir: Path | None = None) -> None:
    """
    Export aggregated DataFrames to JSON files for the frontend.

    Each level produces a file named prices_<level>.json structured as:
    { "code1": { "tous": {...}, "appartement": {...}, ... }, ... }
    where every inner object holds median, mean, q1, q3 (rounded to one
    decimal, 0 when null), volume and confidence.

    Rows whose code is null are skipped (and counted in a warning): they
    carry no geographic key and would otherwise be written under the
    meaningless code "None".

    Args:
        aggregated: Dict from aggregate_all_levels().
        output_dir: Directory to write JSONs. Defaults to config.AGGREGATED_DIR.
    """
    output_dir = output_dir or AGGREGATED_DIR
    output_dir.mkdir(parents=True, exist_ok=True)

    for level, df in aggregated.items():
        data: dict[str, dict[str, dict]] = {}
        skipped = 0
        for row in df.iter_rows(named=True):
            if row["code"] is None:
                # group_by keeps a null group; str(None) would export it as "None"
                skipped += 1
                continue
            data.setdefault(str(row["code"]), {})[row["type"]] = {
                "median": round(row["median_price_m2"] or 0, 1),
                "mean": round(row["mean_price_m2"] or 0, 1),
                "q1": round(row["q1"] or 0, 1),
                "q3": round(row["q3"] or 0, 1),
                "volume": row["volume"],
                "confidence": row["confidence"],
            }

        if skipped:
            logger.warning("Level %s: skipped %d rows with a null code", level, skipped)

        path = output_dir / f"prices_{level}.json"
        # ensure_ascii=False may emit non-ASCII text, so pin the encoding
        # instead of relying on the platform default.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
        logger.info("Exported: %s (%d entries)", path.name, len(data))
|
src/cleaner.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Clean and filter raw DVF transaction data.
|
| 3 |
+
|
| 4 |
+
Responsibility: Load raw CSVs, apply quality filters, deduplicate
|
| 5 |
+
multi-row mutations, and produce a clean dataset ready for price calculation.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
import polars as pl
|
| 12 |
+
|
| 13 |
+
from src.config import (
|
| 14 |
+
ARRONDISSEMENT_MAPPING,
|
| 15 |
+
DVF_COLUMNS,
|
| 16 |
+
PRICE_M2_MAX,
|
| 17 |
+
PRICE_M2_MIN,
|
| 18 |
+
REFERENCE_DATE,
|
| 19 |
+
SURFACE_MAX,
|
| 20 |
+
SURFACE_MIN,
|
| 21 |
+
TEMPORAL_LAMBDA,
|
| 22 |
+
VALID_NATURE_MUTATION,
|
| 23 |
+
VALID_TYPE_LOCAL,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def load_raw_csv(path: Path) -> pl.LazyFrame:
    """
    Lazily scan one decompressed DVF CSV, keeping only DVF_COLUMNS.

    Args:
        path: Path to the CSV file (decompressed).

    Returns:
        LazyFrame restricted to the columns listed in DVF_COLUMNS.
    """
    # Identifier-like columns are read as text: geographic codes are not
    # numbers (Corsica has 2A/2B prefixes).
    text_columns = (
        "id_mutation",
        "code_postal",
        "code_commune",
        "code_departement",
        "id_parcelle",
        "code_type_local",
        "date_mutation",
        "nom_commune",
        "nature_mutation",
        "type_local",
    )
    scanned = pl.scan_csv(
        path,
        separator=",",
        infer_schema_length=10_000,
        null_values=["", "NA", "null"],
        schema_overrides=dict.fromkeys(text_columns, pl.Utf8),
    )
    return scanned.select(DVF_COLUMNS)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def load_multiple_csvs(paths: list[Path]) -> pl.LazyFrame:
    """
    Scan every CSV with load_raw_csv() and stack the results vertically.

    Args:
        paths: List of paths to CSV files.

    Returns:
        Single LazyFrame with all files combined ("vertical_relaxed" concat).
    """
    yearly_frames = list(map(load_raw_csv, paths))
    return pl.concat(yearly_frames, how="vertical_relaxed")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def filter_sales(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Keep only standard property sales with a usable price and surface.

    A row is kept when all of the following hold:
    - nature_mutation equals VALID_NATURE_MUTATION
    - type_local is one of VALID_TYPE_LOCAL
    - valeur_fonciere is not null and > 0
    - surface_reelle_bati is not null and > 0
    """
    price = pl.col("valeur_fonciere")
    surface = pl.col("surface_reelle_bati")

    is_sale = pl.col("nature_mutation") == VALID_NATURE_MUTATION
    is_valid_type = pl.col("type_local").is_in(VALID_TYPE_LOCAL)
    has_price = price.is_not_null() & (price > 0)
    has_surface = surface.is_not_null() & (surface > 0)

    return lf.filter(is_sale & is_valid_type & has_price & has_surface)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def deduplicate_mutations(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Collapse multi-row mutations into one row per id_mutation.

    A single sale (id_mutation) can span several rows when it covers
    several lots. The steps are:
    1. Drop every mutation whose rows carry more than one type_local
       (mixed-type sales have ambiguous price attribution).
    2. Group by id_mutation: sum surface_reelle_bati and
       nombre_pieces_principales, take the first value of every other column.

    NOTE(review): the sums assume each row is a distinct built unit; if the
    source repeats the same unit on several rows the surface would be
    inflated — verify against the raw data.

    Returns:
        One row per mutation with aggregated surface.
    """
    key = "id_mutation"

    # Ids of mutations whose rows all share a single property type
    homogeneous_ids = (
        lf.group_by(key)
        .agg(pl.col("type_local").n_unique().alias("n_types"))
        .filter(pl.col("n_types") == 1)
        .select(key)
    )

    # Output column order; columns in `summed` are added up, the rest use first()
    summed = {"surface_reelle_bati", "nombre_pieces_principales"}
    output_columns = (
        "date_mutation",
        "nature_mutation",
        "valeur_fonciere",
        "code_postal",
        "code_commune",
        "nom_commune",
        "code_departement",
        "id_parcelle",
        "type_local",
        "surface_reelle_bati",
        "nombre_pieces_principales",
        "nombre_lots",
        "longitude",
        "latitude",
    )
    aggregations = [
        pl.col(name).sum() if name in summed else pl.col(name).first()
        for name in output_columns
    ]

    homogeneous_rows = lf.join(homogeneous_ids, on=key, how="inner")
    return homogeneous_rows.group_by(key).agg(*aggregations)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def normalize_commune_codes(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Map arrondissement codes back to parent city codes.

    Paris (75101-75120 → 75056), Lyon (69381-69389 → 69123),
    Marseille (13201-13216 → 13055). Codes absent from
    ARRONDISSEMENT_MAPPING (and nulls) are passed through unchanged.

    Adds:
        - code_commune_city: code_commune with arrondissements folded into
          their city code.
        - code_commune_original: untouched copy of code_commune, kept for
          section-level work.

    Returns:
        The input LazyFrame with the two columns above appended.
    """
    # A single dictionary lookup replaces one nested when/then/otherwise per
    # arrondissement (45 levels deep), keeping the query plan flat.
    city_code = pl.col("code_commune").cast(pl.Utf8).replace(ARRONDISSEMENT_MAPPING)

    return lf.with_columns(
        city_code.alias("code_commune_city"),
        # Keep original for section-level work
        pl.col("code_commune").alias("code_commune_original"),
    )
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def remove_outliers(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Drop transactions whose surface or price/m² falls outside the config bounds.

    Both ranges are inclusive: surface_reelle_bati must lie within
    [SURFACE_MIN, SURFACE_MAX] and prix_m2 within
    [PRICE_M2_MIN, PRICE_M2_MAX]. The thresholds are absolute and
    intentionally conservative, to avoid removing valid luxury or rural
    transactions.
    """
    surface = pl.col("surface_reelle_bati")
    price = pl.col("prix_m2")

    surface_ok = (surface >= SURFACE_MIN) & (surface <= SURFACE_MAX)
    price_ok = (price >= PRICE_M2_MIN) & (price <= PRICE_M2_MAX)

    return lf.filter(surface_ok & price_ok)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def add_derived_columns(lf: pl.LazyFrame) -> pl.LazyFrame:
    """
    Add computed columns needed downstream.

    Adds:
        - prix_m2: valeur_fonciere / surface_reelle_bati
        - code_section: first 10 chars of id_parcelle
        - code_region: mapped from code_departement via DEPT_TO_REGION;
          "unknown" for null or unmapped departments
        - year: first 4 chars of date_mutation (kept as text)
        - months_since: days between date_mutation and REFERENCE_DATE,
          divided by 30.44 (negative for transactions after the reference)
        - temporal_weight: TEMPORAL_LAMBDA ** max(months_since, 0)

    Returns:
        The input LazyFrame with the columns above appended.
    """
    from src.config import DEPT_TO_REGION

    # One dictionary lookup instead of a nested when/then/otherwise per
    # department (~100 levels deep). fill_null guarantees that a null
    # department still ends up as "unknown", exactly like unmapped codes.
    region = (
        pl.col("code_departement")
        .cast(pl.Utf8)
        .replace_strict(DEPT_TO_REGION, default=pl.lit("unknown"), return_dtype=pl.Utf8)
        .fill_null("unknown")
    )

    date_text = pl.col("date_mutation").cast(pl.Utf8)
    ref_date = pl.lit(REFERENCE_DATE).str.to_date("%Y-%m-%d")
    elapsed_days = (ref_date - date_text.str.to_date("%Y-%m-%d")).dt.total_days()

    return lf.with_columns(
        (pl.col("valeur_fonciere") / pl.col("surface_reelle_bati")).alias("prix_m2"),
        pl.col("id_parcelle").cast(pl.Utf8).str.slice(0, 10).alias("code_section"),
        region.alias("code_region"),
        date_text.str.slice(0, 4).alias("year"),
        # Temporal weighting: months since reference date
        (
            elapsed_days.cast(pl.Float64) / 30.44  # average days per month
        ).alias("months_since"),
    ).with_columns(
        # Exponential decay weight; transactions after the reference date are
        # clipped to 0 months so their weight is exactly 1.
        (pl.lit(TEMPORAL_LAMBDA) ** pl.col("months_since").clip(0.0, None)).alias(
            "temporal_weight"
        ),
    )
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def clean(paths: list[Path]) -> pl.LazyFrame:
    """
    Chain every cleaning step over the raw DVF files.

    Order: load → filter sales → deduplicate mutations → add derived
    columns → remove outliers → normalize commune codes. Each step is
    announced on the module logger before it is applied.

    Args:
        paths: List of raw CSV file paths.

    Returns:
        Cleaned LazyFrame ready for aggregation.
    """
    logger.info("Loading %d raw files...", len(paths))
    lf = load_multiple_csvs(paths)

    # (log message, transformation) pairs, applied in this exact order:
    # outlier removal needs prix_m2, which add_derived_columns creates.
    stages = (
        ("Filtering sales...", filter_sales),
        ("Deduplicating mutations...", deduplicate_mutations),
        ("Adding derived columns...", add_derived_columns),
        ("Removing outliers...", remove_outliers),
        ("Normalizing commune codes...", normalize_commune_codes),
    )
    for message, stage in stages:
        logger.info(message)
        lf = stage(lf)

    return lf
|
src/config.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration constants for the DVF data pipeline.
|
| 3 |
+
|
| 4 |
+
Single source of truth for paths, URLs, thresholds, and mappings.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
# ---------------------------------------------------------------------------
|
| 10 |
+
# Paths
|
| 11 |
+
# ---------------------------------------------------------------------------
|
| 12 |
+
ROOT_DIR = Path(__file__).resolve().parent.parent
|
| 13 |
+
DATA_DIR = ROOT_DIR / "data"
|
| 14 |
+
RAW_DIR = DATA_DIR / "raw"
|
| 15 |
+
PROCESSED_DIR = DATA_DIR / "processed"
|
| 16 |
+
AGGREGATED_DIR = DATA_DIR / "aggregated"
|
| 17 |
+
SECTIONS_DIR = AGGREGATED_DIR / "sections"
|
| 18 |
+
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
# DVF data source
|
| 21 |
+
# ---------------------------------------------------------------------------
|
| 22 |
+
DVF_BASE_URL = "https://files.data.gouv.fr/geo-dvf/latest/csv"
|
| 23 |
+
DVF_YEARS = list(range(2014, 2026)) # 2014-2025: full dataset per Carlos's feedback
|
| 24 |
+
|
| 25 |
+
def dvf_url(year: int) -> str:
    """
    Build the download URL of one year's national DVF geolocalized CSV.

    Args:
        year: Year of the dataset (e.g. 2024).

    Returns:
        URL of the form "<DVF_BASE_URL>/<year>/full.csv.gz".
    """
    archive_url = f"{DVF_BASE_URL}/{year}/full.csv.gz"
    return archive_url
|
| 28 |
+
|
| 29 |
+
# ---------------------------------------------------------------------------
|
| 30 |
+
# Columns we actually need (saves memory on load)
|
| 31 |
+
# ---------------------------------------------------------------------------
|
| 32 |
+
DVF_COLUMNS = [
|
| 33 |
+
"id_mutation",
|
| 34 |
+
"date_mutation",
|
| 35 |
+
"nature_mutation",
|
| 36 |
+
"valeur_fonciere",
|
| 37 |
+
"code_postal",
|
| 38 |
+
"code_commune",
|
| 39 |
+
"nom_commune",
|
| 40 |
+
"code_departement",
|
| 41 |
+
"id_parcelle",
|
| 42 |
+
"code_type_local",
|
| 43 |
+
"type_local",
|
| 44 |
+
"surface_reelle_bati",
|
| 45 |
+
"nombre_pieces_principales",
|
| 46 |
+
"nombre_lots",
|
| 47 |
+
"longitude",
|
| 48 |
+
"latitude",
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
# ---------------------------------------------------------------------------
|
| 52 |
+
# Filtering thresholds
|
| 53 |
+
# ---------------------------------------------------------------------------
|
| 54 |
+
VALID_NATURE_MUTATION = "Vente"
|
| 55 |
+
|
| 56 |
+
VALID_TYPE_LOCAL = ["Appartement", "Maison"] # Residential only per Carlos's feedback
|
| 57 |
+
|
| 58 |
+
TYPE_LOCAL_SHORT = {
|
| 59 |
+
"Appartement": "appartement",
|
| 60 |
+
"Maison": "maison",
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
# ---------------------------------------------------------------------------
|
| 64 |
+
# Temporal weighting parameters
|
| 65 |
+
# ---------------------------------------------------------------------------
|
| 66 |
+
REFERENCE_DATE = "2025-01-01" # Anchor date for temporal decay
|
| 67 |
+
TEMPORAL_LAMBDA = 0.97 # Monthly decay factor (half-life ~23 months)
|
| 68 |
+
TRIM_FRACTION = 0.20 # Trim 20% from each tail for trimmed mean
|
| 69 |
+
|
| 70 |
+
# Price per m² bounds for outlier removal
|
| 71 |
+
PRICE_M2_MIN = 200 # €/m² — below this is almost certainly an error
|
| 72 |
+
PRICE_M2_MAX = 25_000 # €/m² — above this is extreme luxury / error
|
| 73 |
+
SURFACE_MIN = 9 # m² — below 9m² is legally not habitable in France
|
| 74 |
+
SURFACE_MAX = 1000 # m² — above this for a single unit is suspect
|
| 75 |
+
|
| 76 |
+
# ---------------------------------------------------------------------------
|
| 77 |
+
# Department → Region mapping (2016 reform)
|
| 78 |
+
# ---------------------------------------------------------------------------
|
| 79 |
+
# Department code -> region code; filled by the loop below.
DEPT_TO_REGION: dict[str, str] = {}
# Keys are "<region code>-<region name>"; values are the comma-separated
# department codes that belong to that region.
_REGION_DEPTS = {
    "84-Auvergne-Rhône-Alpes": "01,03,07,15,26,38,42,43,63,69,73,74",
    "27-Bourgogne-Franche-Comté": "21,25,39,58,70,71,89,90",
    "53-Bretagne": "22,29,35,56",
    "24-Centre-Val de Loire": "18,28,36,37,41,45",
    "94-Corse": "2A,2B",
    "44-Grand Est": "08,10,51,52,54,55,57,67,68,88",
    "32-Hauts-de-France": "02,59,60,62,80",
    "11-Île-de-France": "75,77,78,91,92,93,94,95",
    "28-Normandie": "14,27,50,61,76",
    "75-Nouvelle-Aquitaine": "16,17,19,23,24,33,40,47,64,79,86,87",
    "76-Occitanie": "09,11,12,30,31,32,34,46,48,65,66,81,82",
    "52-Pays de la Loire": "44,49,53,72,85",
    "93-Provence-Alpes-Côte d'Azur": "04,05,06,13,83,84",
    "01-Guadeloupe": "971",
    "02-Martinique": "972",
    "03-Guyane": "973",
    "04-La Réunion": "974",
    "06-Mayotte": "976",
}

# Region code -> region name; filled by the loop below.
REGION_NAMES: dict[str, str] = {}
for key, depts_str in _REGION_DEPTS.items():
    # Split on the first "-" only: region names themselves contain hyphens.
    code, name = key.split("-", 1)
    REGION_NAMES[code] = name
    for d in depts_str.split(","):
        DEPT_TO_REGION[d.strip()] = code
|
| 107 |
+
|
| 108 |
+
# Departments with no DVF data (Alsace-Moselle + Mayotte)
|
| 109 |
+
NO_DVF_DEPARTMENTS = {"57", "67", "68", "976"}
|
| 110 |
+
|
| 111 |
+
# ---------------------------------------------------------------------------
|
| 112 |
+
# Top 10 cities by population (INSEE code → name)
|
| 113 |
+
# ---------------------------------------------------------------------------
|
| 114 |
+
# City-level commune code -> display name. Paris, Marseille and Lyon use the
# same city codes that ARRONDISSEMENT_MAPPING folds their arrondissements into.
# NOTE(review): "67482" (Strasbourg) begins with department 67, which is
# listed in NO_DVF_DEPARTMENTS — presumably no transactions exist for it;
# confirm how the top-cities report handles an empty city.
TOP_10_CITIES: dict[str, str] = {
    "75056": "Paris",
    "13055": "Marseille",
    "69123": "Lyon",
    "31555": "Toulouse",
    "06088": "Nice",
    "44109": "Nantes",
    "34172": "Montpellier",
    "67482": "Strasbourg",
    "33063": "Bordeaux",
    "59350": "Lille",
}
|
| 126 |
+
|
| 127 |
+
# Paris, Lyon, Marseille have arrondissements — we need to map them back
# to the single city-level commune code (arrondissement code -> city code).
ARRONDISSEMENT_MAPPING: dict[str, str] = {}
# Paris: 75101-75120 → 75056
for i in range(1, 21):
    ARRONDISSEMENT_MAPPING[f"751{i:02d}"] = "75056"
# Lyon: 69381-69389 → 69123
for i in range(1, 10):
    ARRONDISSEMENT_MAPPING[f"6938{i}"] = "69123"
# Marseille: 13201-13216 → 13055
for i in range(1, 17):
    ARRONDISSEMENT_MAPPING[f"132{i:02d}"] = "13055"
|
| 138 |
+
|
| 139 |
+
# ---------------------------------------------------------------------------
|
| 140 |
+
# Aggregation levels
|
| 141 |
+
# ---------------------------------------------------------------------------
|
| 142 |
+
AGGREGATION_LEVELS = [
|
| 143 |
+
"country",
|
| 144 |
+
"region",
|
| 145 |
+
"department",
|
| 146 |
+
"commune",
|
| 147 |
+
"postcode",
|
| 148 |
+
"section",
|
| 149 |
+
]
|
src/downloader.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Download DVF geolocalized data from data.gouv.fr.
|
| 3 |
+
|
| 4 |
+
Responsibility: Fetch raw CSV files and store them locally.
|
| 5 |
+
Handles caching — won't re-download files that already exist.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import gzip
|
| 9 |
+
import logging
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
import requests
|
| 13 |
+
from tqdm import tqdm
|
| 14 |
+
|
| 15 |
+
from src.config import DVF_YEARS, RAW_DIR, dvf_url
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
CHUNK_SIZE = 8192 # 8 KB chunks for streaming download
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def download_file(url: str, dest: Path, *, force: bool = False) -> Path:
    """
    Download a single file with progress bar and caching.

    The body is streamed into a sibling "<name>.part" file and only renamed
    to ``dest`` once the transfer has finished. An interrupted download
    therefore never leaves a truncated file that a later run would treat as
    cached.

    Args:
        url: Remote URL to download.
        dest: Local path to save the file.
        force: If True, re-download even if file exists.

    Returns:
        Path to the downloaded file.

    Raises:
        requests.HTTPError: If the server returns a non-2xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    if dest.exists() and not force:
        logger.info("Cached: %s", dest.name)
        return dest

    dest.parent.mkdir(parents=True, exist_ok=True)
    logger.info("Downloading: %s", url)

    partial = dest.with_name(dest.name + ".part")
    try:
        # The context manager releases the connection even when streaming fails
        with requests.get(url, stream=True, timeout=300) as response:
            response.raise_for_status()

            total = int(response.headers.get("content-length", 0))
            with (
                open(partial, "wb") as f,
                tqdm(total=total, unit="B", unit_scale=True, desc=dest.name) as bar,
            ):
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
                    bar.update(len(chunk))

        # Publish the complete file in one step
        partial.replace(dest)
    finally:
        # No-op after a successful rename; removes the leftovers on failure
        partial.unlink(missing_ok=True)

    logger.info("Saved: %s (%.1f MB)", dest.name, dest.stat().st_size / 1e6)
    return dest
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def decompress_gz(gz_path: Path) -> Path:
    """
    Decompress a .gz file to .csv in the same directory.

    An existing CSV is reused unless the archive is newer than it (e.g.
    after a forced re-download). Output is written to a "<name>.part" file
    and renamed when complete, so an interrupted run never leaves a
    truncated CSV that would later be treated as finished.

    Args:
        gz_path: Path to the .gz file.

    Returns:
        Path to the decompressed .csv file.
    """
    csv_path = gz_path.with_suffix("")  # removes .gz

    # Reuse the CSV when the archive is gone (nothing to redo) or not newer
    if csv_path.exists() and (
        not gz_path.exists()
        or csv_path.stat().st_mtime >= gz_path.stat().st_mtime
    ):
        logger.info("Already decompressed: %s", csv_path.name)
        return csv_path

    logger.info("Decompressing: %s", gz_path.name)
    partial = csv_path.with_name(csv_path.name + ".part")
    try:
        with gzip.open(gz_path, "rb") as f_in, open(partial, "wb") as f_out:
            while chunk := f_in.read(CHUNK_SIZE * 128):
                f_out.write(chunk)
        # Publish the complete file in one step
        partial.replace(csv_path)
    finally:
        # No-op after a successful rename; removes the leftovers on failure
        partial.unlink(missing_ok=True)

    logger.info("Decompressed: %s (%.1f MB)", csv_path.name, csv_path.stat().st_size / 1e6)
    return csv_path
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def download_dvf_year(year: int, *, force: bool = False) -> Path:
    """
    Fetch one year's DVF archive into RAW_DIR and decompress it.

    Args:
        year: Year to download (e.g. 2024).
        force: Re-download even if cached.

    Returns:
        Path to the decompressed CSV file.
    """
    archive = RAW_DIR / f"dvf_{year}.csv.gz"
    download_file(dvf_url(year), archive, force=force)
    return decompress_gz(archive)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def download_all(years: list[int] | None = None, *, force: bool = False) -> list[Path]:
    """
    Download DVF data for all configured years.

    Each year is handled independently: a year whose download fails with a
    ``requests`` error (HTTP status, timeout, connection failure, ...) is
    logged and skipped, and the remaining years are still attempted.

    Args:
        years: List of years to download. Defaults to config.DVF_YEARS
            (also used when an empty list is passed).
        force: Re-download even if cached.

    Returns:
        List of paths to decompressed CSV files, one per year that
        succeeded, in the order the years were given.
    """
    years = years or DVF_YEARS
    paths = []
    for year in years:
        try:
            path = download_dvf_year(year, force=force)
        except requests.RequestException as e:
            # RequestException is the base class of HTTPError, so HTTP
            # failures are still handled; network-level failures no longer
            # abort the whole batch.
            logger.error("Failed to download year %d: %s", year, e)
        else:
            paths.append(path)
    return paths
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
if __name__ == "__main__":
    # Script entry point: show INFO-level progress messages, then download
    # every configured year using the default arguments.
    logging.basicConfig(level=logging.INFO)
    download_all()
|
src/pipeline.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Orchestrate the full DVF data pipeline.
|
| 3 |
+
|
| 4 |
+
Responsibility: Wire together download → clean → aggregate → export.
|
| 5 |
+
This is the main entry point for running the complete pipeline.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
import time
|
| 10 |
+
|
| 11 |
+
from src.config import AGGREGATED_DIR, PROCESSED_DIR, RAW_DIR
|
| 12 |
+
from src.downloader import download_all
|
| 13 |
+
from src.cleaner import clean
|
| 14 |
+
from src.aggregator import aggregate_all_levels, export_json
|
| 15 |
+
from src.top_cities import compute_top_cities, export_top_cities
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _log_step(title: str) -> None:
    """Log a step title framed by two separator rules."""
    rule = "=" * 60
    logger.info(rule)
    logger.info(title)
    logger.info(rule)


def _existing_raw_csvs(years: list[int] | None) -> list:
    """Return the already-decompressed raw CSV paths, restricted to ``years`` if given."""
    if not years:
        # The pattern ends in ".csv", so ".csv.gz" archives are never matched.
        return sorted(RAW_DIR.glob("dvf_*.csv"))

    found = []
    for year in years:
        # Same naming scheme the downloader uses for decompressed files.
        candidate = RAW_DIR / f"dvf_{year}.csv"
        if candidate.exists():
            found.append(candidate)
        else:
            logger.warning("No raw CSV found for year %s: %s", year, candidate.name)
    return sorted(found)


def run(
    *,
    years: list[int] | None = None,
    skip_download: bool = False,
    skip_section: bool = False,
) -> None:
    """
    Run the complete pipeline: download → clean → aggregate → export.

    Args:
        years: Years to process. None = all configured years. Also honoured
            when skip_download is set: only the existing raw files for
            these years are used.
        skip_download: Skip download step (use existing raw files).
        skip_section: Skip section-level aggregation (slow, large output).
    """
    # Monotonic clock: the reported duration is immune to system clock changes.
    started = time.perf_counter()

    # Step 1: Download
    if not skip_download:
        _log_step("STEP 1: Downloading DVF data")
        csv_paths = download_all(years)
    else:
        logger.info("Skipping download, using existing files...")
        csv_paths = _existing_raw_csvs(years)
        logger.info("Found %d raw CSV files", len(csv_paths))

    if not csv_paths:
        logger.error("No CSV files found. Run without --skip-download first.")
        return

    # Step 2: Clean
    _log_step("STEP 2: Cleaning data")
    lf = clean(csv_paths)

    # Materialize once and save as parquet for reuse
    logger.info("Materializing cleaned data...")
    PROCESSED_DIR.mkdir(parents=True, exist_ok=True)
    df_clean = lf.collect()
    parquet_path = PROCESSED_DIR / "dvf_clean.parquet"
    df_clean.write_parquet(parquet_path)
    logger.info(
        "Saved: %s (%d rows, %.1f MB)",
        parquet_path.name,
        len(df_clean),
        parquet_path.stat().st_size / 1e6,
    )

    # Step 3: Aggregate
    _log_step("STEP 3: Aggregating prices")
    # Aggregations run against the in-memory frame, not the raw CSV scan.
    lf_clean = df_clean.lazy()

    if skip_section:
        # Imported lazily: only this branch needs the per-level building blocks.
        from src.config import AGGREGATION_LEVELS
        from src.aggregator import LEVEL_TO_COLUMN, aggregate_all_types

        aggregated = {}
        for level in AGGREGATION_LEVELS:
            if level == "section":
                continue
            col = LEVEL_TO_COLUMN[level]
            logger.info("Aggregating: %s", level)
            aggregated[level] = aggregate_all_types(lf_clean, col)
    else:
        aggregated = aggregate_all_levels(lf_clean)

    export_json(aggregated)

    # Step 4: Top cities
    _log_step("STEP 4: Top 10 cities")
    df_cities = compute_top_cities(lf_clean)
    export_top_cities(df_cities)

    elapsed = time.perf_counter() - started
    logger.info("=" * 60)
    logger.info("Pipeline complete in %.1f seconds", elapsed)
    logger.info("Output: %s", AGGREGATED_DIR)
    logger.info("=" * 60)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
if __name__ == "__main__":
    # Script entry point: timestamped INFO logging, then the full pipeline
    # with default options (all years, download enabled, section level included).
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
        level=logging.INFO,
    )
    run()
|
src/top_cities.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Compute price/m² breakdown for the top 10 French cities.
|
| 3 |
+
|
| 4 |
+
Responsibility: Produce a clean table of median price per m²
|
| 5 |
+
by property type for the largest cities.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
import logging
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
import polars as pl
|
| 13 |
+
|
| 14 |
+
from src.config import AGGREGATED_DIR, TOP_10_CITIES, TYPE_LOCAL_SHORT
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def compute_top_cities(lf: pl.LazyFrame) -> pl.DataFrame:
    """
    Summarise price per m² for the cities listed in TOP_10_CITIES.

    Rows are grouped on code_commune_city. One set of statistics is produced
    over all property types (labelled "tous"), plus one set per entry of
    TYPE_LOCAL_SHORT (labelled with its short name).

    Args:
        lf: Cleaned LazyFrame providing the code_commune_city, type_local
            and prix_m2 columns.

    Returns:
        DataFrame sorted by city_name then type, with columns city_code,
        median_price_m2, mean_price_m2, q1, q3, volume, type and city_name.
    """
    price = pl.col("prix_m2")
    # The same statistics are computed for every slice of the data.
    stats = [
        price.median().alias("median_price_m2"),
        price.mean().alias("mean_price_m2"),
        price.quantile(0.25).alias("q1"),
        price.quantile(0.75).alias("q3"),
        price.count().alias("volume"),
    ]

    def summarise(frame: pl.LazyFrame, label: str) -> pl.DataFrame:
        # One row per city, tagged with the property-type label.
        return (
            frame.group_by("code_commune_city")
            .agg(*stats)
            .with_columns(pl.lit(label).alias("type"))
            .collect()
        )

    # Restrict to the top 10 cities before any aggregation.
    top_city_rows = lf.filter(
        pl.col("code_commune_city").is_in(list(TOP_10_CITIES.keys()))
    )

    # All types combined first, then one frame per property type.
    frames = [summarise(top_city_rows, "tous")]
    for full_name, short_name in TYPE_LOCAL_SHORT.items():
        only_this_type = top_city_rows.filter(pl.col("type_local") == full_name)
        frames.append(summarise(only_this_type, short_name))

    stacked = pl.concat(frames, how="vertical_relaxed")

    # Attach the human-readable city name, then expose the code as city_code.
    city_name = (
        pl.col("code_commune_city")
        .replace_strict(dict(TOP_10_CITIES), default="Unknown")
        .alias("city_name")
    )
    named = stacked.with_columns(city_name)
    renamed = named.rename({"code_commune_city": "city_code"})
    return renamed.sort(["city_name", "type"])
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def export_top_cities(df: pl.DataFrame, output_dir: Path | None = None) -> None:
    """
    Export top cities data to JSON.

    Writes ``top_cities.json`` into ``output_dir``, creating the directory
    if needed. The file is always UTF-8 encoded: non-ASCII characters are
    written verbatim (``ensure_ascii=False``), so the encoding must not
    depend on the platform locale.

    Output format:
    {
        "Paris": {
            "code": "75056",
            "tous": {"median": 10500, "volume": 45000, ...},
            "appartement": {...},
            ...
        },
        ...
    }

    Args:
        df: One row per (city, type) with the columns city_name, city_code,
            type, median_price_m2, mean_price_m2, q1, q3 and volume.
        output_dir: Destination directory. Defaults to AGGREGATED_DIR.
    """
    output_dir = output_dir or AGGREGATED_DIR
    output_dir.mkdir(parents=True, exist_ok=True)

    data: dict = {}
    for row in df.iter_rows(named=True):
        # The first row seen for a city creates its entry with the city code.
        city = data.setdefault(row["city_name"], {"code": row["city_code"]})
        # Missing statistics (None) are exported as 0.
        city[row["type"]] = {
            "median": round(row["median_price_m2"] or 0, 1),
            "mean": round(row["mean_price_m2"] or 0, 1),
            "q1": round(row["q1"] or 0, 1),
            "q3": round(row["q3"] or 0, 1),
            "volume": row["volume"],
        }

    path = output_dir / "top_cities.json"
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    logger.info("Exported: %s (%d cities)", path.name, len(data))
|