Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
main.py
CHANGED
|
@@ -240,37 +240,82 @@ def get_historical(years: int = 5):
|
|
| 240 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 241 |
|
| 242 |
def fetch_training_data(days: int = 90):
|
| 243 |
-
"""
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
start_date = end_date - timedelta(days=days)
|
| 246 |
|
| 247 |
url = "https://archive-api.open-meteo.com/v1/archive"
|
| 248 |
params = {
|
| 249 |
-
"latitude":
|
| 250 |
-
"longitude":
|
| 251 |
"start_date": start_date.strftime("%Y-%m-%d"),
|
| 252 |
-
"end_date":
|
| 253 |
-
"daily":
|
| 254 |
-
"timezone":
|
| 255 |
}
|
| 256 |
|
| 257 |
r = requests.get(url, params=params, timeout=20)
|
| 258 |
r.raise_for_status()
|
| 259 |
-
data
|
| 260 |
daily = data.get("daily", {})
|
| 261 |
|
| 262 |
temps_max = [t for t in daily.get("temperature_2m_max", []) if t is not None]
|
| 263 |
temps_min = [t for t in daily.get("temperature_2m_min", []) if t is not None]
|
| 264 |
-
precip
|
| 265 |
-
wind
|
| 266 |
|
| 267 |
return {
|
| 268 |
-
"temps_max":
|
| 269 |
-
"temps_min":
|
| 270 |
-
"precip":
|
| 271 |
-
"wind":
|
| 272 |
-
"end_date":
|
| 273 |
"training_days": len(temps_max),
|
|
|
|
| 274 |
}
|
| 275 |
|
| 276 |
|
|
@@ -2060,6 +2105,58 @@ def ask_climai(q: str = "weather today"):
|
|
| 2060 |
|
| 2061 |
|
| 2062 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2063 |
if __name__ == "__main__":
|
| 2064 |
import uvicorn # type: ignore[import]
|
| 2065 |
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
|
# ------------------------------------------------------------
def fetch_training_data(days: int = 90):
    """
    Load temperature data for ML training.

    Priority: 1) saved dataset (data/weather_history.json) for full 5yr history
              2) live API fallback if the file is missing or unusable
    Using saved data means models train on 5 years instead of 90 days,
    which dramatically improves prediction accuracy.

    Args:
        days: Window length (in days) used only by the live-API fallback.

    Returns:
        dict with keys "temps_max", "temps_min", "precip", "wind" (lists of
        floats, None entries dropped), "end_date" (datetime), "training_days"
        (int) and "source" ("saved_dataset" or "live_api").

    Raises:
        requests.HTTPError: if the fallback API call returns a bad status.
    """
    import os as _os
    import json as _json

    dataset_path = "data/weather_history.json"

    # -- Try loading from the saved dataset first ------------------
    if _os.path.exists(dataset_path):
        try:
            with open(dataset_path) as f:
                saved = _json.load(f)
            daily = saved.get("daily", {})
            temps_max = [t for t in daily.get("temperature_2m_max", []) if t is not None]
            temps_min = [t for t in daily.get("temperature_2m_min", []) if t is not None]
            precip = [p for p in daily.get("precipitation_sum", []) if p is not None]
            wind = [w for w in daily.get("wind_speed_10m_max", []) if w is not None]

            # Require at least two weeks of data before trusting the file.
            if len(temps_max) >= 14:
                period = saved.get("period", "")
                try:
                    # "period" is expected to look like "YYYY-MM-DD to YYYY-MM-DD".
                    end_str = period.split(" to ")[-1].strip()
                    end_date = datetime.strptime(end_str, "%Y-%m-%d")
                except Exception:
                    # Unparseable period -> approximate with the archive's lag.
                    end_date = datetime.now() - timedelta(days=7)

                logger.info(f"[fetch_training_data] Loaded {len(temps_max)} days from saved dataset")
                return {
                    "temps_max": temps_max,
                    "temps_min": temps_min,
                    "precip": precip,
                    "wind": wind,
                    "end_date": end_date,
                    "training_days": len(temps_max),
                    "source": "saved_dataset",
                }
        except Exception as e:
            # Corrupt/unreadable file: log and fall through to the live API.
            logger.warning(f"[fetch_training_data] Saved dataset load failed: {e} -> falling back to API")

    # -- Fallback: live API call -----------------------------------
    # Message covers both "file missing" and "file present but unusable".
    logger.info("[fetch_training_data] No usable saved dataset -> fetching from Open-Meteo Archive API")
    # The archive API lags roughly a week behind real time.
    end_date = datetime.now() - timedelta(days=7)
    start_date = end_date - timedelta(days=days)

    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": LAT,
        "longitude": LON,
        "start_date": start_date.strftime("%Y-%m-%d"),
        "end_date": end_date.strftime("%Y-%m-%d"),
        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max",
        "timezone": "Asia/Kolkata",
    }

    r = requests.get(url, params=params, timeout=20)
    r.raise_for_status()
    data = r.json()
    daily = data.get("daily", {})

    temps_max = [t for t in daily.get("temperature_2m_max", []) if t is not None]
    temps_min = [t for t in daily.get("temperature_2m_min", []) if t is not None]
    precip = [p for p in daily.get("precipitation_sum", []) if p is not None]
    wind = [w for w in daily.get("wind_speed_10m_max", []) if w is not None]

    return {
        "temps_max": temps_max,
        "temps_min": temps_min,
        "precip": precip,
        "wind": wind,
        "end_date": end_date,
        "training_days": len(temps_max),
        "source": "live_api",
    }
# ------------------------------------------------------------
# /refresh-data - Rebuild historical dataset in background
# ------------------------------------------------------------
@app.post("/refresh-data")
def refresh_dataset():
    """
    Trigger a full dataset rebuild by running build_dataset.py.
    Run monthly to keep ML training data and LLM context fresh.

    Returns:
        dict: {"status": "started", ...} when the rebuild process was
        spawned ("started" only means the process launched — it runs
        detached in the background), or {"status": "error", "message": ...}.
    """
    import os as _os
    import subprocess as _subprocess
    import sys as _sys
    try:
        if not _os.path.exists("build_dataset.py"):
            return {"status": "error", "message": "build_dataset.py not found"}
        # sys.executable guarantees the same interpreter/venv as this app;
        # a bare "python" may resolve to a different install or be absent.
        _subprocess.Popen(
            [_sys.executable, "build_dataset.py"],
            stdout=_subprocess.DEVNULL,
            stderr=_subprocess.DEVNULL,
        )
        return {
            "status": "started",
            "message": "Dataset rebuild started in background. Check data/ folder in ~2 minutes.",
            "files_to_update": [
                "data/weather_history.json",
                "data/earthquake_history.json",
                "data/aqi_history.json",
                "data/flood_baseline.json",
                "data/llm_context.json",
            ],
        }
    except Exception as e:
        return {"status": "error", "message": str(e)}
@app.get("/dataset-status")
def dataset_status():
    """Check which dataset files exist and when they were last updated."""
    import os as _os
    import json as _json

    tracked = {
        "weather_history": "data/weather_history.json",
        "earthquake_history": "data/earthquake_history.json",
        "aqi_history": "data/aqi_history.json",
        "flood_baseline": "data/flood_baseline.json",
        "llm_context": "data/llm_context.json",
    }

    report = {}
    for name, location in tracked.items():
        # Missing file: record and move on (guard clause keeps nesting flat).
        if not _os.path.exists(location):
            report[name] = {"exists": False}
            continue

        info = _os.stat(location)
        try:
            with open(location) as fh:
                payload = _json.load(fh)
            stamp = payload.get("fetched_at") or payload.get("generated_at", "unknown")
        except Exception:
            # Unreadable / non-JSON file: still report it, just without a timestamp.
            stamp = "unknown"
        report[name] = {
            "exists": True,
            "size_kb": round(info.st_size / 1024, 1),
            "fetched_at": stamp,
        }

    ready = all(entry["exists"] for entry in report.values())
    return {
        "dataset_ready": ready,
        "files": report,
        "tip": "All dataset files present." if ready else "Run POST /refresh-data to build missing files.",
    }
if __name__ == "__main__":
    # Direct-execution entry point: serve the app on all interfaces.
    import uvicorn  # type: ignore[import]

    uvicorn.run(app, host="0.0.0.0", port=8000)