iPurushottam commited on
Commit
a936b59
·
verified ·
1 Parent(s): 1bacc8b

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. main.py +112 -15
main.py CHANGED
@@ -240,37 +240,82 @@ def get_historical(years: int = 5):
240
  # ════════════════════════════════════════════════════════════
241
 
242
  def fetch_training_data(days: int = 90):
243
- """Fetch recent temperature data for ML training."""
244
- end_date = datetime.now() - timedelta(days=7) # Archive API lags ~5-7 days
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  start_date = end_date - timedelta(days=days)
246
 
247
  url = "https://archive-api.open-meteo.com/v1/archive"
248
  params = {
249
- "latitude": LAT,
250
- "longitude": LON,
251
  "start_date": start_date.strftime("%Y-%m-%d"),
252
- "end_date": end_date.strftime("%Y-%m-%d"),
253
- "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max",
254
- "timezone": "Asia/Kolkata",
255
  }
256
 
257
  r = requests.get(url, params=params, timeout=20)
258
  r.raise_for_status()
259
- data = r.json()
260
  daily = data.get("daily", {})
261
 
262
  temps_max = [t for t in daily.get("temperature_2m_max", []) if t is not None]
263
  temps_min = [t for t in daily.get("temperature_2m_min", []) if t is not None]
264
- precip = [p for p in daily.get("precipitation_sum", []) if p is not None]
265
- wind = [w for w in daily.get("wind_speed_10m_max", []) if w is not None]
266
 
267
  return {
268
- "temps_max": temps_max,
269
- "temps_min": temps_min,
270
- "precip": precip,
271
- "wind": wind,
272
- "end_date": end_date,
273
  "training_days": len(temps_max),
 
274
  }
275
 
276
 
@@ -2060,6 +2105,58 @@ def ask_climai(q: str = "weather today"):
2060
 
2061
 
2062
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2063
  if __name__ == "__main__":
2064
  import uvicorn # type: ignore[import]
2065
  uvicorn.run(app, host="0.0.0.0", port=8000)
 
240
  # ════════════════════════════════════════════════════════════
241
 
242
def fetch_training_data(days: int = 90):
    """
    Load temperature data for ML training.

    Lookup order:
      1) saved dataset (``data/weather_history.json``) — full 5-year history
      2) live Open-Meteo Archive API fallback when the file is missing or unusable

    Training from the saved dataset means models see 5 years of data instead
    of ~90 days, which dramatically improves prediction accuracy.

    Returns:
        dict with keys ``temps_max``, ``temps_min``, ``precip``, ``wind``,
        ``end_date``, ``training_days`` and ``source``
        ("saved_dataset" or "live_api").
    """
    import os as _os
    import json as _json

    dataset_path = "data/weather_history.json"

    # ── Try loading from saved dataset first ──────────────────────
    if _os.path.exists(dataset_path):
        try:
            with open(dataset_path) as fh:
                saved = _json.load(fh)

            series = saved.get("daily", {})
            temps_max = [t for t in series.get("temperature_2m_max", []) if t is not None]
            temps_min = [t for t in series.get("temperature_2m_min", []) if t is not None]
            precip = [p for p in series.get("precipitation_sum", []) if p is not None]
            wind = [w for w in series.get("wind_speed_10m_max", []) if w is not None]

            # Only trust the file if it holds at least two weeks of data;
            # otherwise fall through to the live API below.
            if len(temps_max) >= 14:
                # Recover the series end date from the "period" metadata
                # ("YYYY-MM-DD to YYYY-MM-DD"); on any parse failure use the
                # archive-lag default (~7 days behind today).
                period = saved.get("period", "")
                try:
                    end_date = datetime.strptime(period.split(" to ")[-1].strip(), "%Y-%m-%d")
                except Exception:
                    end_date = datetime.now() - timedelta(days=7)

                logger.info(f"[fetch_training_data] Loaded {len(temps_max)} days from saved dataset")
                return {
                    "temps_max": temps_max,
                    "temps_min": temps_min,
                    "precip": precip,
                    "wind": wind,
                    "end_date": end_date,
                    "training_days": len(temps_max),
                    "source": "saved_dataset",
                }
        except Exception as e:
            logger.warning(f"[fetch_training_data] Saved dataset load failed: {e} — falling back to API")

    # ── Fallback: live API call ────────────────────────────────────
    logger.info("[fetch_training_data] No saved dataset — fetching from Open-Meteo Archive API")
    end_date = datetime.now() - timedelta(days=7)  # Archive API lags ~5-7 days
    start_date = end_date - timedelta(days=days)

    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": LAT,
        "longitude": LON,
        "start_date": start_date.strftime("%Y-%m-%d"),
        "end_date": end_date.strftime("%Y-%m-%d"),
        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max",
        "timezone": "Asia/Kolkata",
    }

    r = requests.get(url, params=params, timeout=20)
    r.raise_for_status()
    series = r.json().get("daily", {})

    temps_max = [t for t in series.get("temperature_2m_max", []) if t is not None]
    temps_min = [t for t in series.get("temperature_2m_min", []) if t is not None]
    precip = [p for p in series.get("precipitation_sum", []) if p is not None]
    wind = [w for w in series.get("wind_speed_10m_max", []) if w is not None]

    return {
        "temps_max": temps_max,
        "temps_min": temps_min,
        "precip": precip,
        "wind": wind,
        "end_date": end_date,
        "training_days": len(temps_max),
        "source": "live_api",
    }
320
 
321
 
 
2105
 
2106
 
2107
 
2108
+
2109
# ════════════════════════════════════════════════════════════
# /refresh-data — Rebuild historical dataset in background
# ════════════════════════════════════════════════════════════
@app.post("/refresh-data")
def refresh_dataset():
    """
    Trigger a full dataset rebuild by running build_dataset.py.

    Launches the script as a detached background process and returns
    immediately. Run monthly to keep ML training data and LLM context fresh.

    Returns:
        dict: {"status": "started", ...} on launch, or
              {"status": "error", "message": ...} on failure.
    """
    import os as _os
    import subprocess as _subprocess
    import sys as _sys
    try:
        if not _os.path.exists("build_dataset.py"):
            return {"status": "error", "message": "build_dataset.py not found"}
        # Use the running interpreter (sys.executable) rather than a bare
        # "python" PATH lookup — containers/venvs often only ship "python3".
        _subprocess.Popen(
            [_sys.executable, "build_dataset.py"],
            stdout=_subprocess.DEVNULL,
            stderr=_subprocess.DEVNULL,
        )
        return {
            "status": "started",
            "message": "Dataset rebuild started in background. Check data/ folder in ~2 minutes.",
            "files_to_update": [
                "data/weather_history.json",
                "data/earthquake_history.json",
                "data/aqi_history.json",
                "data/flood_baseline.json",
                "data/llm_context.json",
            ],
        }
    except Exception as e:
        # Endpoint boundary: report the failure instead of raising a 500.
        return {"status": "error", "message": str(e)}
2130
+
2131
+
2132
@app.get("/dataset-status")
def dataset_status():
    """Check which dataset files exist and when they were last updated."""
    import os as _os
    import json as _json

    tracked = {
        "weather_history": "data/weather_history.json",
        "earthquake_history": "data/earthquake_history.json",
        "aqi_history": "data/aqi_history.json",
        "flood_baseline": "data/flood_baseline.json",
        "llm_context": "data/llm_context.json",
    }

    def _describe(path):
        # Build one status entry: existence, size on disk, and the
        # timestamp embedded in the file itself (if readable).
        if not _os.path.exists(path):
            return {"exists": False}
        size_kb = round(_os.stat(path).st_size / 1024, 1)
        try:
            with open(path) as fh:
                doc = _json.load(fh)
            fetched_at = doc.get("fetched_at") or doc.get("generated_at", "unknown")
        except Exception:
            fetched_at = "unknown"
        return {"exists": True, "size_kb": size_kb, "fetched_at": fetched_at}

    result = {name: _describe(path) for name, path in tracked.items()}
    all_exist = all(entry["exists"] for entry in result.values())
    return {
        "dataset_ready": all_exist,
        "files": result,
        "tip": "Run POST /refresh-data to build missing files." if not all_exist else "All dataset files present.",
    }
2159
+
2160
if __name__ == "__main__":
    # Dev entrypoint: serve the FastAPI app on all interfaces, port 8000.
    import uvicorn  # type: ignore[import]

    uvicorn.run(app, host="0.0.0.0", port=8000)