Spaces:
Sleeping
Sleeping
Initial schemeimpactnet deployment
Browse files- .dockerignore +9 -0
- .gitignore +9 -0
- .streamlit/config.toml +16 -0
- Dockerfile +20 -9
- README.md +0 -19
- backend/crud.py +130 -0
- backend/database.py +50 -0
- backend/main.py +44 -0
- backend/routers/districts.py +51 -0
- backend/routers/optimizer.py +99 -0
- backend/routers/predictions.py +19 -0
- backend/schemas.py +77 -0
- data/processed/mnrega_cleaned.csv +0 -0
- data/processed/mnrega_predictions.csv +0 -0
- data/processed/optimized_budget_allocation.csv +707 -0
- data/scraper/mnrega_scraper.py +312 -0
- fix_optimizer.py +41 -0
- frontend/app.py +185 -0
- frontend/pages/districts.py +164 -0
- frontend/pages/home.py +226 -0
- frontend/pages/insights.py +278 -0
- frontend/pages/optimizer.py +240 -0
- frontend/pages/overview.py +159 -0
- frontend/pages/predictions.py +255 -0
- frontend/pages/spatial.py +491 -0
- frontend/theme.py +297 -0
- frontend/utils/api_client.py +128 -0
- hf_start.sh +69 -0
- main.py +50 -0
- overview.txt +92 -0
- reports/model_comparison.csv +4 -0
- reports/model_report.txt +57 -0
- requirements.txt +30 -3
- src/__init__.py +0 -0
- src/clean.py +98 -0
- src/eda.py +201 -0
- src/extract.py +58 -0
- src/features.py +320 -0
- src/generate_synthetic.py +184 -0
- src/model.py +656 -0
- src/optimize.py +210 -0
- src/pipeline.py +212 -0
- src/streamlit_app.py +0 -40
- start.sh +239 -0
.dockerignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
.venv/
|
| 3 |
+
wandb/
|
| 4 |
+
|
| 5 |
+
data/schemeimpactnet.db
|
| 6 |
+
data/raw/
|
| 7 |
+
|
| 8 |
+
reports/figures/
|
| 9 |
+
|
.gitignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
.venv/
|
| 3 |
+
wandb/
|
| 4 |
+
|
| 5 |
+
data/schemeimpactnet.db
|
| 6 |
+
data/raw/
|
| 7 |
+
|
| 8 |
+
reports/figures/
|
| 9 |
+
|
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[server]
|
| 2 |
+
headless = true
|
| 3 |
+
fileWatcherType = "auto"
|
| 4 |
+
|
| 5 |
+
[browser]
|
| 6 |
+
gatherUsageStats = false
|
| 7 |
+
|
| 8 |
+
[theme]
|
| 9 |
+
base = "light"
|
| 10 |
+
backgroundColor = "#FAF9F7"
|
| 11 |
+
secondaryBackgroundColor = "#F5F5F4"
|
| 12 |
+
textColor = "#1C1917"
|
| 13 |
+
font = "serif"
|
| 14 |
+
|
| 15 |
+
[client]
|
| 16 |
+
showSidebarNavigation = true
|
Dockerfile
CHANGED
|
@@ -1,20 +1,31 @@
|
|
| 1 |
-
FROM python:3.
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
|
|
|
|
| 6 |
build-essential \
|
| 7 |
curl \
|
| 8 |
-
git \
|
| 9 |
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
|
| 11 |
-
|
| 12 |
-
COPY
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
-
|
|
|
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
# HF Spaces runs as non-root user 1000
|
| 4 |
+
RUN useradd -m -u 1000 appuser
|
| 5 |
|
| 6 |
WORKDIR /app
|
| 7 |
|
| 8 |
+
# Install system deps
|
| 9 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 10 |
build-essential \
|
| 11 |
curl \
|
|
|
|
| 12 |
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
|
| 14 |
+
# Copy requirements first for layer caching
|
| 15 |
+
COPY requirements.txt .
|
| 16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
+
|
| 18 |
+
# Copy entire project
|
| 19 |
+
COPY --chown=appuser:appuser . .
|
| 20 |
|
| 21 |
+
# Create necessary directories
|
| 22 |
+
RUN mkdir -p data/raw data/processed data/db models reports/figures \
|
| 23 |
+
&& chown -R appuser:appuser /app
|
| 24 |
|
| 25 |
+
USER appuser
|
| 26 |
|
| 27 |
+
# HF Spaces exposes port 7860
|
| 28 |
+
EXPOSE 7860
|
| 29 |
|
| 30 |
+
# Entrypoint: generate synthetic data, seed DB, start both services
|
| 31 |
+
CMD ["bash", "hf_start.sh"]
|
README.md
DELETED
|
@@ -1,19 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: SchemeImpactNet
|
| 3 |
-
emoji: π
|
| 4 |
-
colorFrom: red
|
| 5 |
-
colorTo: red
|
| 6 |
-
sdk: docker
|
| 7 |
-
app_port: 8501
|
| 8 |
-
tags:
|
| 9 |
-
- streamlit
|
| 10 |
-
pinned: false
|
| 11 |
-
short_description: Predictive analysis of government schemes...
|
| 12 |
-
---
|
| 13 |
-
|
| 14 |
-
# Welcome to Streamlit!
|
| 15 |
-
|
| 16 |
-
Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
| 17 |
-
|
| 18 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 19 |
-
forums](https://discuss.streamlit.io).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/crud.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
crud.py
|
| 3 |
+
-------
|
| 4 |
+
Database query functions. All queries return plain dicts/lists
|
| 5 |
+
so FastAPI routers stay thin.
|
| 6 |
+
|
| 7 |
+
V3 update: expenditure_lakhs, expenditure_per_personday, demand_fulfillment_rate
|
| 8 |
+
removed — these were synthetic columns dropped in the leak-free pipeline.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import pandas as pd
|
| 12 |
+
from sqlalchemy.orm import Session
|
| 13 |
+
from sqlalchemy import text
|
| 14 |
+
from typing import Optional, List
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# ── Districts ─────────────────────────────────────────────────────────────────
|
| 18 |
+
|
| 19 |
+
def get_states(db: Session) -> List[str]:
    """Return every distinct state name in ``district_data``, sorted by name."""
    query = text("SELECT DISTINCT state FROM district_data ORDER BY state")
    return [record[0] for record in db.execute(query).fetchall()]
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def get_districts(db: Session, state: str) -> List[str]:
    """Return the distinct district names recorded for ``state``, sorted by name."""
    query = text(
        "SELECT DISTINCT district FROM district_data WHERE state=:s ORDER BY district"
    )
    result = db.execute(query, {"s": state})
    return [record[0] for record in result.fetchall()]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def get_district_history(db: Session, state: str, district: str) -> List[dict]:
    """Return the per-year records of one district, ordered by financial year.

    Each item is a plain dict with the keys ``state``, ``district``,
    ``financial_year``, ``person_days_lakhs`` and ``avg_wage_rate``.
    """
    query = text("""
        SELECT state, district, financial_year,
               person_days_lakhs, avg_wage_rate
        FROM district_data
        WHERE state=:s AND district=:d
        ORDER BY financial_year
    """)
    bind = {"s": state, "d": district}
    return [dict(record._mapping) for record in db.execute(query, bind).fetchall()]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def get_top_districts(db: Session, state: Optional[str], metric: str, n: int) -> List[dict]:
    """Return the ``n`` districts ranked highest by the average of ``metric``.

    Args:
        db: Open SQLAlchemy session.
        state: Restrict the ranking to this state; a falsy value ranks all states.
        metric: Column to rank by. Only ``person_days_lakhs`` is accepted; any
            other value falls back to it.
        n: Maximum number of rows, bound to the SQL ``LIMIT``.

    Returns:
        Dicts with ``state``, ``district`` and ``avg_persondays``.
    """
    # The metric name is interpolated into the SQL text (it cannot be a bind
    # parameter), so it is restricted to a fixed allow-list first.
    allowed_metrics = {"person_days_lakhs"}
    order_metric = metric if metric in allowed_metrics else "person_days_lakhs"

    bind = {"n": n}
    state_filter = ""
    if state:
        state_filter = "WHERE state=:s"
        bind["s"] = state

    query = text(f"""
        SELECT state, district,
               AVG(person_days_lakhs) as avg_persondays
        FROM district_data
        {state_filter}
        GROUP BY state, district
        ORDER BY AVG({order_metric}) DESC
        LIMIT :n
    """)
    return [dict(record._mapping) for record in db.execute(query, bind).fetchall()]
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def get_yearly_trend(db: Session, state: Optional[str]) -> List[dict]:
    """Return one aggregate row per financial year, ordered by year.

    Each dict holds ``financial_year``, ``total_persondays`` (sum of
    ``person_days_lakhs``) and ``avg_wage`` (mean of ``avg_wage_rate``).
    A falsy ``state`` aggregates over every state.
    """
    bind = {}
    state_filter = ""
    if state:
        state_filter = "WHERE state=:s"
        bind["s"] = state

    query = text(f"""
        SELECT financial_year,
               SUM(person_days_lakhs) as total_persondays,
               AVG(avg_wage_rate) as avg_wage
        FROM district_data
        {state_filter}
        GROUP BY financial_year
        ORDER BY financial_year
    """)
    return [dict(record._mapping) for record in db.execute(query, bind).fetchall()]
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def get_stats(db: Session) -> dict:
    """Return dataset-wide summary statistics for ``district_data``.

    Returns:
        A dict with ``total_districts``, ``total_states``, ``year_range``,
        ``total_persondays_lakhs``, ``total_expenditure_lakhs`` (always 0.0)
        and ``covid_spike_pct`` — the percentage change in mean
        ``person_days_lakhs`` from financial year 2019 to 2020.
    """
    row = db.execute(text("""
        SELECT
            COUNT(DISTINCT district) as total_districts,
            COUNT(DISTINCT state) as total_states,
            MIN(financial_year)||' β '||MAX(financial_year) as year_range,
            SUM(person_days_lakhs) as total_persondays_lakhs
        FROM district_data
    """)).fetchone()
    base = dict(row._mapping)
    base["total_expenditure_lakhs"] = 0.0  # removed in V3 (synthetic column)

    # COVID spike
    pre = db.execute(text(
        "SELECT AVG(person_days_lakhs) FROM district_data WHERE financial_year=2019"
    )).scalar()
    post = db.execute(text(
        "SELECT AVG(person_days_lakhs) FROM district_data WHERE financial_year=2020"
    )).scalar()

    # AVG() over zero rows yields NULL (None). Either year may be missing, and a
    # zero baseline cannot be divided by, so report 0.0 in all of those cases
    # instead of raising on ``None - pre``.
    if pre and post is not None:
        base["covid_spike_pct"] = round((post - pre) / pre * 100, 2)
    else:
        base["covid_spike_pct"] = 0.0
    return base
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# ── Predictions ───────────────────────────────────────────────────────────────
|
| 97 |
+
|
| 98 |
+
def get_predictions(
    db: Session,
    state: Optional[str],
    district: Optional[str],
    year: Optional[int],
) -> List[dict]:
    """Return prediction rows, optionally filtered by state, district and year.

    Any filter whose value is falsy is ignored. Rows are ordered by state,
    district and financial year; each dict holds ``state``, ``district``,
    ``financial_year``, ``person_days_lakhs``, ``predicted_persondays`` and
    ``prediction_error``.
    """
    # bind-parameter name -> (column it filters on, requested value)
    candidates = {
        "state": ("state", state),
        "district": ("district", district),
        "year": ("financial_year", year),
    }
    active = {name: spec for name, spec in candidates.items() if spec[1]}
    params = {name: value for name, (_, value) in active.items()}
    clauses = [f"{column}=:{name}" for name, (column, _) in active.items()]
    where = ("WHERE " + " AND ".join(clauses)) if clauses else ""

    query = text(f"""
        SELECT state, district, financial_year,
               person_days_lakhs, predicted_persondays, prediction_error
        FROM predictions {where}
        ORDER BY state, district, financial_year
    """)
    return [dict(record._mapping) for record in db.execute(query, params).fetchall()]
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
# ── Optimizer ─────────────────────────────────────────────────────────────────
|
| 122 |
+
|
| 123 |
+
def get_optimizer_results(db: Session, state: Optional[str]) -> List[dict]:
    """Return all rows of the ``optimizer`` table, largest ``persondays_gain`` first.

    A falsy ``state`` returns rows for every state.
    """
    bind = {}
    state_filter = ""
    if state:
        state_filter = "WHERE state=:s"
        bind["s"] = state

    query = text(f"""
        SELECT * FROM optimizer {state_filter}
        ORDER BY persondays_gain DESC
    """)
    return [dict(record._mapping) for record in db.execute(query, bind).fetchall()]
|
backend/database.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
database.py
|
| 3 |
+
-----------
|
| 4 |
+
SQLite database setup using SQLAlchemy.
|
| 5 |
+
Seeds from processed CSVs on first run.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from sqlalchemy import create_engine, text
|
| 11 |
+
from sqlalchemy.orm import declarative_base, sessionmaker
|
| 12 |
+
|
| 13 |
+
# BASE_DIR is two directory levels above this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DB_PATH = os.path.join(BASE_DIR, "data", "schemeimpactnet.db")
DB_URL = "sqlite:///" + DB_PATH

# check_same_thread=False lets the SQLite connection be used from threads other
# than the one that created it.
engine = create_engine(DB_URL, connect_args={"check_same_thread": False})
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def get_db():
    """Yield a new session and close it once the consumer is finished with it."""
    # Leaving the ``with`` block closes the session, including when the
    # consumer raises.
    with SessionLocal() as db:
        yield db
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def seed_database() -> None:
    """Load the processed CSV files into SQLite tables.

    Each CSV under ``data/processed`` replaces the table of the same role
    (``district_data``, ``predictions``, ``optimizer``). A missing file is
    reported on stdout and skipped; the remaining tables are still loaded.
    """
    processed_dir = os.path.join(BASE_DIR, "data", "processed")
    sources = (
        ("district_data", "mnrega_cleaned.csv"),
        ("predictions", "mnrega_predictions.csv"),
        ("optimizer", "optimized_budget_allocation.csv"),
    )

    with engine.connect() as conn:
        for table, filename in sources:
            path = os.path.join(processed_dir, filename)
            if os.path.exists(path):
                frame = pd.read_csv(path)
                frame.to_sql(table, conn, if_exists="replace", index=False)
                print(f"[db] Seeded '{table}': {len(frame)} rows")
            else:
                print(f"[db] WARNING: {path} not found, skipping")
        conn.commit()

    print("[db] Database ready β")
|
backend/main.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
backend/main.py
|
| 3 |
+
---------------
|
| 4 |
+
FastAPI application entry point.
|
| 5 |
+
|
| 6 |
+
Run with:
|
| 7 |
+
uvicorn backend.main:app --reload --port 8000
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from fastapi import FastAPI
|
| 11 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
+
from backend.database import seed_database
|
| 13 |
+
from backend.routers.districts import router as districts_router
|
| 14 |
+
from backend.routers.predictions import router as predictions_router
|
| 15 |
+
from backend.routers.optimizer import router as optimizer_router
|
| 16 |
+
|
| 17 |
+
app = FastAPI(
    title="SchemeImpactNet API",
    description="MNREGA district-level forecasting and budget optimization",
    version="1.0.0",
)

# NOTE(review): CORS is fully open (any origin, method and header) — confirm
# this is intended for the deployed Space.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 29 |
+
|
| 30 |
+
@app.on_event("startup")
def startup():
    """Seed the database once when the application starts."""
    seed_database()
|
| 33 |
+
|
| 34 |
+
# Register the API routers in a fixed order: districts, predictions, optimizer.
for _api_router in (districts_router, predictions_router, optimizer_router):
    app.include_router(_api_router)
|
| 37 |
+
|
| 38 |
+
# GET / — static project metadata plus a pointer to the interactive docs.
@app.get("/")
def root():
    info = {"project": "SchemeImpactNet", "version": "1.0.0", "docs": "/docs"}
    return info
|
| 41 |
+
|
| 42 |
+
# GET /health — liveness probe; always answers with a constant payload.
@app.get("/health")
def health():
    payload = {"status": "ok"}
    return payload
|
backend/routers/districts.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""routers/districts.py — District data endpoints."""
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter, Depends, Query
|
| 4 |
+
from sqlalchemy.orm import Session
|
| 5 |
+
from typing import Optional, List
|
| 6 |
+
from backend.database import get_db
|
| 7 |
+
from backend import crud
|
| 8 |
+
|
| 9 |
+
# Every route declared on this router is served under the /districts prefix.
router = APIRouter(prefix="/districts", tags=["Districts"])
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# GET /districts/states — distinct state names, delegated to crud.get_states.
@router.get("/states")
def list_states(db: Session = Depends(get_db)):
    states = crud.get_states(db)
    return states
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# GET /districts/list — district names of one state; ``state`` is required.
@router.get("/list")
def list_districts(state: str = Query(...), db: Session = Depends(get_db)):
    districts = crud.get_districts(db, state)
    return districts
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# GET /districts/history — yearly records of one district; both query
# parameters are required.
@router.get("/history")
def district_history(
    state: str = Query(...),
    district: str = Query(...),
    db: Session = Depends(get_db),
):
    history = crud.get_district_history(db, state, district)
    return history
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# GET /districts/top — the ``n`` highest-ranked districts (default 10),
# optionally limited to one state.
@router.get("/top")
def top_districts(
    state: Optional[str] = Query(None),
    metric: str = Query("person_days_lakhs"),
    n: int = Query(10),
    db: Session = Depends(get_db),
):
    ranking = crud.get_top_districts(db, state, metric, n)
    return ranking
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# GET /districts/trend — per-year aggregates, optionally limited to one state.
@router.get("/trend")
def yearly_trend(
    state: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    trend = crud.get_yearly_trend(db, state)
    return trend
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# GET /districts/stats — dataset-wide summary, delegated to crud.get_stats.
@router.get("/stats")
def stats(db: Session = Depends(get_db)):
    summary = crud.get_stats(db)
    return summary
|
backend/routers/optimizer.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""routers/optimizer.py — Budget optimizer endpoints."""
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter, Depends, Query
|
| 4 |
+
from sqlalchemy.orm import Session
|
| 5 |
+
from typing import Optional
|
| 6 |
+
from backend.database import get_db
|
| 7 |
+
from backend import crud
|
| 8 |
+
from backend.schemas import OptimizerRequest, OptimizerResponse
|
| 9 |
+
|
| 10 |
+
# Every route declared on this router is served under the /optimizer prefix.
router = APIRouter(prefix="/optimizer", tags=["Optimizer"])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# GET /optimizer/results — stored optimizer rows, optionally limited to one state.
@router.get("/results")
def get_optimizer_results(
    state: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    results = crud.get_optimizer_results(db, state)
    return results
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@router.post("/run", response_model=OptimizerResponse)
def run_optimizer_live(req: OptimizerRequest, db: Session = Depends(get_db)):
    """
    Run LP optimizer live with custom parameters.
    Reads predictions from DB, runs scipy LP, returns results.
    """
    # Implementation notes (kept out of the docstring, which is published as
    # the endpoint description):
    #   * Each district's budget is first scaled by ``req.budget_scale``.
    #   * The LP maximises sum(persondays_per_lakh * allocation) subject to
    #     sum(allocation) <= total scaled budget and, per district,
    #     min_fraction * budget <= allocation <= max_fraction * budget.
    #   * If the solver fails, the scaled status-quo budgets are reported.
    import numpy as np
    import pandas as pd
    from scipy.optimize import linprog
    from sqlalchemy import text

    scope = req.state or "All-India"

    def _empty_response():
        # Returned whenever there is nothing to optimize.
        return OptimizerResponse(
            scope=scope,
            total_budget_lakhs=0, sq_persondays_total=0,
            opt_persondays_total=0, gain_lakhs=0, gain_pct=0, districts=[],
        )

    # Get latest year predictions + budget
    state_clause = "AND p.state=:s" if req.state else ""
    params = {"s": req.state} if req.state else {}

    rows = db.execute(text(f"""
        SELECT p.state, p.district,
               p.predicted_persondays,
               o.budget_allocated_lakhs,
               o.persondays_per_lakh
        FROM predictions p
        JOIN optimizer o ON p.district = o.district AND p.state = o.state
        WHERE p.financial_year = (SELECT MAX(financial_year) FROM predictions)
        {state_clause}
    """), params).fetchall()

    if not rows:
        return _empty_response()

    df = pd.DataFrame([dict(r._mapping) for r in rows]).dropna()
    # dropna() can discard every row; do not hand the solver a zero-variable problem.
    if df.empty:
        return _empty_response()

    budgets = df["budget_allocated_lakhs"].values * req.budget_scale
    efficiency = df["persondays_per_lakh"].values
    total_bud = budgets.sum()

    lb = budgets * req.min_fraction
    ub = budgets * req.max_fraction

    # linprog minimises, so the efficiency vector is negated to maximise person-days.
    res = linprog(-efficiency, A_ub=[np.ones(len(df))],
                  b_ub=[total_bud], bounds=list(zip(lb, ub)), method="highs")

    opt_budgets = res.x if res.success else budgets
    sq_total = float((efficiency * budgets).sum())
    opt_total = float((efficiency * opt_budgets).sum())

    districts_out = []
    per_district = zip(df["state"], df["district"], budgets, opt_budgets, efficiency)
    for state_name, district_name, orig, opt, eff in per_district:
        orig, opt, eff = float(orig), float(opt), float(eff)
        sq_pd = eff * orig
        opt_pd = eff * opt
        districts_out.append({
            "state": state_name,
            "district": district_name,
            "budget_allocated_lakhs": round(orig, 2),
            "optimized_budget": round(opt, 2),
            "budget_change": round(opt - orig, 2),
            # A zero original budget has no meaningful percentage change.
            "budget_change_pct": round((opt - orig) / orig * 100, 2) if orig else 0,
            "sq_persondays": round(sq_pd, 3),
            "opt_persondays": round(opt_pd, 3),
            "persondays_gain": round(opt_pd - sq_pd, 3),
            "persondays_gain_pct": round((opt_pd - sq_pd) / sq_pd * 100, 2) if sq_pd else 0,
            "persondays_per_lakh": round(eff, 4),
        })

    gain = opt_total - sq_total
    return OptimizerResponse(
        scope=scope,
        total_budget_lakhs=round(total_bud, 2),
        sq_persondays_total=round(sq_total, 2),
        opt_persondays_total=round(opt_total, 2),
        gain_lakhs=round(gain, 2),
        gain_pct=round(gain / sq_total * 100, 2) if sq_total else 0,
        districts=districts_out,
    )
|
backend/routers/predictions.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""routers/predictions.py — Model prediction endpoints."""
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter, Depends, Query
|
| 4 |
+
from sqlalchemy.orm import Session
|
| 5 |
+
from typing import Optional
|
| 6 |
+
from backend.database import get_db
|
| 7 |
+
from backend import crud
|
| 8 |
+
|
| 9 |
+
# Every route declared on this router is served under the /predictions prefix.
router = APIRouter(prefix="/predictions", tags=["Predictions"])
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# GET /predictions/ — prediction rows; every filter is optional.
@router.get("/")
def get_predictions(
    state: Optional[str] = Query(None),
    district: Optional[str] = Query(None),
    year: Optional[int] = Query(None),
    db: Session = Depends(get_db),
):
    predictions = crud.get_predictions(db, state, district, year)
    return predictions
|
backend/schemas.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
schemas.py
|
| 3 |
+
----------
|
| 4 |
+
Pydantic schemas for API request/response validation.
|
| 5 |
+
|
| 6 |
+
V3 update: expenditure_lakhs, expenditure_per_personday, demand_fulfillment_rate
|
| 7 |
+
removed from DistrictSummary — synthetic columns dropped in leak-free pipeline.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from pydantic import BaseModel
|
| 11 |
+
from typing import Optional, List
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# One (state, district, financial year) record with its person-days and
# average wage rate.
class DistrictSummary(BaseModel):
    state: str
    district: str
    financial_year: int
    person_days_lakhs: float
    avg_wage_rate: float

    class Config:
        # Allow the model to be populated from attribute-bearing objects.
        from_attributes = True
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# One prediction row: the recorded person-days next to the predicted value and
# the prediction error.
class PredictionOut(BaseModel):
    state: str
    district: str
    financial_year: int
    person_days_lakhs: float
    predicted_persondays: float
    prediction_error: float

    class Config:
        # Allow the model to be populated from attribute-bearing objects.
        from_attributes = True
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# Per-district optimizer result: the original and optimized budget, and the
# status-quo ("sq") versus optimized ("opt") person-days.
class OptimizerOut(BaseModel):
    state: str
    district: str
    budget_allocated_lakhs: float
    optimized_budget: float
    budget_change: float
    budget_change_pct: float
    sq_persondays: float
    opt_persondays: float
    persondays_gain: float
    persondays_gain_pct: float
    persondays_per_lakh: float

    class Config:
        # Allow the model to be populated from attribute-bearing objects.
        from_attributes = True
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# Request body for a live optimizer run; every field has a default.
class OptimizerRequest(BaseModel):
    state: Optional[str] = None   # None means no state filter
    budget_scale: float = 1.0     # multiplier applied to each district budget
    min_fraction: float = 0.40    # lower bound, as a fraction of the scaled budget
    max_fraction: float = 2.50    # upper bound, as a fraction of the scaled budget
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# Response of a live optimizer run: overall totals plus one OptimizerOut per
# district.
class OptimizerResponse(BaseModel):
    scope: str
    total_budget_lakhs: float
    sq_persondays_total: float
    opt_persondays_total: float
    gain_lakhs: float
    gain_pct: float
    districts: List[OptimizerOut]
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# Dataset-wide summary statistics.
class StatsOut(BaseModel):
    total_districts: int
    total_states: int
    year_range: str
    total_persondays_lakhs: float
    total_expenditure_lakhs: float  # kept for API compat, always 0.0 in V3
    covid_spike_pct: float
|
data/processed/mnrega_cleaned.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/processed/mnrega_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/processed/optimized_budget_allocation.csv
ADDED
|
@@ -0,0 +1,707 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
state,district,budget_allocated_lakhs,optimized_budget,budget_change,budget_change_pct,sq_persondays,opt_persondays,persondays_gain,persondays_gain_pct,persondays_per_lakh
|
| 2 |
+
Rajasthan,Jhalawar,34513.79,61420.08,26906.29,77.96,142.07,252.825,110.755,77.96,0.004116325677359687
|
| 3 |
+
Rajasthan,Bhilwara,63021.65,96309.49,33287.840000000004,52.82,207.529,317.145,109.616,52.82,0.0032929794761006733
|
| 4 |
+
Rajasthan,Jodhpur,43157.28,72509.08,29351.800000000003,68.01,154.84,260.149,105.309,68.01,0.00358780720193673
|
| 5 |
+
Odisha,Ganjam,22347.28,40981.25,18633.97,83.38,115.335,211.505,96.17,83.38,0.005161030783164663
|
| 6 |
+
Tamil Nadu,Chengalpattu,22102.08,40171.85,18069.769999999997,81.76,103.364,187.87,84.506,81.76,0.00467666391579435
|
| 7 |
+
Andhra Pradesh,Prakasam,44170.36,66462.53,22292.17,50.47,143.216,215.495,72.279,50.47,0.0032423552807810487
|
| 8 |
+
Gujarat,Dohad,28223.4,47928.91,19705.510000000002,69.82,103.06,175.016,71.956,69.82,0.003651579894697308
|
| 9 |
+
Uttar Pradesh,Sitapur,34734.96,55845.88,21110.92,60.78,117.944,189.627,71.683,60.78,0.0033955415523725953
|
| 10 |
+
Tamil Nadu,Ramanathapuram,24985.21,43423.92,18438.71,73.8,96.415,167.568,71.153,73.8,0.0038588829151325926
|
| 11 |
+
Uttar Pradesh,Siddharth Nagar,33779.28,54065.01,20285.730000000003,60.05,114.38,183.069,68.689,60.05,0.003386099407684237
|
| 12 |
+
Tamil Nadu,Dindigul,33099.97,53277.05,20177.08,60.96,112.531,181.128,68.597,60.96,0.003399731178004089
|
| 13 |
+
Rajasthan,Ajmer,64856.7,86446.01,21589.309999999998,33.29,198.554,264.648,66.094,33.29,0.0030614261903550446
|
| 14 |
+
Odisha,Koraput,24902.06,42423.72,17521.66,70.36,92.406,157.425,65.019,70.36,0.0037107773413123254
|
| 15 |
+
Rajasthan,Nagaur,85238.46,106521.25,21282.789999999994,24.97,255.112,318.81,63.698,24.97,0.00299292127051568
|
| 16 |
+
Tamil Nadu,Kanchipuram,20938.58,36656.01,15717.43,75.06,82.417,144.283,61.866,75.06,0.003936131294481287
|
| 17 |
+
Andhra Pradesh,Kurnool,29973.18,47160.09,17186.909999999996,57.34,100.049,157.418,57.369,57.34,0.003337950794677108
|
| 18 |
+
Rajasthan,Bikaner,34206.5,51717.5,17511.0,51.19,111.376,168.392,57.016,51.19,0.0032559893587476066
|
| 19 |
+
Rajasthan,Banswara,60504.32,78675.2,18170.879999999997,30.03,183.15,238.154,55.004,30.03,0.003027056580422687
|
| 20 |
+
Maharashtra,Palghar,21062.69,35501.96,14439.27,68.55,76.026,128.145,52.119,68.55,0.0036095104661370415
|
| 21 |
+
Odisha,Kendujhar,22443.48,37179.9,14736.420000000002,65.66,78.262,129.649,51.387,65.66,0.0034870706325400517
|
| 22 |
+
Andhra Pradesh,Nellore,33897.52,49717.74,15820.220000000001,46.67,108.913,159.743,50.83,46.67,0.0032130079132632714
|
| 23 |
+
Tamil Nadu,Sivagangai,28850.77,44141.83,15291.060000000001,53.0,95.105,145.511,50.406,53.0,0.0032964458140978562
|
| 24 |
+
Tamil Nadu,Pudukkottai,42900.64,59043.35,16142.71,37.63,132.89,182.894,50.004,37.63,0.0030976227860470143
|
| 25 |
+
Tamil Nadu,Tirupathur,14446.82,25866.05,11419.23,79.04,61.585,110.264,48.679,79.04,0.0042628758439573556
|
| 26 |
+
Madhya Pradesh,Balaghat,29396.28,44285.34,14889.059999999998,50.65,95.607,144.031,48.424,50.65,0.0032523502973845673
|
| 27 |
+
Tamil Nadu,Karur,15300.44,27117.68,11817.24,77.23,62.237,110.306,48.069,77.24,0.004067660799297275
|
| 28 |
+
Uttar Pradesh,Kheri,26011.01,40361.48,14350.470000000005,55.17,86.324,133.95,47.626,55.17,0.0033187484838151232
|
| 29 |
+
Odisha,Bolangir,28454.31,43072.11,14617.8,51.37,92.675,140.285,47.61,51.37,0.0032569758324837252
|
| 30 |
+
Tamil Nadu,Vellore,21076.41,34419.69,13343.280000000002,63.31,72.252,117.994,45.742,63.31,0.003428098048956155
|
| 31 |
+
Tamil Nadu,Nagapattinam,11359.29,20728.36,9369.07,82.48,55.317,100.942,45.625,82.48,0.004869758585263691
|
| 32 |
+
Andhra Pradesh,Palnadu,25215.12,38852.88,13637.759999999998,54.09,83.554,128.745,45.191,54.09,0.0033136467325953637
|
| 33 |
+
Andhra Pradesh,Anantapur,37192.75,51524.01,14331.260000000002,38.53,115.475,159.97,44.495,38.53,0.0031047717633140865
|
| 34 |
+
Tamil Nadu,Thoothukkudi,21805.35,34860.85,13055.5,59.87,73.829,118.033,44.204,59.87,0.0033858204523201873
|
| 35 |
+
Chhattisgarh,Kanker,18867.61,31119.62,12252.009999999998,64.94,65.337,107.765,42.428,64.94,0.003462918726855177
|
| 36 |
+
Maharashtra,Parbhani,21829.87,34110.45,12280.579999999998,56.26,72.635,113.496,40.861,56.26,0.0033273216927082027
|
| 37 |
+
Uttar Pradesh,Prayagraj,20807.27,32926.51,12119.240000000002,58.25,69.884,110.588,40.704,58.25,0.0033586337852106497
|
| 38 |
+
Assam,Dhubri,9215.78,16916.9,7701.120000000001,83.56,48.286,88.636,40.35,83.56,0.0052394913941088
|
| 39 |
+
Andhra Pradesh,Tirupati,33787.19,46622.89,12835.699999999997,37.99,104.842,144.671,39.829,37.99,0.003103010342085269
|
| 40 |
+
Rajasthan,Karauli,8015.52,14858.61,6843.09,85.37,46.365,85.948,39.583,85.37,0.005784403257680101
|
| 41 |
+
Madhya Pradesh,Dindori,31667.27,44213.05,12545.780000000002,39.62,99.078,138.33,39.252,39.62,0.0031287193370315787
|
| 42 |
+
Tamil Nadu,Tenkasi,13610.81,23532.3,9921.49,72.89,52.101,90.08,37.979,72.89,0.003827913254244237
|
| 43 |
+
Odisha,Nabarangapur,17937.75,28807.36,10869.61,60.6,60.902,97.806,36.904,60.6,0.0033951861298100374
|
| 44 |
+
Chhattisgarh,Rajnandagon,17478.81,28291.6,10812.789999999997,61.86,59.602,96.473,36.871,61.86,0.0034099575428762022
|
| 45 |
+
Uttar Pradesh,Fatehpur,14174.0,24121.52,9947.52,70.18,52.156,88.76,36.604,70.18,0.003679695216593763
|
| 46 |
+
Odisha,Dhenkanal,10094.64,18219.82,8125.18,80.49,45.315,81.789,36.474,80.49,0.004489015953020613
|
| 47 |
+
Tamil Nadu,Tiruvarur,18963.31,29562.69,10599.379999999997,55.89,63.079,98.336,35.257,55.89,0.003326370765441265
|
| 48 |
+
Uttar Pradesh,Sant Kabeer Nagar,21731.81,32581.65,10849.84,49.93,70.258,105.335,35.077,49.93,0.0032329566658276503
|
| 49 |
+
Tamil Nadu,Tirunelveli,12376.02,21419.81,9043.79,73.08,47.512,82.231,34.719,73.07,0.003839037105628465
|
| 50 |
+
Tamil Nadu,Erode,18763.56,29149.49,10385.93,55.35,62.324,96.821,34.497,55.35,0.003321544525665705
|
| 51 |
+
Himachal Pradesh,Mandi,36291.24,47584.16,11292.920000000006,31.12,110.375,144.721,34.346,31.12,0.0030413675586725615
|
| 52 |
+
Odisha,Mayurbhanj,35253.13,46478.04,11224.910000000003,31.84,107.416,141.618,34.202,31.84,0.0030469918557586234
|
| 53 |
+
Karnataka,Vijayanagara,22187.48,32662.92,10475.439999999999,47.21,71.342,105.025,33.683,47.21,0.0032154169829110833
|
| 54 |
+
Rajasthan,Alwar,10564.01,18780.4,8216.390000000001,77.78,43.177,76.759,33.582,77.78,0.0040871790163015745
|
| 55 |
+
Uttar Pradesh,Banda,10978.28,19377.9,8399.62,76.51,43.867,77.43,33.563,76.51,0.003995798977617622
|
| 56 |
+
Karnataka,Belagavi,35936.08,46923.51,10987.43,30.57,109.043,142.383,33.34,30.58,0.003034359896794531
|
| 57 |
+
Maharashtra,Hingoli,11616.64,20231.56,8614.920000000002,74.16,44.881,78.165,33.284,74.16,0.0038635095862486917
|
| 58 |
+
Odisha,Jajpur,4483.12,8415.9,3932.7799999999997,87.72,36.597,68.701,32.104,87.72,0.008163288067238888
|
| 59 |
+
Rajasthan,Tonk,12727.92,21545.47,8817.550000000001,69.28,46.331,78.428,32.097,69.28,0.0036401077316639326
|
| 60 |
+
Andhra Pradesh,Bapatla,22944.75,33030.79,10086.04,43.96,72.956,105.026,32.07,43.96,0.003179638043561163
|
| 61 |
+
Jharkhand,Giridih,31209.94,41542.57,10332.630000000001,33.11,95.496,127.112,31.616,33.11,0.0030597944116521852
|
| 62 |
+
Uttar Pradesh,Amethi,13338.55,22217.28,8878.73,66.56,47.308,78.798,31.49,66.56,0.003546712348793535
|
| 63 |
+
Uttar Pradesh,Barabanki,27689.45,37858.09,10168.639999999996,36.72,85.624,117.068,31.444,36.72,0.003092296885636948
|
| 64 |
+
Assam,Barpeta,5231.46,9754.48,4523.0199999999995,86.46,35.876,66.894,31.018,86.46,0.006857741433557744
|
| 65 |
+
Tamil Nadu,Krishnagiri,34206.25,44293.61,10087.36,29.49,103.378,133.864,30.486,29.49,0.00302219623606797
|
| 66 |
+
Odisha,Angul,8309.35,15027.6,6718.25,80.85,37.567,67.941,30.374,80.85,0.004521051586465849
|
| 67 |
+
Uttar Pradesh,Rae Bareli,12541.8,20980.9,8439.100000000002,67.29,44.797,74.94,30.143,67.29,0.0035718158478049403
|
| 68 |
+
Tamil Nadu,Tiruppur,19337.98,28677.91,9339.93,48.3,62.249,92.314,30.065,48.3,0.00321900219154224
|
| 69 |
+
Uttar Pradesh,Balrampur,16982.83,26075.96,9093.129999999997,53.54,56.06,86.076,30.016,53.54,0.0033009810496837095
|
| 70 |
+
Uttar Pradesh,Jhansi,13783.26,22434.53,8651.269999999999,62.77,47.131,76.713,29.582,62.77,0.0034194377817729623
|
| 71 |
+
Uttar Pradesh,Ballia,13814.85,22460.97,8646.12,62.59,47.224,76.779,29.555,62.58,0.003418350543075024
|
| 72 |
+
Andhra Pradesh,Y.S.R,25967.38,35456.65,9489.27,36.54,80.296,109.639,29.343,36.54,0.0030921871979383364
|
| 73 |
+
Rajasthan,Jalore,22667.38,31934.59,9267.21,40.88,71.417,100.615,29.198,40.88,0.0031506508471645157
|
| 74 |
+
Tamil Nadu,Mayiladuthurai,22279.07,31306.94,9027.869999999999,40.52,69.986,98.346,28.36,40.52,0.003141333996437015
|
| 75 |
+
Madhya Pradesh,Bhind,1558.31,2973.23,1414.92,90.8,30.47,58.136,27.666,90.8,0.019553233952166127
|
| 76 |
+
Rajasthan,Sikar,11102.46,18613.22,7510.760000000002,67.65,39.731,66.609,26.878,67.65,0.0035785762794912123
|
| 77 |
+
Rajasthan,Dholpur,4089.41,7662.02,3572.6100000000006,87.36,30.37,56.902,26.532,87.36,0.007426499177142914
|
| 78 |
+
Rajasthan,Jaipur,15021.74,23037.68,8015.9400000000005,53.36,49.572,76.025,26.453,53.36,0.0033000171751075445
|
| 79 |
+
Odisha,Bargarh,10240.79,17353.84,7113.049999999999,69.46,37.278,63.171,25.893,69.46,0.0036401488557035147
|
| 80 |
+
Jharkhand,Sahebganj,8898.15,15529.22,6631.07,74.52,34.723,60.599,25.876,74.52,0.0039022718205469677
|
| 81 |
+
Madhya Pradesh,Rajgarh,19739.9,27881.66,8141.759999999998,41.25,62.223,87.887,25.664,41.25,0.0031521436278805865
|
| 82 |
+
Kerala,Palakkad,26115.51,34289.23,8173.720000000005,31.3,79.43,104.29,24.86,31.3,0.003041487606407074
|
| 83 |
+
Rajasthan,Sawai Madhopur,5839.71,10698.51,4858.8,83.2,29.433,53.922,24.489,83.2,0.005040147541573126
|
| 84 |
+
Odisha,Rayagada,14561.44,22068.4,7506.960000000001,51.55,47.501,71.99,24.489,51.55,0.0032621086925468906
|
| 85 |
+
Odisha,Bhadrak,5067.22,9356.6,4289.38,84.65,28.68,52.957,24.277,84.65,0.005659908194236681
|
| 86 |
+
Andhra Pradesh,Guntur,8888.24,15270.81,6382.57,71.81,33.552,57.645,24.093,71.81,0.003774875565916312
|
| 87 |
+
Telangana,Mahabubabad,15590.57,22895.0,7304.43,46.85,50.113,73.592,23.479,46.85,0.0032143148069634403
|
| 88 |
+
Assam,Nalbari,6273.44,11379.66,5106.22,81.39,28.501,51.699,23.198,81.39,0.004543121477211865
|
| 89 |
+
Uttar Pradesh,Chitrakoot,6662.45,11916.61,5254.160000000001,78.86,28.349,50.706,22.357,78.86,0.0042550413136308715
|
| 90 |
+
Kerala,Thrissur,15471.21,22467.9,6996.690000000002,45.22,49.421,71.771,22.35,45.22,0.003194384925290265
|
| 91 |
+
Chhattisgarh,Gariyaband,18272.41,25379.3,7106.889999999999,38.89,56.97,79.128,22.158,38.89,0.0031178153292313383
|
| 92 |
+
Maharashtra,Chandrapur,22202.39,29432.44,7230.049999999999,32.56,67.848,89.942,22.094,32.56,0.0030558872265553393
|
| 93 |
+
Kerala,Kottayam,11619.93,18240.88,6620.950000000001,56.98,38.75,60.829,22.079,56.98,0.003334787731079275
|
| 94 |
+
Andhra Pradesh,West Godavari,12840.14,19552.6,6712.459999999999,52.28,42.123,64.144,22.021,52.28,0.0032805717071620714
|
| 95 |
+
Uttar Pradesh,Ambedkar Nagar,14554.18,21399.36,6845.18,47.03,46.794,68.802,22.008,47.03,0.003215158806610884
|
| 96 |
+
Karnataka,Bagalkote,14315.73,21152.33,6836.600000000002,47.76,46.059,68.055,21.996,47.76,0.0032173699839267715
|
| 97 |
+
Odisha,Nayagarh,6035.34,10904.1,4868.76,80.67,27.266,49.262,21.996,80.67,0.004517723939330675
|
| 98 |
+
Assam,Darrang,9428.32,15653.07,6224.75,66.02,33.135,55.011,21.876,66.02,0.0035144118994688343
|
| 99 |
+
Telangana,Sangareddy,17704.1,24621.97,6917.870000000003,39.07,55.268,76.864,21.596,39.08,0.003121762755519908
|
| 100 |
+
Madhya Pradesh,Chhatarpur,13676.71,20356.55,6679.84,48.84,44.083,65.614,21.531,48.84,0.0032232166946582915
|
| 101 |
+
Uttar Pradesh,Aligarh,10684.99,17043.76,6358.769999999999,59.51,36.134,57.638,21.504,59.51,0.003381753281940367
|
| 102 |
+
Tamil Nadu,Perambalur,12799.26,19328.31,6529.050000000001,51.01,41.663,62.916,21.253,51.01,0.003255110061050404
|
| 103 |
+
Madhya Pradesh,Rewa,8667.3,14546.36,5879.060000000001,67.83,31.095,52.187,21.092,67.83,0.0035876224429753213
|
| 104 |
+
Kerala,Ernakulam,14145.08,20695.53,6550.449999999999,46.31,45.419,66.452,21.033,46.31,0.00321093977552619
|
| 105 |
+
Karnataka,Davanagere,9489.86,15532.13,6042.269999999999,63.67,32.601,53.358,20.757,63.67,0.0034353509956943514
|
| 106 |
+
Uttar Pradesh,Mau,15310.59,21846.99,6536.4000000000015,42.69,48.487,69.187,20.7,42.69,0.003166892980610153
|
| 107 |
+
Uttar Pradesh,Pilibhit,11020.85,17200.79,6179.9400000000005,56.07,36.668,57.23,20.562,56.08,0.0033271480874887144
|
| 108 |
+
Madhya Pradesh,Shivpuri,25035.6,31739.39,6703.790000000001,26.78,75.217,95.358,20.141,26.78,0.0030044017319337266
|
| 109 |
+
Uttar Pradesh,Kashganj,7366.96,12683.76,5316.8,72.17,27.865,47.975,20.11,72.17,0.003782428572979899
|
| 110 |
+
Uttar Pradesh,Gonda,16865.32,23302.93,6437.610000000001,38.17,52.354,72.338,19.984,38.17,0.003104239943268198
|
| 111 |
+
Madhya Pradesh,Satna,10580.94,16571.61,5990.67,56.62,35.268,55.236,19.968,56.62,0.0033331632161225752
|
| 112 |
+
Chhattisgarh,Durg,13500.48,19703.59,6203.110000000001,45.95,43.314,63.216,19.902,45.95,0.0032083303704757166
|
| 113 |
+
Bihar,Sitamarhi,24346.93,30954.38,6607.450000000001,27.14,73.172,93.03,19.858,27.14,0.0030053891804839457
|
| 114 |
+
Uttar Pradesh,Varanasi,6283.68,11102.77,4819.09,76.69,25.208,44.541,19.333,76.69,0.004011661956051231
|
| 115 |
+
Andhra Pradesh,Krishna,25808.32,32158.93,6350.610000000001,24.61,77.234,96.239,19.005,24.61,0.002992600835699495
|
| 116 |
+
Uttar Pradesh,Amroha,12087.7,17882.14,5794.439999999999,47.94,38.897,57.543,18.646,47.94,0.0032178991867766404
|
| 117 |
+
Jharkhand,Jamtara,16680.67,22715.96,6035.290000000001,36.18,51.466,70.087,18.621,36.18,0.003085367674080238
|
| 118 |
+
Chhattisgarh,Dhamtari,12806.5,18621.26,5814.759999999998,45.4,40.929,59.513,18.584,45.41,0.0031959551790106586
|
| 119 |
+
Jammu and Kashmir,Kishtwar,10612.18,16198.31,5586.129999999999,52.64,34.874,53.231,18.357,52.64,0.003286223942677188
|
| 120 |
+
Nagaland,Dimapur,3211.34,5976.19,2764.8499999999995,86.1,20.958,39.002,18.044,86.1,0.006526247610031948
|
| 121 |
+
Kerala,Pathanamthitta,10115.32,15567.96,5452.639999999999,53.9,33.417,51.43,18.013,53.9,0.003303602851911754
|
| 122 |
+
Uttarakhand,Tehri Garhwal,8177.31,13369.08,5191.7699999999995,63.49,28.057,45.87,17.813,63.49,0.0034310794136458074
|
| 123 |
+
Assam,Bongaigaon,2252.34,4236.33,1983.9899999999998,88.09,19.666,36.989,17.323,88.09,0.008731363826065337
|
| 124 |
+
Uttar Pradesh,Unnao,13438.3,18908.04,5469.740000000002,40.7,42.253,59.451,17.198,40.7,0.003144222111427785
|
| 125 |
+
Uttar Pradesh,Mirzapur,14179.63,19669.03,5489.4,38.71,44.175,61.277,17.102,38.71,0.0031153845340111132
|
| 126 |
+
Odisha,Cuttack,2650.32,4960.91,2310.5899999999997,87.18,19.577,36.645,17.068,87.18,0.007386655196353648
|
| 127 |
+
Andhra Pradesh,Ntr,25273.44,30989.65,5716.210000000003,22.62,75.382,92.431,17.049,22.62,0.0029826568919782987
|
| 128 |
+
Madhya Pradesh,Sidhi,13475.68,18887.52,5411.84,40.16,42.249,59.216,16.967,40.16,0.0031352035667216796
|
| 129 |
+
Jammu and Kashmir,Shopian,5659.65,9938.74,4279.09,75.61,22.312,39.181,16.869,75.61,0.003942293251349466
|
| 130 |
+
Bihar,Gopalganj,12758.18,18066.46,5308.279999999999,41.61,40.227,56.964,16.737,41.61,0.003153035934592551
|
| 131 |
+
Madhya Pradesh,Tikamgarh,7415.88,12204.69,4788.81,64.58,25.663,42.235,16.572,64.58,0.0034605468265398037
|
| 132 |
+
Chhattisgarh,Korea,6020.12,10386.66,4366.54,72.53,22.84,39.406,16.566,72.53,0.003793944306758005
|
| 133 |
+
Maharashtra,Latur,22446.52,27929.3,5482.779999999999,24.43,67.103,83.494,16.391,24.43,0.002989461172600474
|
| 134 |
+
Himachal Pradesh,Kullu,11981.7,17140.26,5158.559999999998,43.05,37.981,54.333,16.352,43.05,0.0031699174574559537
|
| 135 |
+
Madhya Pradesh,Jhabua,11658.77,16741.56,5082.790000000001,43.6,37.018,53.156,16.138,43.6,0.0031751205315826625
|
| 136 |
+
Uttar Pradesh,Bareilly,13486.6,18658.92,5172.319999999998,38.35,41.867,57.924,16.057,38.35,0.003104340604748417
|
| 137 |
+
Uttar Pradesh,Kanpur Dehat,7449.55,12098.43,4648.88,62.4,25.46,41.348,15.888,62.4,0.003417656100032888
|
| 138 |
+
Maharashtra,Nandurbar,11131.85,16065.45,4933.6,44.32,35.428,51.13,15.702,44.32,0.0031825797149620232
|
| 139 |
+
Madhya Pradesh,Dewas,7487.25,12091.95,4604.700000000001,61.5,25.519,41.213,15.694,61.5,0.0034083274900664462
|
| 140 |
+
Uttar Pradesh,Farrukhabad,7500.79,12100.25,4599.46,61.32,25.523,41.174,15.651,61.32,0.003402708248064537
|
| 141 |
+
Uttar Pradesh,Etawah,7791.32,12413.94,4622.620000000001,59.33,26.343,41.972,15.629,59.33,0.0033810702166000113
|
| 142 |
+
Uttar Pradesh,Lalitpur,15202.69,20290.81,5088.120000000001,33.47,46.549,62.128,15.579,33.47,0.0030618923361589298
|
| 143 |
+
Andhra Pradesh,Visakhapatanam,5614.6,9697.17,4082.5699999999997,72.71,21.379,36.924,15.545,72.71,0.0038077512200334843
|
| 144 |
+
Nagaland,Kohima,1107.67,2109.41,1001.7399999999998,90.44,17.134,32.629,15.495,90.43,0.015468505962967309
|
| 145 |
+
Karnataka,Gadag,10844.21,15689.55,4845.34,44.68,34.581,50.032,15.451,44.68,0.0031888906614681942
|
| 146 |
+
Uttar Pradesh,Lucknow,3488.61,6416.47,2927.86,83.93,18.406,33.853,15.447,83.92,0.005276026841636067
|
| 147 |
+
Karnataka,Yadgir,14626.58,19654.15,5027.5700000000015,34.37,44.896,60.328,15.432,34.37,0.0030694803569939113
|
| 148 |
+
Bihar,Arwal,6966.35,11439.68,4473.33,64.21,23.964,39.352,15.388,64.21,0.003439964974484486
|
| 149 |
+
Assam,Lakhimpur,8387.74,12984.98,4597.24,54.81,27.821,43.069,15.248,54.81,0.0033168648527493703
|
| 150 |
+
Kerala,Idukki,18292.61,23356.24,5063.630000000001,27.68,55.061,70.303,15.242,27.68,0.0030100133332531553
|
| 151 |
+
Himachal Pradesh,Shimla,15169.78,20082.28,4912.499999999998,32.38,46.348,61.357,15.009,32.38,0.003055284915140496
|
| 152 |
+
Assam,Nagaon,14691.04,19501.64,4810.5999999999985,32.75,44.895,59.596,14.701,32.75,0.0030559443034666026
|
| 153 |
+
Rajasthan,Dausa,5954.55,10025.85,4071.3,68.37,21.453,36.121,14.668,68.37,0.003602791142907524
|
| 154 |
+
Telangana,Nizamabad,17319.99,22177.04,4857.049999999999,28.04,52.188,66.823,14.635,28.04,0.003013165711989441
|
| 155 |
+
Assam,South Salmara-Mankachar,2479.84,4619.38,2139.54,86.28,16.95,31.574,14.624,86.28,0.006835118394735143
|
| 156 |
+
Jharkhand,Gumla,10877.43,15481.88,4604.449999999999,42.33,34.387,48.943,14.556,42.33,0.003161316597762523
|
| 157 |
+
Tripura,South Tripura,14826.45,19600.95,4774.5,32.2,45.189,59.741,14.552,32.2,0.003047863783980656
|
| 158 |
+
Uttar Pradesh,Sonbhadra,15638.71,20363.67,4724.959999999999,30.21,47.388,61.705,14.317,30.21,0.00303017320482316
|
| 159 |
+
Madhya Pradesh,Singrauli,12966.41,17587.48,4621.07,35.64,39.947,54.184,14.237,35.64,0.003080806483830143
|
| 160 |
+
Assam,Goalpara,5604.63,9467.09,3862.46,68.92,20.334,34.347,14.013,68.91,0.0036280717906445204
|
| 161 |
+
Chhattisgarh,Balod,16100.06,20702.35,4602.289999999999,28.59,48.606,62.5,13.894,28.58,0.003018994960267229
|
| 162 |
+
Jharkhand,Godda,12771.04,17276.29,4505.25,35.28,39.341,53.219,13.878,35.28,0.0030804852228166224
|
| 163 |
+
Jammu and Kashmir,Doda,14984.69,19539.15,4554.460000000001,30.39,45.412,59.215,13.803,30.4,0.003030559858095162
|
| 164 |
+
Uttar Pradesh,Kaushambi,6734.04,10802.44,4068.4000000000005,60.42,22.824,36.613,13.789,60.41,0.0033893472566245524
|
| 165 |
+
Telangana,Wanaparthy,8460.08,12699.16,4239.08,50.11,27.362,41.072,13.71,50.11,0.0032342483759018824
|
| 166 |
+
Maharashtra,Nagpur,10300.81,14605.28,4304.470000000001,41.79,32.5,46.081,13.581,41.79,0.0031550916869644233
|
| 167 |
+
Nagaland,Tuensang,3000.07,5523.34,2523.27,84.11,16.012,29.479,13.467,84.11,0.0053372087984613695
|
| 168 |
+
Odisha,Kendrapara,3344.96,6097.81,2752.8500000000004,82.3,16.251,29.625,13.374,82.3,0.00485835406103511
|
| 169 |
+
Madhya Pradesh,Ratlam,9117.03,13273.08,4156.049999999999,45.59,29.206,42.52,13.314,45.59,0.0032034555112794407
|
| 170 |
+
Telangana,Medak,15931.06,20283.36,4352.300000000001,27.32,47.9,60.986,13.086,27.32,0.0030067051407753156
|
| 171 |
+
Madhya Pradesh,Sagar,11895.11,16112.87,4217.76,35.46,36.643,49.636,12.993,35.46,0.003080509553925941
|
| 172 |
+
Madhya Pradesh,Barwani,8970.35,12994.65,4024.2999999999993,44.86,28.641,41.49,12.849,44.86,0.003192852006889363
|
| 173 |
+
Gujarat,Tapi,3899.74,6954.01,3054.2700000000004,78.32,16.375,29.2,12.825,78.32,0.0041989978819100765
|
| 174 |
+
Meghalaya,West Khasi Hills,3881.82,6915.03,3033.2099999999996,78.14,16.229,28.91,12.681,78.14,0.004180770875517154
|
| 175 |
+
Punjab,Ludhiana,6475.7,10224.06,3748.3599999999997,57.88,21.668,34.21,12.542,57.88,0.0033460475315409918
|
| 176 |
+
Rajasthan,Jhunjhunu,6332.88,10044.39,3711.5099999999993,58.61,21.317,33.81,12.493,58.61,0.0033660830459443414
|
| 177 |
+
Madhya Pradesh,Shahdol,11662.84,15713.87,4051.0300000000007,34.73,35.823,48.266,12.443,34.73,0.00307155032564967
|
| 178 |
+
Uttar Pradesh,Deoria,14311.2,18376.25,4065.0499999999993,28.4,43.132,55.384,12.252,28.41,0.0030138632679300127
|
| 179 |
+
Odisha,Jagatsinghapur,3460.13,6232.67,2772.54,80.13,15.256,27.48,12.224,80.13,0.004409082895729351
|
| 180 |
+
Bihar,Sheohar,6402.54,10062.24,3659.7,57.16,21.352,33.557,12.205,57.16,0.003334926451064734
|
| 181 |
+
Odisha,Baleshwar,8882.63,12722.99,3840.3600000000006,43.23,28.177,40.359,12.182,43.23,0.0031721460873637653
|
| 182 |
+
Uttar Pradesh,Kanpur Nagar,3931.49,6953.74,3022.25,76.87,15.83,27.999,12.169,76.87,0.004026463249302428
|
| 183 |
+
Tripura,Khowai,14176.43,18177.57,4001.1399999999994,28.22,42.721,54.779,12.058,28.22,0.0030135231507509293
|
| 184 |
+
Nagaland,Phek,1343.53,2536.71,1193.18,88.81,13.513,25.514,12.001,88.81,0.010057832724241364
|
| 185 |
+
Tripura,Sepahijala,13196.1,17135.34,3939.24,29.85,39.933,51.854,11.921,29.85,0.0030261213540364196
|
| 186 |
+
Odisha,Khordha,3048.73,5552.28,2503.5499999999997,82.12,14.453,26.321,11.868,82.11,0.004740662505371088
|
| 187 |
+
Uttar Pradesh,Etah,10481.7,14255.16,3773.459999999999,36.0,32.324,43.961,11.637,36.0,0.0030838509020483314
|
| 188 |
+
Odisha,Puri,5557.95,8956.02,3398.0700000000006,61.14,18.896,30.449,11.553,61.14,0.003399814679872975
|
| 189 |
+
Gujarat,Mahisagar,5123.16,8440.71,3317.5499999999993,64.76,17.733,29.216,11.483,64.75,0.0034613402665542364
|
| 190 |
+
Assam,Sribhumi,8827.24,12452.09,3624.8500000000004,41.06,27.822,39.247,11.425,41.06,0.003151834548511199
|
| 191 |
+
Puducherry,Pondicherry,2212.97,4106.25,1893.2800000000002,85.55,13.259,24.603,11.344,85.56,0.0059914955918968634
|
| 192 |
+
Himachal Pradesh,Sirmaur,7647.51,11175.17,3527.66,46.13,24.536,35.854,11.318,46.13,0.0032083645526452403
|
| 193 |
+
Gujarat,Narmada,10815.13,14454.36,3639.2300000000014,33.65,33.129,44.277,11.148,33.65,0.003063208671555497
|
| 194 |
+
Karnataka,Haveri,14379.75,18022.17,3642.4199999999983,25.33,43.054,53.96,10.906,25.33,0.0029940715241920063
|
| 195 |
+
Nagaland,Peren,3174.85,5695.84,2520.9900000000002,79.41,13.718,24.611,10.893,79.41,0.004320834055152212
|
| 196 |
+
Punjab,Moga,3582.72,6310.95,2728.23,76.15,14.243,25.089,10.846,76.15,0.003975471150410861
|
| 197 |
+
Karnataka,Dharwar,7112.71,10483.72,3371.0099999999993,47.39,22.879,33.722,10.843,47.39,0.003216636134469141
|
| 198 |
+
Meghalaya,West Jaintia Hills,4522.22,7540.59,3018.37,66.75,16.099,26.844,10.745,66.74,0.003559977179349965
|
| 199 |
+
Karnataka,Mandya,9428.79,12857.3,3428.5099999999984,36.36,29.144,39.741,10.597,36.36,0.0030909586489888943
|
| 200 |
+
Nagaland,Zunheboto,2292.32,4224.47,1932.15,84.29,12.53,23.091,10.561,84.29,0.005466078034480351
|
| 201 |
+
Arunachal Pradesh,Kra-Daadi,4983.38,8075.22,3091.84,62.04,17.02,27.58,10.56,62.04,0.003415352632149264
|
| 202 |
+
Madhya Pradesh,Niwari,3872.87,6660.95,2788.08,71.99,14.623,25.15,10.527,71.99,0.0037757528654460385
|
| 203 |
+
Odisha,Jharsuguda,3890.94,6663.88,2772.94,71.27,14.582,24.974,10.392,71.27,0.0037476805090800682
|
| 204 |
+
Bihar,Sheikhpura,6078.73,9234.53,3155.800000000001,51.92,19.874,30.192,10.318,51.92,0.003269432924311493
|
| 205 |
+
Chhattisgarh,Sukma,4038.09,6850.17,2812.08,69.64,14.718,24.967,10.249,69.64,0.003644792463763809
|
| 206 |
+
Tripura,North Tripura,12865.36,16263.79,3398.4300000000003,26.42,38.608,48.806,10.198,26.41,0.003000926518962547
|
| 207 |
+
Nagaland,Mokokchung,3842.4,6573.8,2731.4,71.09,14.309,24.481,10.172,71.09,0.0037239745992088276
|
| 208 |
+
Odisha,Malkangiri,12477.78,15841.53,3363.75,26.96,37.491,47.598,10.107,26.96,0.0030046210143150464
|
| 209 |
+
Madhya Pradesh,Gwalior,5057.16,8039.3,2982.1400000000003,58.97,17.024,27.063,10.039,58.97,0.003366316272374218
|
| 210 |
+
Telangana,Mahabubnagar,11128.87,14430.88,3302.0099999999984,29.67,33.673,43.664,9.991,29.67,0.003025733969396713
|
| 211 |
+
Jharkhand,Pakur,9536.88,12780.48,3243.6000000000004,34.01,29.252,39.201,9.949,34.01,0.0030672505054063805
|
| 212 |
+
Mizoram,Lunglei,6347.94,9436.85,3088.9100000000008,48.66,20.444,30.392,9.948,48.66,0.0032205723431538423
|
| 213 |
+
Assam,Jorhat,4964.01,7900.2,2936.1899999999996,59.15,16.777,26.701,9.924,59.15,0.00337972727693941
|
| 214 |
+
Jharkhand,Ramgarh,6137.5,9190.61,3053.1100000000006,49.75,19.84,29.709,9.869,49.74,0.0032325865580448065
|
| 215 |
+
Meghalaya,South Garo Hills,9418.37,12604.63,3186.2599999999984,33.83,28.861,38.625,9.764,33.83,0.0030643306644355657
|
| 216 |
+
Madhya Pradesh,Vidisha,8923.06,12054.72,3131.66,35.1,27.434,37.062,9.628,35.1,0.0030745058309593347
|
| 217 |
+
Bihar,Khagaria,6666.11,9668.73,3002.62,45.04,21.293,30.884,9.591,45.04,0.0031942167170958776
|
| 218 |
+
Punjab,Hoshiarpur,4430.25,7226.99,2796.74,63.13,15.184,24.769,9.585,63.13,0.0034273460865639634
|
| 219 |
+
Chhattisgarh,Dantewada,3361.18,5835.6,2474.4200000000005,73.62,12.969,22.516,9.547,73.61,0.003858466371928906
|
| 220 |
+
Jharkhand,Lohardaga,4142.99,6855.79,2712.8,65.48,14.429,23.877,9.448,65.48,0.0034827503807636517
|
| 221 |
+
Telangana,Rangareddy,8245.53,11288.52,3042.99,36.9,25.502,34.913,9.411,36.9,0.0030928272651970217
|
| 222 |
+
Uttar Pradesh,Kannauj,9807.18,12894.4,3087.2199999999993,31.48,29.848,39.244,9.396,31.48,0.0030434844675023805
|
| 223 |
+
Uttar Pradesh,Mahoba,5033.44,7837.74,2804.3,55.71,16.732,26.054,9.322,55.71,0.0033241679646524047
|
| 224 |
+
Assam,Bajali,1271.6,2389.4,1117.8000000000002,87.91,10.582,19.884,9.302,87.9,0.00832179930795848
|
| 225 |
+
Jammu and Kashmir,Kathua,3457.39,5927.61,2470.22,71.45,13.005,22.297,9.292,71.45,0.003761507958315377
|
| 226 |
+
Himachal Pradesh,Solan,4048.04,6691.35,2643.3100000000004,65.3,14.078,23.271,9.193,65.3,0.0034777324334739775
|
| 227 |
+
Assam,Sivasagar,5089.67,7860.86,2771.1899999999996,54.45,16.88,26.071,9.191,54.45,0.0033165215033587638
|
| 228 |
+
Manipur,Kangpokpi,2730.61,4879.09,2148.48,78.68,11.553,20.643,9.09,78.68,0.004230922760848309
|
| 229 |
+
Uttarakhand,Pauri Garhwal,4913.98,7642.83,2728.8500000000004,55.53,16.327,25.394,9.067,55.53,0.003322561345386022
|
| 230 |
+
Jharkhand,Simdega,9968.62,12872.29,2903.67,29.13,30.106,38.875,8.769,29.13,0.0030200770016311183
|
| 231 |
+
Maharashtra,Sangli,3172.43,5467.74,2295.31,72.35,12.031,20.736,8.705,72.35,0.0037923610607641465
|
| 232 |
+
Madhya Pradesh,Burhanpur,4455.28,7042.21,2586.9300000000003,58.06,14.921,23.585,8.664,58.07,0.0033490599917401376
|
| 233 |
+
Himachal Pradesh,Hamirpur,6272.55,8995.8,2723.249999999999,43.42,19.902,28.543,8.641,43.42,0.003172872276825215
|
| 234 |
+
Jammu and Kashmir,Reasi,2091.67,3828.21,1736.54,83.02,10.382,19.001,8.619,83.02,0.004963498066138539
|
| 235 |
+
Karnataka,Dakshina Kannada,4426.96,6981.43,2554.4700000000003,57.7,14.794,23.331,8.537,57.71,0.0033417966279342937
|
| 236 |
+
Maharashtra,Ratnagiri,2905.4,5075.81,2170.4100000000003,74.7,11.366,19.857,8.491,74.71,0.003912025882838852
|
| 237 |
+
Gujarat,Junagadh,1065.34,2005.68,940.3400000000001,88.27,9.602,18.077,8.475,88.26,0.009013085024499223
|
| 238 |
+
Telangana,Narayanpet,7173.69,9885.99,2712.3,37.81,22.237,30.645,8.408,37.81,0.0030997994058845585
|
| 239 |
+
Jharkhand,Saraikela Kharsawan,9831.93,12535.75,2703.8199999999997,27.5,29.592,37.73,8.138,27.5,0.003009785464298464
|
| 240 |
+
Nagaland,Kiphire,816.18,1543.97,727.7900000000001,89.17,9.032,17.086,8.054,89.17,0.011066186380455293
|
| 241 |
+
Punjab,Faridkot,3407.44,5669.42,2261.98,66.38,12.032,20.019,7.987,66.38,0.003531096659075435
|
| 242 |
+
Punjab,Gurdaspur,3426.32,5682.24,2255.9199999999996,65.84,12.034,19.957,7.923,65.84,0.0035122230264540383
|
| 243 |
+
Punjab,Ferozepur,5265.56,7713.51,2447.95,46.49,16.909,24.77,7.861,46.49,0.003211244388061288
|
| 244 |
+
Madhya Pradesh,Shajapur,4845.8,7238.83,2393.0299999999997,49.38,15.653,23.383,7.73,49.38,0.003230219984316315
|
| 245 |
+
Tamil Nadu,Theni,8174.12,10702.92,2528.8,30.94,24.85,32.538,7.688,30.94,0.003040082602163903
|
| 246 |
+
Madhya Pradesh,Indore,2547.95,4478.99,1931.04,75.79,10.069,17.7,7.631,75.79,0.00395180439176593
|
| 247 |
+
Punjab,Fatehgarh Sahib,6003.1,8424.81,2421.709999999999,40.34,18.839,26.439,7.6,40.34,0.0031382119238393493
|
| 248 |
+
Assam,Hojai,6011.53,8414.89,2403.3599999999997,39.98,18.844,26.378,7.534,39.98,0.0031346429278403336
|
| 249 |
+
Gujarat,Navsari,4433.67,6727.41,2293.74,51.73,14.493,21.991,7.498,51.74,0.0032688495084207893
|
| 250 |
+
Haryana,Karnal,2122.95,3816.35,1693.4,79.77,9.291,16.702,7.411,79.77,0.004376457288207448
|
| 251 |
+
Uttar Pradesh,Sant Ravidas Nagar,6400.93,8786.33,2385.3999999999996,37.27,19.802,27.182,7.38,37.27,0.003093612959366842
|
| 252 |
+
Madhya Pradesh,Neemuch,4648.57,6860.13,2211.5600000000004,47.58,14.956,22.071,7.115,47.57,0.0032173335025610025
|
| 253 |
+
Assam,Tamulpur,3439.66,5511.53,2071.87,60.23,11.649,18.666,7.017,60.24,0.0033866719385055497
|
| 254 |
+
Manipur,Kamjong,2350.65,4119.4,1768.7499999999995,75.25,9.258,16.224,6.966,75.24,0.003938485099865994
|
| 255 |
+
Punjab,Mansa,4454.01,6613.28,2159.2699999999995,48.48,14.34,21.292,6.952,48.48,0.003219570679006109
|
| 256 |
+
Gujarat,Chhotaudepur,6059.22,8306.32,2247.0999999999995,37.09,18.743,25.694,6.951,37.09,0.0030933024382676314
|
| 257 |
+
Karnataka,Uttara Kannada,5944.05,8169.94,2225.8899999999994,37.45,18.412,25.307,6.895,37.45,0.00309755133284545
|
| 258 |
+
Maharashtra,Pune,3191.43,5200.35,2008.9200000000005,62.95,10.937,17.822,6.885,62.95,0.003426990408688268
|
| 259 |
+
Himachal Pradesh,Una,4882.35,7037.36,2155.0099999999993,44.14,15.532,22.388,6.856,44.14,0.003181254928466824
|
| 260 |
+
Gujarat,Patan,2741.33,4625.57,1884.2399999999998,68.73,9.94,16.772,6.832,68.73,0.003625977171664849
|
| 261 |
+
Gujarat,Surat,5414.61,7549.95,2135.34,39.44,16.93,23.607,6.677,39.44,0.003126725655218012
|
| 262 |
+
Arunachal Pradesh,Papum Pare,5919.32,8039.6,2120.2800000000007,35.82,18.238,24.771,6.533,35.82,0.003081097153051364
|
| 263 |
+
Gujarat,Surendranagar,2257.0,3934.88,1677.88,74.34,8.783,15.312,6.529,74.34,0.003891448825875055
|
| 264 |
+
Maharashtra,Sindhudurg,1861.05,3348.91,1487.86,79.95,8.155,14.675,6.52,79.95,0.0043819349292066306
|
| 265 |
+
Arunachal Pradesh,Kurung Kumey,4870.3,6923.1,2052.8,42.15,15.395,21.884,6.489,42.15,0.003160996242531261
|
| 266 |
+
Uttar Pradesh,Hathras,4062.16,6053.5,1991.3400000000001,49.02,13.103,19.526,6.423,49.02,0.003225623806053922
|
| 267 |
+
Maharashtra,Raigad,1328.72,2455.88,1127.16,84.83,7.548,13.951,6.403,84.83,0.005680655066530194
|
| 268 |
+
Jammu and Kashmir,Ramban,2849.95,4685.15,1835.1999999999998,64.39,9.829,16.158,6.329,64.39,0.0034488324356567665
|
| 269 |
+
Uttar Pradesh,Meerut,2873.55,4708.36,1834.8099999999995,63.85,9.883,16.193,6.31,63.85,0.003439299820779175
|
| 270 |
+
Punjab,Barnala,2131.6,3727.82,1596.2200000000003,74.88,8.384,14.662,6.278,74.88,0.003933195721523739
|
| 271 |
+
Maharashtra,Satara,4626.42,6593.16,1966.7399999999998,42.51,14.635,20.856,6.221,42.51,0.0031633530894298397
|
| 272 |
+
Jammu and Kashmir,Badgam,6410.72,8440.36,2029.6400000000003,31.66,19.518,25.697,6.179,31.66,0.0030445878154091895
|
| 273 |
+
Haryana,Kaithal,1498.38,2736.94,1238.56,82.66,7.316,13.363,6.047,82.65,0.004882606548405611
|
| 274 |
+
Assam,Golaghat,7105.04,9084.66,1979.62,27.86,21.405,27.369,5.964,27.86,0.003012650175086981
|
| 275 |
+
Uttar Pradesh,Saharanpur,6696.44,8634.88,1938.4399999999996,28.95,20.223,26.077,5.854,28.95,0.003019962845930076
|
| 276 |
+
Assam,Morigaon,8064.82,10020.15,1955.33,24.25,24.108,29.953,5.845,24.25,0.0029892793639535666
|
| 277 |
+
Madhya Pradesh,Umaria,8236.58,10159.07,1922.4899999999998,23.34,24.59,30.33,5.74,23.34,0.0029854624127975448
|
| 278 |
+
Mizoram,Khawzawl,2931.71,4596.88,1665.17,56.8,9.773,15.324,5.551,56.8,0.0033335493619764574
|
| 279 |
+
Himachal Pradesh,Kinnaur,2235.16,3759.36,1524.2000000000003,68.19,8.04,13.523,5.483,68.2,0.003597057928738882
|
| 280 |
+
Maharashtra,Thane,2251.89,3771.21,1519.3200000000002,67.47,8.055,13.49,5.435,67.47,0.0035769953239279005
|
| 281 |
+
Jammu and Kashmir,Udhampur,2678.67,4277.62,1598.9499999999998,59.69,9.068,14.481,5.413,59.69,0.0033852620890217904
|
| 282 |
+
Gujarat,Kheda,2455.68,4028.11,1572.4300000000003,64.03,8.447,13.856,5.409,64.03,0.003439780427417253
|
| 283 |
+
Nagaland,Longleng,2238.52,3736.67,1498.15,66.93,7.979,13.319,5.34,66.93,0.0035644086271286388
|
| 284 |
+
Uttarakhand,Nainital,2349.6,3879.6,1530.0,65.12,8.144,13.447,5.303,65.12,0.0034661218930881854
|
| 285 |
+
Assam,Chirang,5967.72,7716.8,1749.08,29.31,18.028,23.312,5.284,29.31,0.0030209192120273735
|
| 286 |
+
Assam,Cachar,7488.69,9250.16,1761.4700000000003,23.52,22.366,27.627,5.261,23.52,0.002986637182204097
|
| 287 |
+
Chhattisgarh,Narayanpur,1998.18,3407.76,1409.5800000000002,70.54,7.422,12.658,5.236,70.55,0.003714380085878149
|
| 288 |
+
Uttarakhand,Rudra Prayag,3212.41,4827.86,1615.4499999999998,50.29,10.392,15.618,5.226,50.29,0.003234954442303442
|
| 289 |
+
Gujarat,Anand,1480.39,2658.57,1178.18,79.59,6.403,11.499,5.096,79.59,0.004325211599646038
|
| 290 |
+
Ladakh,Leh (Ladakh),2869.19,4410.63,1541.44,53.72,9.474,14.564,5.09,53.73,0.003301977213081044
|
| 291 |
+
Dadra and Nagar Haveli,Dadra And Nagar Haveli,1016.46,1882.4,865.94,85.19,5.874,10.878,5.004,85.19,0.005778879641107372
|
| 292 |
+
Himachal Pradesh,Bilaspur,5114.77,6752.61,1637.8399999999992,32.02,15.589,20.581,4.992,32.02,0.0030478398833183114
|
| 293 |
+
Manipur,Tamenglong,2916.99,4436.63,1519.6400000000003,52.1,9.566,14.55,4.984,52.1,0.0032794078827832803
|
| 294 |
+
Mizoram,Saitual,3981.55,5566.14,1584.5900000000001,39.8,12.464,17.424,4.96,39.79,0.00313043915058206
|
| 295 |
+
Uttarakhand,Almora,3196.79,4735.01,1538.2200000000003,48.12,10.287,15.237,4.95,48.12,0.003217915471457306
|
| 296 |
+
Jharkhand,Khunti,5276.33,6899.11,1622.7799999999997,30.76,16.019,20.946,4.927,30.76,0.0030360117733348744
|
| 297 |
+
Karnataka,Udupi,2698.69,4182.7,1484.0099999999998,54.99,8.953,13.876,4.923,54.99,0.0033175355450236962
|
| 298 |
+
Uttarakhand,Chamoli,6109.62,7734.55,1624.9300000000003,26.6,18.354,23.235,4.881,26.59,0.0030041148221984343
|
| 299 |
+
Punjab,Pathankot,2516.6,3936.89,1420.29,56.44,8.384,13.116,4.732,56.44,0.0033314789795756182
|
| 300 |
+
Madhya Pradesh,Agar-Malwa,6097.96,7675.67,1577.71,25.87,18.282,23.012,4.73,25.87,0.0029980518074897177
|
| 301 |
+
Arunachal Pradesh,Lohit,1281.83,2322.85,1041.02,81.21,5.802,10.514,4.712,81.21,0.0045263412464991454
|
| 302 |
+
Gujarat,Mahesana,2617.51,4037.94,1420.4299999999998,54.27,8.676,13.384,4.708,54.26,0.0033146005172855117
|
| 303 |
+
Arunachal Pradesh,Tawang,1294.84,2334.71,1039.8700000000001,80.31,5.761,10.388,4.627,80.32,0.0044491983565537055
|
| 304 |
+
Odisha,Sonepur,6089.58,7621.07,1531.4899999999998,25.15,18.226,22.81,4.584,25.15,0.002992981453564942
|
| 305 |
+
Madhya Pradesh,Narsinghpur,5832.08,7330.45,1498.37,25.69,17.483,21.975,4.492,25.69,0.0029977297979451585
|
| 306 |
+
Assam,Charaideo,2716.13,4062.36,1346.23,49.56,8.778,13.129,4.351,49.57,0.003231804074179071
|
| 307 |
+
Uttar Pradesh,Muzaffarnagar,3513.82,4893.2,1379.3799999999997,39.26,10.978,15.288,4.31,39.26,0.003124235162871177
|
| 308 |
+
Arunachal Pradesh,Longding,1406.09,2479.36,1073.2700000000002,76.33,5.615,9.901,4.286,76.33,0.003993343242608937
|
| 309 |
+
Karnataka,Kodagu,1619.25,2764.45,1145.1999999999998,70.72,6.02,10.278,4.258,70.73,0.003717770572796047
|
| 310 |
+
Jammu and Kashmir,Jammu,4006.04,5390.27,1384.2300000000005,34.55,12.299,16.549,4.25,34.56,0.0030701141276672224
|
| 311 |
+
Gujarat,Bharuch,1404.45,2463.77,1059.32,75.43,5.536,9.712,4.176,75.43,0.00394175655950728
|
| 312 |
+
Arunachal Pradesh,Anjaw,1744.47,2915.13,1170.66,67.11,6.222,10.397,4.175,67.1,0.0035666993413472287
|
| 313 |
+
Karnataka,Bengaluru Rural,5547.12,6922.12,1375.0,24.79,16.601,20.716,4.115,24.79,0.0029927241523529328
|
| 314 |
+
Gujarat,Gir Somnath,934.42,1716.95,782.5300000000001,83.74,4.905,9.013,4.108,83.75,0.0052492455212859316
|
| 315 |
+
Punjab,Bhatinda,5752.63,7116.15,1363.5199999999995,23.7,17.188,21.262,4.074,23.7,0.002987850774341475
|
| 316 |
+
Uttar Pradesh,Bulandshahr,2760.56,4023.97,1263.4099999999999,45.77,8.853,12.905,4.052,45.77,0.0032069580085200104
|
| 317 |
+
Manipur,Jiribam,1339.39,2356.91,1017.5199999999998,75.97,5.295,9.318,4.023,75.98,0.003953292170316338
|
| 318 |
+
Gujarat,Botad,1175.7,2107.14,931.4399999999998,79.22,5.037,9.028,3.991,79.23,0.004284256187803011
|
| 319 |
+
Mizoram,Hnahthial,2314.37,3528.44,1214.0700000000002,52.46,7.598,11.584,3.986,52.46,0.0032829668549108396
|
| 320 |
+
Haryana,Jind,2757.91,3985.19,1227.2800000000002,44.5,8.785,12.694,3.909,44.5,0.003185383134329982
|
| 321 |
+
Gujarat,Bhavnagar,2359.39,3558.68,1199.29,50.83,7.676,11.578,3.902,50.83,0.0032533832897486217
|
| 322 |
+
Gujarat,Ahmadabad,1441.0,2473.16,1032.1599999999999,71.63,5.426,9.313,3.887,71.64,0.003765440666204025
|
| 323 |
+
Arunachal Pradesh,Siang,1002.84,1824.54,821.6999999999999,81.94,4.728,8.602,3.874,81.94,0.004714610506162498
|
| 324 |
+
Sikkim,Gyalshing District,2195.12,3362.52,1167.4,53.18,7.242,11.093,3.851,53.18,0.003299136265898903
|
| 325 |
+
Gujarat,Jamnagar,1201.34,2133.54,932.2,77.6,4.905,8.711,3.806,77.59,0.004082940716200243
|
| 326 |
+
Uttar Pradesh,Hapur,1001.94,1819.27,817.3299999999999,81.57,4.597,8.347,3.75,81.57,0.004588099087769727
|
| 327 |
+
Gujarat,Kachchh,2315.28,3454.46,1139.1799999999998,49.2,7.469,11.144,3.675,49.2,0.0032259597111364497
|
| 328 |
+
Arunachal Pradesh,Changlang,3594.93,4778.6,1183.6700000000005,32.93,10.998,14.619,3.621,32.92,0.003059308526174362
|
| 329 |
+
Assam,Kamrup (Metro),869.13,1589.13,720.0000000000001,82.84,4.252,7.774,3.522,82.83,0.00489224857040949
|
| 330 |
+
Uttar Pradesh,Shamli,1635.85,2644.87,1009.02,61.68,5.576,9.015,3.439,61.68,0.00340862548522175
|
| 331 |
+
Madhya Pradesh,Bhopal,4848.55,5971.47,1122.92,23.16,14.465,17.815,3.35,23.16,0.002983366161017211
|
| 332 |
+
Punjab,Sas Nagar Mohali,1735.13,2733.21,998.0799999999999,57.52,5.793,9.125,3.332,57.52,0.003338654740566989
|
| 333 |
+
Himachal Pradesh,Lahul And Spiti,293.45,556.71,263.26000000000005,89.71,3.618,6.864,3.246,89.72,0.012329187255069006
|
| 334 |
+
Arunachal Pradesh,West Siang,304.72,576.99,272.27,89.35,3.621,6.856,3.235,89.34,0.011883040168023102
|
| 335 |
+
Gujarat,Rajkot,1126.2,1953.25,827.05,73.44,4.338,7.524,3.186,73.44,0.003851891315929675
|
| 336 |
+
Haryana,Sirsa,3028.77,4064.37,1035.6,34.19,9.292,12.469,3.177,34.19,0.0030679120567094894
|
| 337 |
+
Arunachal Pradesh,Leparada,509.71,952.24,442.53000000000003,86.82,3.621,6.765,3.144,86.83,0.007104039551902063
|
| 338 |
+
Sikkim,Gangtok District,1249.02,2112.05,863.0300000000002,69.1,4.534,7.667,3.133,69.1,0.003630045956029527
|
| 339 |
+
Arunachal Pradesh,Namsai,3982.09,5026.77,1044.6800000000003,26.23,11.942,15.075,3.133,26.24,0.002998927698771248
|
| 340 |
+
Arunachal Pradesh,Shi Yomi,173.29,330.95,157.66,90.98,3.432,6.554,3.122,90.97,0.019804951237809454
|
| 341 |
+
Arunachal Pradesh,Upper Siang,1091.95,1891.87,799.9199999999998,73.26,4.2,7.277,3.077,73.26,0.003846329960163011
|
| 342 |
+
Arunachal Pradesh,Lower Subansiri,648.37,1196.04,547.67,84.47,3.637,6.709,3.072,84.47,0.0056094513934944554
|
| 343 |
+
Punjab,Malerkotla,1160.05,1982.58,822.53,70.9,4.317,7.378,3.061,70.91,0.0037213913193396837
|
| 344 |
+
Arunachal Pradesh,Upper Dibang Valley,567.05,1054.23,487.18000000000006,85.91,3.539,6.58,3.041,85.93,0.006241072215853982
|
| 345 |
+
Punjab,Ropar,3775.12,4758.68,983.5600000000004,26.05,11.319,14.268,2.949,26.05,0.0029983152853419234
|
| 346 |
+
Gujarat,Gandhinagar,1119.64,1903.39,783.75,70.0,4.116,6.997,2.881,70.0,0.003676181629809581
|
| 347 |
+
Madhya Pradesh,Narmadapuram,3678.48,4616.9,938.4199999999996,25.51,11.024,13.836,2.812,25.51,0.002996890019790783
|
| 348 |
+
Madhya Pradesh,Harda,2615.76,3529.06,913.2999999999997,34.92,8.042,10.85,2.808,34.92,0.0030744410802214265
|
| 349 |
+
Haryana,Sonipat,1313.62,2131.01,817.3900000000003,62.22,4.489,7.282,2.793,62.22,0.003417274402034074
|
| 350 |
+
Madhya Pradesh,Datia,4044.75,4974.2,929.4499999999998,22.98,12.066,14.839,2.773,22.98,0.002983126274800668
|
| 351 |
+
Bihar,Begusarai,14196.08,15109.62,913.5400000000009,6.44,42.323,45.047,2.724,6.44,0.0029813159689153624
|
| 352 |
+
Haryana,Jhajjar,1151.25,1913.41,762.1600000000001,66.2,4.049,6.73,2.681,66.21,0.0035170466883821935
|
| 353 |
+
Haryana,Panipat,1346.3,2132.89,786.5899999999999,58.43,4.53,7.177,2.647,58.43,0.003364777538438684
|
| 354 |
+
Jammu and Kashmir,Srinagar,323.32,609.87,286.55,88.63,2.953,5.57,2.617,88.62,0.009133366324384511
|
| 355 |
+
Nagaland,Wokha,3828.59,4701.44,872.8499999999995,22.8,11.42,14.024,2.604,22.8,0.002982821351985979
|
| 356 |
+
Uttar Pradesh,Baghpat,126.15,241.15,115.0,91.16,2.822,5.395,2.573,91.18,0.022370194213238208
|
| 357 |
+
Jammu and Kashmir,Samba,820.55,1452.82,632.27,77.05,3.321,5.88,2.559,77.06,0.004047285357382244
|
| 358 |
+
Haryana,Yamunanagar,641.4,1161.14,519.7400000000001,81.03,2.902,5.254,2.352,81.05,0.004524477705020269
|
| 359 |
+
Karnataka,Bengaluru,219.37,415.78,196.40999999999997,89.53,2.619,4.964,2.345,89.54,0.011938733646350915
|
| 360 |
+
Haryana,Mahendragarh,721.88,1280.73,558.85,77.42,2.943,5.221,2.278,77.4,0.004076854878927246
|
| 361 |
+
Gujarat,Devbhumi Dwarka,680.46,1214.63,534.1700000000001,78.5,2.869,5.121,2.252,78.49,0.004216265467477883
|
| 362 |
+
Manipur,Pherzawl,1108.37,1759.96,651.5900000000001,58.79,3.731,5.924,2.193,58.78,0.003366204426319731
|
| 363 |
+
Haryana,Ambala,313.94,588.77,274.83,87.54,2.411,4.522,2.111,87.56,0.007679811428935465
|
| 364 |
+
Gujarat,Amreli,1589.45,2256.52,667.0699999999999,41.97,5.017,7.123,2.106,41.98,0.0031564377614898237
|
| 365 |
+
Haryana,Bhiwani,2422.68,3119.6,696.9200000000001,28.77,7.316,9.421,2.105,28.77,0.003019796258688725
|
| 366 |
+
Haryana,Palwal,172.23,327.37,155.14000000000001,90.08,2.311,4.393,2.082,90.09,0.013418103698542646
|
| 367 |
+
Haryana,Faridabad,85.33,163.27,77.94000000000001,91.34,2.273,4.349,2.076,91.33,0.026637759287472168
|
| 368 |
+
Punjab,Kapurthala,2853.6,3540.3,686.7000000000003,24.06,8.53,10.583,2.053,24.07,0.002989206616204093
|
| 369 |
+
Haryana,Charki Dadri,390.84,725.93,335.09,85.74,2.381,4.422,2.041,85.72,0.006092006959369563
|
| 370 |
+
Haryana,Panchkula,414.45,766.78,352.33,85.01,2.381,4.405,2.024,85.01,0.0057449632042465915
|
| 371 |
+
Haryana,Gurugram,212.8,402.17,189.37,88.99,2.273,4.296,2.023,89.0,0.01068139097744361
|
| 372 |
+
Haryana,Rewari,321.95,602.05,280.09999999999997,87.0,2.306,4.312,2.006,86.99,0.007162602888647306
|
| 373 |
+
Haryana,Kurukshetra,1416.86,2024.31,607.45,42.87,4.489,6.414,1.925,42.88,0.0031682735062038593
|
| 374 |
+
Telangana,Medchal,230.3,433.99,203.69,88.45,2.089,3.937,1.848,88.46,0.00907077724706904
|
| 375 |
+
Gujarat,Vadodara,1362.87,1927.45,564.5800000000002,41.43,4.297,6.077,1.78,41.42,0.0031529052660928775
|
| 376 |
+
Gujarat,Morbi,585.58,1018.79,433.2099999999999,73.98,2.26,3.932,1.672,73.98,0.0038594214283274694
|
| 377 |
+
Goa,South Goa,124.16,236.22,112.06,90.25,1.789,3.404,1.615,90.27,0.014408827319587628
|
| 378 |
+
Gujarat,Porbandar,875.9,1354.39,478.4900000000001,54.63,2.905,4.492,1.587,54.63,0.0033165886516725654
|
| 379 |
+
Goa,North Goa,133.18,252.9,119.72,89.89,1.754,3.331,1.577,89.91,0.013170145667517645
|
| 380 |
+
Arunachal Pradesh,East Siang,1080.35,1553.3,472.95000000000005,43.78,3.432,4.934,1.502,43.76,0.00317674827602166
|
| 381 |
+
Haryana,Rohtak,1728.22,2140.98,412.76,23.88,5.165,6.399,1.234,23.89,0.002988624133501522
|
| 382 |
+
Andaman and Nicobar Islands,North And Middle Andaman,199.7,372.72,173.02000000000004,86.64,1.398,2.609,1.211,86.62,0.00700050075112669
|
| 383 |
+
Andaman and Nicobar Islands,South Andaman,71.59,136.46,64.87,90.61,1.331,2.537,1.206,90.61,0.018591982120407878
|
| 384 |
+
Andaman and Nicobar Islands,Nicobars,27.23,52.15,24.919999999999998,91.52,1.289,2.469,1.18,91.54,0.04733749540947484
|
| 385 |
+
Sikkim,Mangan District,716.09,641.34,-74.75,-10.44,2.127,1.905,-0.222,-10.44,0.00297029702970297
|
| 386 |
+
Arunachal Pradesh,Lower Siang,1167.08,1043.69,-123.38999999999987,-10.57,3.465,3.099,-0.366,-10.56,0.0029689481440860954
|
| 387 |
+
Manipur,Noney,1876.03,1632.54,-243.49,-12.98,5.521,4.804,-0.717,-12.99,0.00294291669109769
|
| 388 |
+
Arunachal Pradesh,Lower Dibang Valley,2574.59,2233.55,-341.03999999999996,-13.25,7.574,6.571,-1.003,-13.24,0.002941827630807235
|
| 389 |
+
Uttarakhand,Udam Singh Nagar,3159.23,2795.65,-363.5799999999999,-11.51,9.341,8.266,-1.075,-11.51,0.00295673312800904
|
| 390 |
+
Manipur,Tengnoupal,2469.75,2033.66,-436.0899999999999,-17.66,7.116,5.86,-1.256,-17.65,0.0028812632857576678
|
| 391 |
+
Assam,Majuli,2831.61,2392.18,-439.4300000000003,-15.52,8.245,6.965,-1.28,-15.52,0.0029117710419160826
|
| 392 |
+
Puducherry,Karaikal,1455.97,961.44,-494.53,-33.97,3.879,2.561,-1.318,-33.98,0.0026642032459460014
|
| 393 |
+
Sikkim,Namchi District,3315.86,2850.04,-465.82000000000016,-14.05,9.717,8.352,-1.365,-14.05,0.0029304614790733024
|
| 394 |
+
Gujarat,Dang,3925.72,3463.43,-462.28999999999996,-11.78,11.591,10.226,-1.365,-11.78,0.0029525793994477447
|
| 395 |
+
Sikkim,Soreng,2153.73,1563.27,-590.46,-27.42,5.971,4.334,-1.637,-27.42,0.0027723995115450867
|
| 396 |
+
Uttarakhand,Bageshwar,2463.99,1870.82,-593.1699999999998,-24.07,6.946,5.274,-1.672,-24.07,0.002819004947260338
|
| 397 |
+
Sikkim,Pakyong,1894.76,1210.67,-684.0899999999999,-36.1,4.971,3.176,-1.795,-36.11,0.002623551267706728
|
| 398 |
+
Assam,West Karbi Anglong,1659.93,860.92,-799.0100000000001,-48.14,3.839,1.991,-1.848,-48.14,0.0023127481279331054
|
| 399 |
+
Punjab,Sangrur,5891.34,5260.59,-630.75,-10.71,17.477,15.606,-1.871,-10.71,0.0029665576931563954
|
| 400 |
+
Manipur,Chandel,2039.1,1321.98,-717.1199999999999,-35.17,5.386,3.492,-1.894,-35.17,0.002641361384924722
|
| 401 |
+
Assam,Dima Hasao,1746.74,917.62,-829.12,-47.47,4.099,2.153,-1.946,-47.47,0.0023466572014152077
|
| 402 |
+
Uttarakhand,Pithoragarh,4955.61,4279.31,-676.2999999999993,-13.65,14.564,12.576,-1.988,-13.65,0.0029388914785465365
|
| 403 |
+
Arunachal Pradesh,Pakke Kessang,1949.78,966.95,-982.8299999999999,-50.41,3.99,1.979,-2.011,-50.4,0.002046384720327422
|
| 404 |
+
Jammu and Kashmir,Bandipora,3305.25,2571.41,-733.8400000000001,-22.2,9.377,7.295,-2.082,-22.2,0.0028370017396566072
|
| 405 |
+
Arunachal Pradesh,West Kameng,2025.85,1126.53,-899.3199999999999,-44.39,4.963,2.76,-2.203,-44.39,0.002449835871362638
|
| 406 |
+
Mizoram,Champhai,6114.68,5353.76,-760.9200000000001,-12.44,18.024,15.781,-2.243,-12.44,0.00294766038451726
|
| 407 |
+
Gujarat,Valsad,3907.92,3066.39,-841.5300000000002,-21.53,11.113,8.72,-2.393,-21.53,0.0028437122561362563
|
| 408 |
+
Maharashtra,Kolhapur,3185.13,2277.85,-907.2800000000002,-28.48,8.773,6.274,-2.499,-28.49,0.002754361674405754
|
| 409 |
+
Tamil Nadu,Coimbatore,8418.98,7573.88,-845.0999999999995,-10.04,25.057,22.542,-2.515,-10.04,0.002976251279846252
|
| 410 |
+
Odisha,Boudh,6544.09,5685.99,-858.1000000000004,-13.11,19.256,16.731,-2.525,-13.11,0.002942502318886201
|
| 411 |
+
Madhya Pradesh,Sehore,5612.05,4711.13,-900.9200000000001,-16.05,16.261,13.651,-2.61,-16.05,0.0028975151682540247
|
| 412 |
+
Jammu and Kashmir,Pulwama,5663.08,4761.54,-901.54,-15.92,16.422,13.808,-2.614,-15.92,0.0028998354252456264
|
| 413 |
+
Chhattisgarh,Bijapur,4206.81,3261.55,-945.2600000000002,-22.47,11.927,9.247,-2.68,-22.47,0.0028351648874087487
|
| 414 |
+
Haryana,Mewat,2465.14,1265.37,-1199.77,-48.67,5.556,2.852,-2.704,-48.67,0.002253827368830979
|
| 415 |
+
Mizoram,Aizawl,6893.53,5906.68,-986.8499999999995,-14.32,20.163,17.277,-2.886,-14.31,0.002924916552187341
|
| 416 |
+
Uttarakhand,Champawat,3206.03,2121.37,-1084.6600000000003,-33.83,8.545,5.654,-2.891,-33.83,0.0026652900939791576
|
| 417 |
+
Arunachal Pradesh,Kamle,3008.77,1459.95,-1548.82,-51.48,5.802,2.815,-2.987,-51.48,0.0019283627528857306
|
| 418 |
+
Uttar Pradesh,Shravasti,10766.0,9714.08,-1051.92,-9.77,32.073,28.939,-3.134,-9.77,0.0029791008731190785
|
| 419 |
+
Punjab,Jalandhar,3084.25,1764.56,-1319.69,-42.79,7.645,4.374,-3.271,-42.79,0.0024787225419469886
|
| 420 |
+
Meghalaya,East Jaintia Hills,4430.38,3180.23,-1250.15,-28.22,12.214,8.767,-3.447,-28.22,0.0027568741281786212
|
| 421 |
+
Tamil Nadu,The Nilgiris,3672.72,2351.62,-1321.1,-35.97,9.652,6.18,-3.472,-35.97,0.0026280250059901107
|
| 422 |
+
Jammu and Kashmir,Ganderbal,3554.17,2213.95,-1340.2200000000003,-37.71,9.267,5.773,-3.494,-37.7,0.002607359805524215
|
| 423 |
+
Madhya Pradesh,Mandsaur,10840.0,9635.95,-1204.0499999999993,-11.11,32.12,28.552,-3.568,-11.11,0.0029630996309963097
|
| 424 |
+
Mizoram,Serchhip,5365.91,4081.31,-1284.6,-23.94,15.145,11.519,-3.626,-23.94,0.002822447637027084
|
| 425 |
+
Tamil Nadu,Kanniyakumari,6268.92,4960.87,-1308.0500000000002,-20.87,17.853,14.128,-3.725,-20.86,0.002847858961352195
|
| 426 |
+
Ladakh,Kargil,4686.31,3313.83,-1372.4800000000005,-29.29,12.869,9.1,-3.769,-29.29,0.0027460838058088344
|
| 427 |
+
Karnataka,Shivamogga,8906.5,7607.67,-1298.83,-14.58,26.02,22.226,-3.794,-14.58,0.002921461853702352
|
| 428 |
+
Telangana,Yadadri Bhuvanagiri,10163.11,8871.22,-1291.8900000000012,-12.71,29.954,26.146,-3.808,-12.71,0.0029473261629560242
|
| 429 |
+
Tripura,Unakoti,9581.79,8248.52,-1333.2700000000004,-13.91,28.11,24.199,-3.911,-13.91,0.00293368984292079
|
| 430 |
+
Punjab,Fazilka,9289.54,7947.27,-1342.2700000000004,-14.45,27.145,23.223,-3.922,-14.45,0.002922103785548046
|
| 431 |
+
Meghalaya,Ri Bhoi,5965.18,4521.16,-1444.0200000000004,-24.21,16.787,12.723,-4.064,-24.21,0.002814164870129652
|
| 432 |
+
Uttar Pradesh,Sambhal,9062.69,7632.05,-1430.6400000000003,-15.79,26.342,22.184,-4.158,-15.78,0.0029066425090122244
|
| 433 |
+
Telangana,Jayashanker Bhopalapally,6966.9,5503.9,-1463.0,-21.0,19.837,15.671,-4.166,-21.0,0.0028473209031276464
|
| 434 |
+
Punjab,Nawanshahr,4721.0,3104.87,-1616.13,-34.23,12.55,8.254,-4.296,-34.23,0.0026583350984960815
|
| 435 |
+
Odisha,Gajapati,10972.96,9504.79,-1468.1699999999983,-13.38,32.268,27.951,-4.317,-13.38,0.0029406832796255527
|
| 436 |
+
Punjab,Patiala,8603.83,7096.12,-1507.71,-17.52,24.823,20.473,-4.35,-17.52,0.002885110468244956
|
| 437 |
+
Arunachal Pradesh,Tirap,4475.82,2165.83,-2309.99,-51.61,8.448,4.088,-4.36,-51.61,0.0018874753676421305
|
| 438 |
+
Bihar,Banka,14499.86,13024.97,-1474.8900000000012,-10.17,43.151,38.762,-4.389,-10.17,0.002975959767887414
|
| 439 |
+
Madhya Pradesh,Raisen,6336.35,4743.19,-1593.1600000000008,-25.14,17.773,13.304,-4.469,-25.14,0.002804927126815911
|
| 440 |
+
Jharkhand,Koderma,7022.2,5406.79,-1615.4099999999999,-23.0,19.879,15.306,-4.573,-23.0,0.002830879211643075
|
| 441 |
+
Madhya Pradesh,Jabalpur,8190.32,6579.9,-1610.42,-19.66,23.443,18.834,-4.609,-19.66,0.00286228132722531
|
| 442 |
+
Assam,Kokrajhar,13384.29,11790.29,-1594.0,-11.91,39.489,34.786,-4.703,-11.91,0.002950399311431536
|
| 443 |
+
Karnataka,Chikkamagaluru,10039.32,8306.89,-1732.4300000000003,-17.26,28.99,23.987,-5.003,-17.26,0.0028876457768056
|
| 444 |
+
Jammu and Kashmir,Anantnag,13839.33,12135.64,-1703.6900000000005,-12.31,40.803,35.78,-5.023,-12.31,0.002948336371775223
|
| 445 |
+
Meghalaya,South West Khasi Hills,5519.03,3614.95,-1904.08,-34.5,14.626,9.58,-5.046,-34.5,0.0026501033696138634
|
| 446 |
+
Uttarakhand,Haridwar,5148.42,3213.92,-1934.5,-37.57,13.443,8.392,-5.051,-37.57,0.002611092335124174
|
| 447 |
+
Odisha,Sambalpur,7997.72,6179.28,-1818.4400000000005,-22.74,22.647,17.498,-5.149,-22.74,0.0028316820293783724
|
| 448 |
+
Mizoram,Kolasib,6923.91,5053.45,-1870.46,-27.01,19.277,14.069,-5.208,-27.02,0.0027841205330514118
|
| 449 |
+
Gujarat,Sabar Kantha,6503.95,4581.75,-1922.1999999999998,-29.55,17.818,12.552,-5.266,-29.55,0.0027395659560728483
|
| 450 |
+
Uttar Pradesh,Agra,15464.1,13663.75,-1800.3500000000004,-11.64,45.714,40.392,-5.322,-11.64,0.0029561371175820124
|
| 451 |
+
Punjab,Amritsar,4924.64,2751.66,-2172.9800000000005,-44.12,12.08,6.75,-5.33,-44.12,0.0024529711816498262
|
| 452 |
+
Haryana,Fatehabad,7344.91,5429.44,-1915.4700000000003,-26.08,20.531,15.177,-5.354,-26.08,0.0027952691047269467
|
| 453 |
+
Meghalaya,East Garo Hills,9457.67,7572.77,-1884.8999999999996,-19.93,27.066,21.672,-5.394,-19.93,0.002861804228737099
|
| 454 |
+
Assam,Dibrugarh,6840.57,4873.75,-1966.8199999999997,-28.75,18.815,13.405,-5.41,-28.75,0.002750501785669908
|
| 455 |
+
Manipur,Senapati,5451.86,3330.46,-2121.3999999999996,-38.91,14.018,8.563,-5.455,-38.91,0.002571232570168713
|
| 456 |
+
Manipur,Ukhrul,5886.34,3729.64,-2156.7000000000003,-36.64,15.418,9.769,-5.649,-36.64,0.002619284648864999
|
| 457 |
+
Mizoram,Siaha,6120.58,3959.88,-2160.7,-35.3,16.165,10.458,-5.707,-35.3,0.0026410895699427177
|
| 458 |
+
Maharashtra,Dharashiv,10958.88,8877.33,-2081.5499999999993,-18.99,31.468,25.491,-5.977,-18.99,0.002871461317214898
|
| 459 |
+
Madhya Pradesh,Katni,14855.97,12808.68,-2047.289999999999,-13.78,43.63,37.617,-6.013,-13.78,0.002936866458400226
|
| 460 |
+
Odisha,Deogarh,5432.19,2831.93,-2600.2599999999998,-47.87,12.612,6.575,-6.037,-47.87,0.002321715551186538
|
| 461 |
+
Karnataka,Mysuru,8591.76,6420.04,-2171.7200000000003,-25.28,24.093,18.003,-6.09,-25.28,0.002804198441297243
|
| 462 |
+
Madhya Pradesh,Panna,12021.34,9882.61,-2138.7299999999996,-17.79,34.635,28.473,-6.162,-17.79,0.002881126396890862
|
| 463 |
+
Manipur,Thoubal,7686.2,3647.4,-4038.7999999999997,-52.55,11.733,5.568,-6.165,-52.54,0.001526502042621842
|
| 464 |
+
Assam,Kamrup,10554.51,8309.92,-2244.59,-21.27,30.036,23.648,-6.388,-21.27,0.0028457976732221583
|
| 465 |
+
Manipur,Kakching,5831.62,3001.19,-2830.43,-48.54,13.237,6.812,-6.425,-48.54,0.0022698666922741882
|
| 466 |
+
Punjab,Mukatsar,6878.49,4431.84,-2446.6499999999996,-35.57,18.146,11.692,-6.454,-35.57,0.002638078996989165
|
| 467 |
+
Assam,Sonitpur,9771.88,7458.61,-2313.2699999999995,-23.67,27.606,21.071,-6.535,-23.67,0.0028250449248251107
|
| 468 |
+
Jharkhand,East Singhbum,9150.81,6813.31,-2337.499999999999,-25.54,25.614,19.071,-6.543,-25.54,0.0027990964734269427
|
| 469 |
+
Maharashtra,Buldhana,9081.12,6737.15,-2343.970000000001,-25.81,25.4,18.844,-6.556,-25.81,0.002797011822330285
|
| 470 |
+
Telangana,Mulugu,6234.2,3583.38,-2650.8199999999997,-42.52,15.486,8.901,-6.585,-42.52,0.002484039652240865
|
| 471 |
+
Bihar,Supaul,16613.3,14368.25,-2245.0499999999993,-13.51,48.832,42.233,-6.599,-13.51,0.002939331740232224
|
| 472 |
+
Mizoram,Mamit,7510.96,5020.06,-2490.8999999999996,-33.16,20.061,13.408,-6.653,-33.16,0.002670896929287335
|
| 473 |
+
Manipur,Bishnupur,7562.18,3608.77,-3953.4100000000003,-52.28,13.0,6.204,-6.796,-52.28,0.0017190810057417306
|
| 474 |
+
Telangana,Jogulamba Gadwal,8221.69,5703.9,-2517.790000000001,-30.62,22.354,15.508,-6.846,-30.63,0.002718905723762389
|
| 475 |
+
Uttarakhand,Dehradun,6697.08,3974.77,-2722.31,-40.65,16.963,10.068,-6.895,-40.65,0.0025328949333142206
|
| 476 |
+
Telangana,Karimnagar,10068.7,7590.95,-2477.750000000001,-24.61,28.284,21.324,-6.96,-24.61,0.002809101472881305
|
| 477 |
+
Jammu and Kashmir,Kulgam,9059.91,6515.53,-2544.38,-28.08,24.982,17.966,-7.016,-28.08,0.0027574225351024457
|
| 478 |
+
Assam,Tinsukia,6803.94,3938.15,-2865.7899999999995,-42.12,16.945,9.808,-7.137,-42.12,0.002490468757807976
|
| 479 |
+
Punjab,Tarn Taran,6541.89,3349.24,-3192.6500000000005,-48.8,14.739,7.546,-7.193,-48.8,0.002253018623058474
|
| 480 |
+
Maharashtra,Wardha,7141.7,4286.39,-2855.3099999999995,-39.98,18.223,10.937,-7.286,-39.98,0.002551633364605066
|
| 481 |
+
Tripura,West Tripura,12517.25,9938.92,-2578.33,-20.6,35.676,28.327,-7.349,-20.6,0.002850146797419561
|
| 482 |
+
Assam,Baksa,8019.76,5177.88,-2841.88,-35.44,21.163,13.664,-7.499,-35.43,0.0026388570231528127
|
| 483 |
+
Telangana,Nalgonda,24806.56,22250.14,-2556.420000000002,-10.31,73.812,66.205,-7.607,-10.31,0.0029755032539779797
|
| 484 |
+
Jharkhand,Bokaro,12385.93,9702.19,-2683.74,-21.67,35.221,27.589,-7.632,-21.67,0.0028436298283616972
|
| 485 |
+
Assam,Karbi Anglong,8701.63,4140.9,-4560.73,-52.41,14.745,7.017,-7.728,-52.41,0.0016945101090255505
|
| 486 |
+
Maharashtra,Jalgaon,13448.76,10732.48,-2716.2800000000007,-20.2,38.42,30.66,-7.76,-20.2,0.002856768951189552
|
| 487 |
+
Nagaland,Mon,7082.96,3844.01,-3238.95,-45.73,17.037,9.246,-7.791,-45.73,0.002405350305521985
|
| 488 |
+
Telangana,Rajanna Sirsilla,7731.93,4681.98,-3049.9500000000007,-39.45,19.837,12.012,-7.825,-39.45,0.002565594877346277
|
| 489 |
+
Manipur,Imphal West,10697.6,5062.13,-5635.47,-52.68,14.885,7.044,-7.841,-52.68,0.0013914335925815135
|
| 490 |
+
Jammu and Kashmir,Poonch,9185.29,6261.91,-2923.380000000001,-31.83,24.693,16.834,-7.859,-31.83,0.002688320129250138
|
| 491 |
+
Uttar Pradesh,Shahjahanpur,14829.62,12092.15,-2737.470000000001,-18.46,42.603,34.739,-7.864,-18.46,0.0028728315358046933
|
| 492 |
+
Andhra Pradesh,East Godavari,17665.2,14947.41,-2717.790000000001,-15.38,51.442,43.528,-7.914,-15.38,0.0029120530761044313
|
| 493 |
+
Manipur,Churachandpur,7912.79,3860.7,-4052.09,-51.21,15.623,7.623,-8.0,-51.21,0.001974398410674364
|
| 494 |
+
Telangana,Hanumakonda,7405.49,4127.94,-3277.55,-44.26,18.155,10.12,-8.035,-44.26,0.002451559586198888
|
| 495 |
+
Kerala,Kasargod,15054.29,12255.22,-2799.0700000000015,-18.59,43.24,35.2,-8.04,-18.59,0.00287227096063647
|
| 496 |
+
Gujarat,Banas Kantha,8980.72,5966.38,-3014.3399999999992,-33.56,23.967,15.923,-8.044,-33.56,0.002668716984829724
|
| 497 |
+
Bihar,Madhepura,16379.21,13574.64,-2804.5699999999997,-17.12,47.303,39.203,-8.1,-17.12,0.002887990324319671
|
| 498 |
+
Jharkhand,West Singhbhum,12949.84,10091.99,-2857.8500000000004,-22.07,36.781,28.664,-8.117,-22.07,0.0028402667523305305
|
| 499 |
+
Uttar Pradesh,Pratapgarh,24565.83,21804.35,-2761.480000000003,-11.24,72.727,64.552,-8.175,-11.24,0.0029604943126285577
|
| 500 |
+
Telangana,Jangaon,10576.78,7578.13,-2998.6500000000005,-28.35,29.153,20.888,-8.265,-28.35,0.00275632092186847
|
| 501 |
+
Uttar Pradesh,Ayodhya,15086.22,12180.39,-2905.83,-19.26,43.271,34.936,-8.335,-19.26,0.0028682466515800513
|
| 502 |
+
Madhya Pradesh,Ujjain,8555.13,5363.44,-3191.6899999999996,-37.31,22.354,14.014,-8.34,-37.31,0.002612935162878881
|
| 503 |
+
Andhra Pradesh,Konaseema,19338.21,16440.58,-2897.6299999999974,-14.98,56.443,47.986,-8.457,-14.98,0.002918729292938695
|
| 504 |
+
Arunachal Pradesh,East Kameng,10259.58,7145.14,-3114.4399999999996,-30.36,27.996,19.497,-8.499,-30.36,0.0027287666746591967
|
| 505 |
+
Meghalaya,West Garo Hills,26803.49,23897.96,-2905.5300000000025,-10.84,79.489,70.872,-8.617,-10.84,0.0029656212679766703
|
| 506 |
+
Telangana,Nagarkurnool,12421.53,9348.18,-3073.3500000000004,-24.74,34.892,26.259,-8.633,-24.74,0.0028089937390965527
|
| 507 |
+
Jharkhand,Dhanbad,10710.57,7473.55,-3237.0199999999995,-30.22,29.237,20.401,-8.836,-30.22,0.0027297333381883504
|
| 508 |
+
Haryana,Hisar,8273.87,4700.47,-3573.4000000000005,-43.19,20.462,11.625,-8.837,-43.19,0.0024730869593068295
|
| 509 |
+
Maharashtra,Solapur,8194.99,4129.84,-4065.1499999999996,-49.61,17.87,9.006,-8.864,-49.6,0.0021806005864558713
|
| 510 |
+
Jammu and Kashmir,Baramulla,8195.06,4535.2,-3659.8599999999997,-44.66,19.942,11.036,-8.906,-44.66,0.0024334172049991095
|
| 511 |
+
Karnataka,Koppal,30419.8,27406.9,-3012.899999999998,-9.9,90.552,81.583,-8.969,-9.9,0.002976745409240035
|
| 512 |
+
Madhya Pradesh,Khandwa,10715.03,7276.14,-3438.8900000000003,-32.09,28.753,19.525,-9.228,-32.09,0.002683426924609637
|
| 513 |
+
Chhattisgarh,Mahasamund,17755.77,14549.35,-3206.42,-18.06,51.144,41.908,-9.236,-18.06,0.0028804157747030963
|
| 514 |
+
Assam,Biswanath,8946.37,4472.62,-4473.750000000001,-50.01,18.811,9.404,-9.407,-50.01,0.0021026405122971663
|
| 515 |
+
Telangana,Warangal,8559.76,4668.37,-3891.3900000000003,-45.46,20.734,11.308,-9.426,-45.46,0.0024222641756310925
|
| 516 |
+
Chhattisgarh,Surajpur,16607.97,13275.82,-3332.1500000000015,-20.06,47.493,37.964,-9.529,-20.06,0.002859651119311993
|
| 517 |
+
Gujarat,Arvalli,10041.73,6389.39,-3652.3399999999992,-36.37,26.311,16.741,-9.57,-36.37,0.0026201660470855123
|
| 518 |
+
Karnataka,Chamaraja Nagara,8668.41,4681.28,-3987.13,-46.0,20.81,11.238,-9.572,-46.0,0.002400670941960521
|
| 519 |
+
Chhattisgarh,Kondagaon,10500.12,6863.52,-3636.6000000000004,-34.63,27.823,18.187,-9.636,-34.63,0.002649779240618202
|
| 520 |
+
Bihar,Auranagabad,23220.77,19927.62,-3293.1500000000015,-14.18,68.042,58.392,-9.65,-14.18,0.002930221521508546
|
| 521 |
+
Karnataka,Vijaypura,14636.94,11211.11,-3425.83,-23.41,41.388,31.701,-9.687,-23.41,0.0028276402034851543
|
| 522 |
+
Chhattisgarh,Bastar,10032.55,4841.3,-5191.249999999999,-51.74,18.833,9.088,-9.745,-51.74,0.0018771897473723032
|
| 523 |
+
Jharkhand,Ranchi,13203.11,9706.95,-3496.16,-26.48,36.847,27.09,-9.757,-26.48,0.002790781868817271
|
| 524 |
+
Madhya Pradesh,Mandla,30027.05,26731.95,-3295.0999999999985,-10.97,89.045,79.273,-9.772,-10.97,0.0029654927806760903
|
| 525 |
+
Maharashtra,Gadchiroli,20807.51,17411.58,-3395.9299999999967,-16.32,60.192,50.368,-9.824,-16.32,0.0028928016855452672
|
| 526 |
+
Maharashtra,Bhandara,20316.86,16919.53,-3397.3300000000017,-16.72,58.748,48.924,-9.824,-16.72,0.0028915885624058047
|
| 527 |
+
Bihar,Kaimur (Bhabua),15788.74,12325.48,-3463.26,-21.93,44.85,35.012,-9.838,-21.94,0.0028406319947000204
|
| 528 |
+
Telangana,Peddapalli,9217.53,5211.93,-4005.6000000000004,-43.46,22.745,12.861,-9.884,-43.46,0.002467580794421065
|
| 529 |
+
Uttar Pradesh,Bijnor,15624.95,12134.96,-3489.9900000000016,-22.34,44.327,34.426,-9.901,-22.34,0.00283693707819865
|
| 530 |
+
Bihar,Jehanabad,11857.02,8162.56,-3694.46,-31.16,32.07,22.077,-9.993,-31.16,0.002704726820061027
|
| 531 |
+
Maharashtra,Akola,9790.55,4829.22,-4961.329999999999,-50.67,19.785,9.759,-10.026,-50.67,0.0020208262048608096
|
| 532 |
+
Himachal Pradesh,Kangra,16610.78,13056.03,-3554.749999999998,-21.4,47.269,37.153,-10.116,-21.4,0.002845682141356396
|
| 533 |
+
Meghalaya,East Khasi Hills,12863.56,9182.19,-3681.369999999999,-28.62,35.419,25.283,-10.136,-28.62,0.002753436840190429
|
| 534 |
+
Tamil Nadu,Dharmapuri,28769.36,25304.62,-3464.7400000000016,-12.04,84.86,74.64,-10.22,-12.04,0.002949665894548923
|
| 535 |
+
Chhattisgarh,Balrampur,14144.84,10474.94,-3669.8999999999996,-25.95,39.551,29.289,-10.262,-25.95,0.0027961433285919107
|
| 536 |
+
Arunachal Pradesh,Upper Subansiri,9496.64,5242.81,-4253.829999999999,-44.79,23.102,12.754,-10.348,-44.79,0.0024326498635306804
|
| 537 |
+
Bihar,Madhubani,22131.05,18548.69,-3582.3600000000006,-16.19,64.036,53.67,-10.366,-16.19,0.0028934912713133813
|
| 538 |
+
Madhya Pradesh,Betul,14066.56,10247.74,-3818.8199999999997,-27.15,39.041,28.442,-10.599,-27.15,0.002775447586332408
|
| 539 |
+
Karnataka,Chikkaballapura,11493.44,7466.72,-4026.7200000000003,-35.03,30.365,19.727,-10.638,-35.03,0.002641941838126792
|
| 540 |
+
Assam,Hailakandi,10464.21,5119.54,-5344.669999999999,-51.08,20.85,10.201,-10.649,-51.07,0.001992505884342918
|
| 541 |
+
Kerala,Wayanad,14899.67,11073.76,-3825.91,-25.68,41.696,30.989,-10.707,-25.68,0.0027984512408664084
|
| 542 |
+
Chhattisgarh,Gaurela Pendra Marwahi,10303.55,5977.51,-4326.039999999999,-41.99,25.688,14.903,-10.785,-41.98,0.00249312130285193
|
| 543 |
+
Mizoram,Lawngtlai,12818.41,8790.13,-4028.2800000000007,-31.43,34.548,23.691,-10.857,-31.43,0.0026951860644182863
|
| 544 |
+
Jharkhand,Chatra,20374.62,16559.1,-3815.5200000000004,-18.73,58.519,47.56,-10.959,-18.73,0.002872151726019921
|
| 545 |
+
Andhra Pradesh,Nandyal,25448.99,21669.74,-3779.25,-14.85,74.302,63.268,-11.034,-14.85,0.0029196443552376734
|
| 546 |
+
Andhra Pradesh,Chittoor,29200.4,25449.55,-3750.850000000002,-12.85,86.016,74.967,-11.049,-12.85,0.0029457130724236656
|
| 547 |
+
Madhya Pradesh,Anuppur,16883.64,12954.53,-3929.1099999999988,-23.27,47.759,36.645,-11.114,-23.27,0.002828714661056502
|
| 548 |
+
Uttar Pradesh,Bahraich,38940.81,35188.06,-3752.75,-9.64,116.049,104.865,-11.184,-9.64,0.0029801383176158895
|
| 549 |
+
Chhattisgarh,Surguja,10510.73,5915.05,-4595.679999999999,-43.72,25.819,14.53,-11.289,-43.72,0.0024564421310413263
|
| 550 |
+
Uttar Pradesh,Mathura,11311.04,6924.87,-4386.170000000001,-38.78,29.154,17.849,-11.305,-38.78,0.0025774818230684354
|
| 551 |
+
Uttarakhand,Uttar Kashi,11087.68,6521.34,-4566.34,-41.18,27.913,16.417,-11.496,-41.19,0.0025174788594187424
|
| 552 |
+
Bihar,Bhojpur,15100.03,10940.09,-4159.9400000000005,-27.55,41.854,30.324,-11.53,-27.55,0.0027717825726174053
|
| 553 |
+
Telangana,Suryapet,19914.79,15865.94,-4048.8500000000004,-20.33,56.85,45.292,-11.558,-20.33,0.002854662288680925
|
| 554 |
+
Madhya Pradesh,Ashok Nagar,11232.36,6636.47,-4595.89,-40.92,28.329,16.738,-11.591,-40.92,0.0025220879672660066
|
| 555 |
+
Telangana,Mancherial,10785.32,5709.16,-5076.16,-47.07,25.376,13.433,-11.943,-47.06,0.0023528277325104868
|
| 556 |
+
Madhya Pradesh,Guna,11406.57,5717.81,-5688.759999999999,-49.87,24.136,12.099,-12.037,-49.87,0.0021159735135101963
|
| 557 |
+
Rajasthan,Bharatpur,11245.4,6343.53,-4901.87,-43.59,27.734,15.645,-12.089,-43.59,0.0024662528678392947
|
| 558 |
+
Maharashtra,Nashik,14313.02,9776.78,-4536.24,-31.69,38.496,26.295,-12.201,-31.69,0.002689579138434796
|
| 559 |
+
Kerala,Kannur,18684.79,14361.5,-4323.290000000001,-23.14,52.872,40.638,-12.234,-23.14,0.0028296812541109637
|
| 560 |
+
Uttar Pradesh,Budaun,14600.0,10070.39,-4529.610000000001,-31.02,39.583,27.302,-12.281,-31.03,0.0027111643835616437
|
| 561 |
+
Madhya Pradesh,Chhindwara,28645.85,24430.15,-4215.699999999997,-14.72,83.681,71.366,-12.315,-14.72,0.002921225936741273
|
| 562 |
+
Uttar Pradesh,Kushi Nagar,19963.69,15611.35,-4352.339999999998,-21.8,56.74,44.37,-12.37,-21.8,0.002842159941373564
|
| 563 |
+
Assam,Udalguri,11340.85,6215.46,-5125.39,-45.19,27.504,15.074,-12.43,-45.19,0.0024252150412006155
|
| 564 |
+
Chhattisgarh,Jashpur,19228.02,14830.44,-4397.58,-22.87,54.441,41.99,-12.451,-22.87,0.0028313367679043398
|
| 565 |
+
Uttar Pradesh,Auraiya,12628.94,6144.86,-6484.080000000001,-51.34,24.819,12.076,-12.743,-51.34,0.0019652480730766
|
| 566 |
+
Meghalaya,South West Garo Hills,11746.32,6421.98,-5324.34,-45.33,28.456,15.558,-12.898,-45.33,0.0024225459548181897
|
| 567 |
+
Uttar Pradesh,Moradabad,14381.46,9535.17,-4846.289999999999,-33.7,38.376,25.444,-12.932,-33.7,0.002668435610848968
|
| 568 |
+
Maharashtra,Dhule,11760.61,6193.98,-5566.630000000001,-47.33,27.642,14.558,-13.084,-47.33,0.0023503882876823564
|
| 569 |
+
Jharkhand,Dumka,19096.47,14422.66,-4673.810000000001,-24.47,53.707,40.562,-13.145,-24.48,0.0028124045962421326
|
| 570 |
+
Kerala,Malappuram,18858.68,14167.44,-4691.24,-24.88,52.92,39.756,-13.164,-24.88,0.0028061348938525923
|
| 571 |
+
Uttar Pradesh,Sultanpur,27055.56,22495.24,-4560.32,-16.86,78.195,65.015,-13.18,-16.86,0.0028901637962769943
|
| 572 |
+
Madhya Pradesh,Khargone,17612.71,12878.26,-4734.449999999999,-26.88,49.054,35.868,-13.186,-26.88,0.002785147771126647
|
| 573 |
+
Meghalaya,North Garo Hills,12018.96,6506.77,-5512.189999999999,-45.86,28.898,15.645,-13.253,-45.86,0.0024043677655970234
|
| 574 |
+
Bihar,Patna,19942.98,15195.28,-4747.699999999999,-23.81,56.338,42.926,-13.412,-23.81,0.00282495394369347
|
| 575 |
+
Tripura,Dhalai,24957.5,20250.39,-4707.110000000001,-18.86,71.669,58.152,-13.517,-18.86,0.002871641791044776
|
| 576 |
+
Chhattisgarh,Mungeli,14674.75,9553.08,-5121.67,-34.9,38.776,25.243,-13.533,-34.9,0.002642361880100172
|
| 577 |
+
Maharashtra,Washim,14826.09,7134.65,-7691.4400000000005,-51.88,26.187,12.602,-13.585,-51.88,0.0017662782297962578
|
| 578 |
+
Madhya Pradesh,Damoh,20460.75,15644.48,-4816.27,-23.54,57.838,44.223,-13.615,-23.54,0.002826778099532031
|
| 579 |
+
Madhya Pradesh,Alirajpur,13119.89,7646.46,-5473.429999999999,-41.72,32.83,19.134,-13.696,-41.72,0.0025023075650786705
|
| 580 |
+
Bihar,Lakhisarai,13302.99,7806.52,-5496.469999999999,-41.32,33.486,19.65,-13.836,-41.32,0.0025171784689006003
|
| 581 |
+
Madhya Pradesh,Sheopur,12448.9,6440.0,-6008.9,-48.27,28.709,14.852,-13.857,-48.27,0.002306147531107166
|
| 582 |
+
Manipur,Imphal East,15420.98,7400.31,-8020.669999999999,-52.01,26.955,12.935,-14.02,-52.01,0.0017479433862179965
|
| 583 |
+
Jammu and Kashmir,Kupwara,14803.26,9438.86,-5364.4,-36.24,38.793,24.735,-14.058,-36.24,0.0026205714146748755
|
| 584 |
+
Uttar Pradesh,Jaunpur,39425.13,34624.39,-4800.739999999998,-12.18,116.275,102.116,-14.159,-12.18,0.00294926104238591
|
| 585 |
+
Telangana,Siddipet,17696.39,12537.31,-5159.08,-29.15,48.615,34.442,-14.173,-29.15,0.0027471704681011214
|
| 586 |
+
Chhattisgarh,Janjgir-Champa,15997.51,7655.6,-8341.91,-52.15,27.66,13.237,-14.423,-52.14,0.001729019078594106
|
| 587 |
+
Kerala,Kollam,32211.56,27169.7,-5041.860000000001,-15.65,93.768,79.091,-14.677,-15.65,0.002911004620701388
|
| 588 |
+
Chhattisgarh,Bemetara,13721.91,7703.85,-6018.0599999999995,-43.86,33.681,18.909,-14.772,-43.86,0.0024545416782357555
|
| 589 |
+
Karnataka,Kolar,14916.09,9171.84,-5744.25,-38.51,38.543,23.7,-14.843,-38.51,0.002583988163117814
|
| 590 |
+
Jharkhand,Palamu,19396.15,14026.73,-5369.420000000002,-27.68,53.749,38.87,-14.879,-27.68,0.002771116948466577
|
| 591 |
+
Odisha,Kalahandi,31155.63,25987.52,-5168.110000000001,-16.59,90.118,75.169,-14.949,-16.59,0.0028925109201771875
|
| 592 |
+
Bihar,Nawada,31475.44,26296.35,-5179.09,-16.45,91.051,76.069,-14.982,-16.45,0.002892763373601767
|
| 593 |
+
Bihar,Darbhanga,26783.76,21553.2,-5230.559999999998,-19.53,76.72,61.737,-14.983,-19.53,0.0028644223215859165
|
| 594 |
+
Telangana,Kamareddy,14712.49,8712.32,-6000.17,-40.78,37.214,22.037,-15.177,-40.78,0.002529415483035163
|
| 595 |
+
Uttar Pradesh,Hamirpur,17268.67,11587.94,-5680.729999999998,-32.9,46.167,30.98,-15.187,-32.9,0.0026734542961328236
|
| 596 |
+
Bihar,Purbi Champaran,41023.03,35863.21,-5159.82,-12.58,120.91,105.702,-15.208,-12.58,0.0029473688316050766
|
| 597 |
+
Kerala,Kozhikode,34512.66,29295.16,-5217.500000000004,-15.12,100.609,85.399,-15.21,-15.12,0.002915133171421733
|
| 598 |
+
Karnataka,Hassan,15584.31,9749.38,-5834.93,-37.44,40.7,25.461,-15.239,-37.44,0.002611601026930291
|
| 599 |
+
Tripura,Gomati,18183.65,12566.53,-5617.120000000001,-30.89,49.338,34.097,-15.241,-30.89,0.00271331663334919
|
| 600 |
+
Uttar Pradesh,Ghazipur,29772.78,24436.07,-5336.709999999999,-17.92,85.766,70.393,-15.373,-17.92,0.002880684974664778
|
| 601 |
+
Jharkhand,Latehar,18732.23,13095.88,-5636.35,-30.09,51.176,35.778,-15.398,-30.09,0.002731975851246755
|
| 602 |
+
Rajasthan,Chittorgarh,19541.01,13896.4,-5644.609999999999,-28.89,53.741,38.217,-15.524,-28.89,0.002750164909592698
|
| 603 |
+
Jharkhand,Hazaribagh,17620.63,11800.57,-5820.060000000001,-33.03,47.104,31.546,-15.558,-33.03,0.0026732301852998444
|
| 604 |
+
Rajasthan,Bundi,15856.22,9834.7,-6021.519999999999,-37.98,41.12,25.504,-15.616,-37.98,0.002593304078777918
|
| 605 |
+
Karnataka,Kalaburagi,19737.7,13930.74,-5806.960000000001,-29.42,54.121,38.198,-15.923,-29.42,0.0027420114805676446
|
| 606 |
+
Telangana,Jagtial,14450.63,7688.0,-6762.629999999999,-46.8,34.114,18.149,-15.965,-46.8,0.002360727525374326
|
| 607 |
+
Bihar,Buxar,18601.21,12531.86,-6069.3499999999985,-32.63,49.755,33.521,-16.234,-32.63,0.0026748259925026387
|
| 608 |
+
Maharashtra,Jalna,25461.84,19706.61,-5755.23,-22.6,72.187,55.87,-16.317,-22.6,0.002835105396939106
|
| 609 |
+
Assam,Dhemaji,15658.21,7786.26,-7871.949999999999,-50.27,32.625,16.223,-16.402,-50.27,0.002083571493804209
|
| 610 |
+
Andhra Pradesh,Annamayya,30000.51,24262.06,-5738.449999999997,-19.13,86.083,69.617,-16.466,-19.13,0.002869384553795919
|
| 611 |
+
Telangana,Kumram Bheem(Asifabad),14917.96,7816.98,-7100.98,-47.6,34.655,18.159,-16.496,-47.6,0.0023230388069146184
|
| 612 |
+
Rajasthan,Kota,16670.04,10138.92,-6531.120000000001,-39.18,42.786,26.023,-16.763,-39.18,0.0025666405119603793
|
| 613 |
+
Himachal Pradesh,Chamba,24035.13,18024.08,-6011.049999999999,-25.01,67.436,50.571,-16.865,-25.01,0.0028057264512403304
|
| 614 |
+
Uttar Pradesh,Rampur,16375.64,9653.41,-6722.23,-41.05,41.278,24.333,-16.945,-41.05,0.0025206953743487277
|
| 615 |
+
Chhattisgarh,Raigarh,16507.48,8164.44,-8343.04,-50.54,33.751,16.693,-17.058,-50.54,0.002044588271498739
|
| 616 |
+
Chhattisgarh,Baloda Bazar,20198.92,13878.26,-6320.659999999998,-31.29,54.523,37.462,-17.061,-31.29,0.002699302734997713
|
| 617 |
+
Bihar,Bhagalpur,19540.99,13008.27,-6532.720000000001,-33.43,52.181,34.736,-17.445,-33.43,0.002670335535712366
|
| 618 |
+
Kerala,Alappuzha,35646.37,29590.38,-6055.990000000002,-16.99,102.95,85.46,-17.49,-16.99,0.0028880921114828803
|
| 619 |
+
Rajasthan,Rajsamand,35070.63,28971.82,-6098.809999999998,-17.39,101.202,83.603,-17.599,-17.39,0.0028856624474667266
|
| 620 |
+
Tamil Nadu,Virudhunagar,34004.69,27818.51,-6186.180000000004,-18.19,97.725,79.947,-17.778,-18.19,0.0028738682811106347
|
| 621 |
+
Telangana,Bhadradri Kothagudem,17547.88,10579.01,-6968.870000000001,-39.71,44.936,27.09,-17.846,-39.71,0.0025607651750524847
|
| 622 |
+
Bihar,Muzaffarpur,31609.49,25352.01,-6257.480000000003,-19.8,90.473,72.563,-17.91,-19.8,0.0028622100514750473
|
| 623 |
+
Bihar,Munger,18270.39,11209.96,-7060.43,-38.64,47.163,28.937,-18.226,-38.64,0.002581389888228987
|
| 624 |
+
Telangana,Nirmal,20101.47,13247.03,-6854.4400000000005,-34.1,53.53,35.277,-18.253,-34.1,0.0026629893236663786
|
| 625 |
+
Bihar,Siwan,19167.54,12093.5,-7074.040000000001,-36.91,50.155,31.645,-18.51,-36.91,0.002616663379859909
|
| 626 |
+
Chhattisgarh,Raipur,21199.5,14140.67,-7058.83,-33.3,56.614,37.763,-18.851,-33.3,0.002670534682421755
|
| 627 |
+
Maharashtra,Nanded,24266.9,17224.73,-7042.170000000002,-29.02,66.715,47.355,-19.36,-29.02,0.002749218070705364
|
| 628 |
+
Chhattisgarh,Korba,18214.47,10372.17,-7842.300000000001,-43.06,45.103,25.684,-19.419,-43.05,0.0024762180837542896
|
| 629 |
+
Jammu and Kashmir,Rajauri,17857.6,9070.9,-8786.699999999999,-49.2,39.503,20.066,-19.437,-49.2,0.0022121113699489297
|
| 630 |
+
Telangana,Adilabad,18669.8,10781.2,-7888.5999999999985,-42.25,46.42,26.806,-19.614,-42.25,0.002486368359596782
|
| 631 |
+
Bihar,Rohtas,19826.86,12217.96,-7608.9000000000015,-38.38,51.245,31.579,-19.666,-38.38,0.0025846250994862523
|
| 632 |
+
Uttar Pradesh,Firozabad,18002.93,9409.44,-8593.49,-47.73,41.815,21.855,-19.96,-47.73,0.0023226774752776354
|
| 633 |
+
Madhya Pradesh,Seoni,24724.12,17317.92,-7406.200000000001,-29.96,67.58,47.336,-20.244,-29.96,0.0027333632096915887
|
| 634 |
+
Odisha,Nuapada,19930.29,9804.04,-10126.25,-50.81,40.188,19.769,-20.419,-50.81,0.0020164282607026793
|
| 635 |
+
Gujarat,Panch Mahals,18705.8,9526.75,-9179.05,-49.07,41.638,21.206,-20.432,-49.07,0.0022259406173486297
|
| 636 |
+
Andhra Pradesh,Vizianagaram,61304.86,54331.55,-6973.309999999998,-11.37,181.374,160.743,-20.631,-11.37,0.00295855826112318
|
| 637 |
+
Rajasthan,Churu,35031.76,27769.01,-7262.750000000004,-20.73,99.77,79.086,-20.684,-20.73,0.0028479870837206005
|
| 638 |
+
Bihar,Pashchim Champaran,24053.8,16301.79,-7752.009999999998,-32.23,64.524,43.729,-20.795,-32.23,0.002682486758848914
|
| 639 |
+
Telangana,Khammam,20508.11,12281.38,-8226.730000000001,-40.11,52.08,31.188,-20.892,-40.12,0.002539483160564284
|
| 640 |
+
Bihar,Samastipur,37753.31,30430.99,-7322.319999999996,-19.4,108.285,87.283,-21.002,-19.4,0.002868225329116838
|
| 641 |
+
Karnataka,Ballari,20827.49,12639.69,-8187.800000000001,-39.31,53.437,32.43,-21.007,-39.31,0.002565695626309267
|
| 642 |
+
Uttar Pradesh,Mainpuri,19129.36,10305.03,-8824.33,-46.13,45.906,24.73,-21.176,-46.13,0.0023997666414349457
|
| 643 |
+
Uttar Pradesh,Jalaun,29330.76,21524.81,-7805.949999999997,-26.61,81.77,60.008,-21.762,-26.61,0.002787858207560936
|
| 644 |
+
Jharkhand,Deoghar,22226.47,13726.38,-8500.090000000002,-38.24,57.596,35.57,-22.026,-38.24,0.002591324668289656
|
| 645 |
+
Rajasthan,Baran,21702.55,12967.67,-8734.88,-40.25,55.091,32.918,-22.173,-40.25,0.0025384574623719333
|
| 646 |
+
Karnataka,Chitradurga,22807.82,14329.31,-8478.51,-37.17,59.668,37.487,-22.181,-37.17,0.002616120260507142
|
| 647 |
+
Rajasthan,Sirohi,23668.94,15186.71,-8482.23,-35.84,62.321,39.987,-22.334,-35.84,0.0026330287710391764
|
| 648 |
+
Tamil Nadu,Salem,42525.61,34732.45,-7793.1600000000035,-18.33,122.174,99.785,-22.389,-18.33,0.002872951146379793
|
| 649 |
+
Rajasthan,Pali,31508.66,23502.16,-8006.5,-25.41,88.242,65.819,-22.423,-25.41,0.0028005634006650873
|
| 650 |
+
Bihar,Saran,29369.01,21160.3,-8208.71,-27.95,81.016,58.372,-22.644,-27.95,0.002758553999607069
|
| 651 |
+
Bihar,Purnia,27039.1,18397.28,-8641.82,-31.96,72.584,49.386,-23.198,-31.96,0.002684408874555736
|
| 652 |
+
Tamil Nadu,Ranipet,25289.2,16496.76,-8792.440000000002,-34.77,66.911,43.648,-23.263,-34.77,0.0026458330038119037
|
| 653 |
+
Karnataka,Bidar,22142.02,11040.02,-11102.0,-50.14,46.417,23.144,-23.273,-50.14,0.0020963308677347417
|
| 654 |
+
Telangana,Vikarabad,22860.64,13598.53,-9262.109999999999,-40.52,57.958,34.476,-23.482,-40.52,0.002535274602985743
|
| 655 |
+
Uttar Pradesh,Maharajganj,28312.51,19604.32,-8708.189999999999,-30.76,76.975,53.299,-23.676,-30.76,0.0027187628366400576
|
| 656 |
+
Chhattisgarh,Bilaspur,21653.66,11896.43,-9757.23,-45.06,52.635,28.917,-23.718,-45.06,0.002430766900376195
|
| 657 |
+
Odisha,Kandhamal,31893.37,23362.79,-8530.579999999998,-26.75,88.861,65.093,-23.768,-26.75,0.0027861903586858337
|
| 658 |
+
Uttar Pradesh,Chandauli,22471.24,11354.36,-11116.880000000001,-49.47,49.156,24.838,-24.318,-49.47,0.0021875072314656422
|
| 659 |
+
Andhra Pradesh,Kakinada,25518.15,16202.67,-9315.480000000001,-36.51,66.84,42.44,-24.4,-36.51,0.0026193121366556746
|
| 660 |
+
Madhya Pradesh,Dhar,22382.24,11429.06,-10953.180000000002,-48.94,50.067,25.566,-24.501,-48.94,0.0022369074766421947
|
| 661 |
+
Odisha,Sundargarh,32692.36,23598.46,-9093.900000000001,-27.82,90.337,65.208,-25.129,-27.82,0.002763244990572721
|
| 662 |
+
Bihar,Araria,24187.38,13547.11,-10640.27,-43.99,59.331,33.231,-26.1,-43.99,0.0024529734101006394
|
| 663 |
+
Karnataka,Tumakuru,24323.11,12322.59,-12000.52,-49.34,53.271,26.988,-26.283,-49.34,0.0021901393366226605
|
| 664 |
+
Tamil Nadu,Ariyalur,24033.5,12850.52,-11182.98,-46.53,56.909,30.429,-26.48,-46.53,0.0023679031352071066
|
| 665 |
+
Bihar,Gaya,45679.72,36331.56,-9348.160000000003,-20.46,130.367,103.688,-26.679,-20.46,0.0028539360574014023
|
| 666 |
+
Chhattisgarh,Kawardha,26421.9,15363.74,-11058.160000000002,-41.85,65.999,38.377,-27.622,-41.85,0.0024978900079101044
|
| 667 |
+
Maharashtra,Gondia,34159.49,23972.56,-10186.929999999997,-29.82,93.475,65.599,-27.876,-29.82,0.0027364284419937185
|
| 668 |
+
Uttar Pradesh,Hardoi,32387.17,21862.91,-10524.259999999998,-32.5,86.694,58.523,-28.171,-32.49,0.002676800720779247
|
| 669 |
+
Bihar,Kishanganj,25780.32,13853.45,-11926.869999999999,-46.26,61.547,33.073,-28.474,-46.26,0.0023873636944770275
|
| 670 |
+
Maharashtra,Yavatmal,26865.05,15226.4,-11638.65,-43.32,66.292,37.573,-28.719,-43.32,0.0024675926529077745
|
| 671 |
+
Rajasthan,Jaisalmer,30664.11,19306.12,-11357.990000000002,-37.04,80.234,50.515,-29.719,-37.04,0.002616544227111108
|
| 672 |
+
Uttar Pradesh,Gorakhpur,26825.72,14164.2,-12661.52,-47.2,63.051,33.291,-29.76,-47.2,0.00235039357750696
|
| 673 |
+
Bihar,Vaishali,28966.34,16959.44,-12006.900000000001,-41.45,72.584,42.497,-30.087,-41.45,0.002505805013681397
|
| 674 |
+
Bihar,Saharsa,36442.24,25330.99,-11111.249999999996,-30.49,99.406,69.097,-30.309,-30.49,0.0027277686552747583
|
| 675 |
+
Bihar,Nalanda,35181.56,23090.87,-12090.689999999999,-34.37,93.258,61.208,-32.05,-34.37,0.002650763638678899
|
| 676 |
+
Bihar,Katihar,31709.05,18904.35,-12804.7,-40.38,80.469,47.974,-32.495,-40.38,0.0025377297648463133
|
| 677 |
+
Maharashtra,Amravati,44981.74,33190.9,-11790.839999999997,-26.21,125.621,92.693,-32.928,-26.21,0.002792710997840457
|
| 678 |
+
Tamil Nadu,Namakkal,31198.1,17807.37,-13390.73,-42.92,77.273,44.106,-33.167,-42.92,0.0024768495517355224
|
| 679 |
+
Tamil Nadu,Tiruvallur,43896.18,31920.47,-11975.71,-27.28,121.737,88.525,-33.212,-27.28,0.0027732937125736225
|
| 680 |
+
Rajasthan,Sri Ganganagar,39608.45,27108.25,-12500.199999999997,-31.56,106.567,72.935,-33.632,-31.56,0.0026905117468620965
|
| 681 |
+
Jharkhand,Garhwa,35448.39,22413.07,-13035.32,-36.77,92.846,58.704,-34.142,-36.77,0.0026191880646765624
|
| 682 |
+
Andhra Pradesh,Eluru,42110.87,29608.99,-12501.880000000001,-29.69,115.324,81.087,-34.237,-29.69,0.002738580323797632
|
| 683 |
+
Tamil Nadu,Tiruvannamalai,78187.39,66262.78,-11924.61,-15.25,227.733,193.001,-34.732,-15.25,0.002912656375919442
|
| 684 |
+
Andhra Pradesh,Sri Sathya Sai,31516.06,16725.0,-14791.060000000001,-46.93,74.247,39.402,-34.845,-46.93,0.0023558465112707614
|
| 685 |
+
Bihar,Jamui,31828.37,16550.37,-15278.0,-48.0,73.878,38.416,-35.462,-48.0,0.002321136772005604
|
| 686 |
+
Rajasthan,Hanumangarh,39268.34,24303.41,-14964.929999999997,-38.11,101.759,62.979,-38.78,-38.11,0.002591375138343001
|
| 687 |
+
Karnataka,Raichur,41250.24,26522.55,-14727.689999999999,-35.7,108.725,69.907,-38.818,-35.7,0.0026357422405299943
|
| 688 |
+
Tamil Nadu,Madurai,40407.61,24414.36,-15993.25,-39.58,103.623,62.609,-41.014,-39.58,0.002564442687899631
|
| 689 |
+
Uttar Pradesh,Azamgarh,38338.5,21268.01,-17070.49,-44.53,93.567,51.906,-41.661,-44.53,0.0024405493172659333
|
| 690 |
+
Maharashtra,Chatrapati Sambhaji Nagar,48511.79,32812.66,-15699.129999999997,-32.36,130.042,87.958,-42.084,-32.36,0.0026806267095071117
|
| 691 |
+
Andhra Pradesh,Anakapalli,42445.38,25872.53,-16572.85,-39.05,109.084,66.492,-42.592,-39.05,0.0025699852374981686
|
| 692 |
+
Tamil Nadu,Tiruchirappalli,49657.91,33388.75,-16269.160000000003,-32.76,132.794,89.287,-43.507,-32.76,0.0026741761785785993
|
| 693 |
+
Uttar Pradesh,Basti,44631.62,27742.1,-16889.520000000004,-37.84,116.275,72.274,-44.001,-37.84,0.0026052157640704056
|
| 694 |
+
Kerala,Thiruvananthapuram,43343.3,26072.23,-17271.070000000003,-39.85,110.654,66.562,-44.092,-39.85,0.002552966663821167
|
| 695 |
+
Madhya Pradesh,Morena,40950.05,22278.84,-18671.210000000003,-45.6,98.501,53.589,-44.912,-45.6,0.0024053938884079505
|
| 696 |
+
Rajasthan,Dungarpur,75259.77,59355.17,-15904.600000000006,-21.13,214.214,168.944,-45.27,-21.13,0.0028463281245743907
|
| 697 |
+
Rajasthan,Pratapgarh,41853.16,23049.91,-18803.250000000004,-44.93,101.776,56.051,-45.725,-44.93,0.0024317399211911356
|
| 698 |
+
Andhra Pradesh,Parvathipuram Manyam,43888.63,25285.58,-18603.049999999996,-42.39,109.104,62.858,-46.246,-42.39,0.0024859285878825565
|
| 699 |
+
Tamil Nadu,Kallakurichi,46627.78,22874.65,-23753.129999999997,-50.94,93.189,45.717,-47.472,-50.94,0.001998572524791015
|
| 700 |
+
Andhra Pradesh,Srikakulam,70040.57,52991.94,-17048.630000000005,-24.34,197.023,149.065,-47.958,-24.34,0.0028129839605817025
|
| 701 |
+
Maharashtra,Beed,47848.46,27950.73,-19897.73,-41.58,119.751,69.953,-49.798,-41.58,0.0025027137759501563
|
| 702 |
+
Tamil Nadu,Thanjavur,47953.32,24742.85,-23210.47,-48.4,109.921,56.717,-53.204,-48.4,0.0022922500465035584
|
| 703 |
+
Tamil Nadu,Cuddalore,53050.01,28436.33,-24613.68,-46.4,126.522,67.819,-58.703,-46.4,0.0023849571376141116
|
| 704 |
+
Andhra Pradesh,Alluri Sitharama Raju,54797.3,27541.7,-27255.600000000002,-49.74,119.446,60.035,-59.411,-49.74,0.0021797789307137394
|
| 705 |
+
Tamil Nadu,Villupuram,60813.4,34873.88,-25939.520000000004,-42.65,150.844,86.503,-64.341,-42.65,0.002480440166147592
|
| 706 |
+
Rajasthan,Udaipur,60574.8,32307.89,-28266.910000000003,-46.66,143.347,76.455,-66.892,-46.66,0.0023664461129050364
|
| 707 |
+
Rajasthan,Barmer,92571.15,68182.19,-24388.959999999992,-26.35,258.506,190.4,-68.106,-26.35,0.00279251148981081
|
data/scraper/mnrega_scraper.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
mnrega_scraper.py
|
| 3 |
+
-----------------
|
| 4 |
+
Real MNREGA data scraper for nreganarep.nic.in
|
| 5 |
+
|
| 6 |
+
STRATEGY:
|
| 7 |
+
The portal has captchas on the main MIS page, but the R14 district-level
|
| 8 |
+
consolidated summary reports are accessible via direct GET URLs.
|
| 9 |
+
|
| 10 |
+
R14 report gives per-district per-year:
|
| 11 |
+
- Households demanded / offered / availed
|
| 12 |
+
- Person days (total, SC, ST, Women)
|
| 13 |
+
- Expenditure (Rs. lakhs)
|
| 14 |
+
- Average wage rate
|
| 15 |
+
- Works completed / in progress
|
| 16 |
+
|
| 17 |
+
Two-step approach:
|
| 18 |
+
Step 1: Fetch state-level page → extract district links (which have
|
| 19 |
+
embedded Digest tokens needed to access sub-pages)
|
| 20 |
+
Step 2: Follow each district link → parse the HTML table
|
| 21 |
+
|
| 22 |
+
HOW TO RUN:
|
| 23 |
+
pip install requests beautifulsoup4 lxml
|
| 24 |
+
|
| 25 |
+
# Maharashtra only (fast, ~2-5 min):
|
| 26 |
+
python data/scraper/mnrega_scraper.py --state Maharashtra
|
| 27 |
+
|
| 28 |
+
# All India (slow, ~30-60 min):
|
| 29 |
+
python data/scraper/mnrega_scraper.py --all-india
|
| 30 |
+
|
| 31 |
+
# Resume after interruption:
|
| 32 |
+
python data/scraper/mnrega_scraper.py --all-india --resume
|
| 33 |
+
|
| 34 |
+
# Custom year range:
|
| 35 |
+
python data/scraper/mnrega_scraper.py --state Maharashtra --years 2018-2019 2023-2024
|
| 36 |
+
|
| 37 |
+
OUTPUT:
|
| 38 |
+
data/raw/mnrega_real_data.csv
|
| 39 |
+
→ drop this in as a replacement for mnrega_india_unified.csv
|
| 40 |
+
→ run: python main.py --stage 3
|
| 41 |
+
"""
|
| 42 |
+
|
| 43 |
+
import os, json, time, argparse
|
| 44 |
+
import requests
|
| 45 |
+
from bs4 import BeautifulSoup
|
| 46 |
+
import pandas as pd
|
| 47 |
+
from datetime import datetime
|
| 48 |
+
|
| 49 |
+
# ── State codes ──────────────────────────────────────────────────────────────
|
| 50 |
+
# Portal state codes keyed by state name. Insertion order is the order in
# which an all-India scrape walks the states, so keep it stable.
STATE_CODES = {
    "Andhra Pradesh": "02",
    "Arunachal Pradesh": "03",
    "Assam": "04",
    "Bihar": "05",
    "Chhattisgarh": "33",
    "Goa": "10",
    "Gujarat": "11",
    "Haryana": "12",
    "Himachal Pradesh": "13",
    "Jharkhand": "34",
    "Karnataka": "15",
    "Kerala": "16",
    "Madhya Pradesh": "17",
    "Maharashtra": "18",
    "Manipur": "19",
    "Meghalaya": "20",
    "Mizoram": "21",
    "Nagaland": "22",
    "Odisha": "24",
    "Punjab": "25",
    "Rajasthan": "27",
    "Sikkim": "28",
    "Tamil Nadu": "29",
    "Telangana": "36",
    "Tripura": "30",
    "Uttar Pradesh": "31",
    "Uttarakhand": "35",
    "West Bengal": "32",
    "Delhi": "07",
}

# Financial years in the portal's "YYYY-YYYY" form: 2014-2015 .. 2023-2024.
ALL_YEARS = [f"{first}-{first + 1}" for first in range(2014, 2024)]

BASE_URL = "https://nreganarep.nic.in/netnrega"
OUTPUT_PATH = os.path.join("data", "raw", "mnrega_real_data.csv")
CHECKPOINT_PATH = os.path.join("data", "raw", ".scraper_checkpoint.json")
DELAY = 1.5  # seconds to pause between district requests

# Headers sent with every request of the scraping session.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Referer": "https://nreganarep.nic.in/netnrega/MISreport4.aspx",
}

# State groupings used to derive the "state_category" and "region" columns.
HIGH_ACTIVITY = {
    "Rajasthan",
    "Uttar Pradesh",
    "Madhya Pradesh",
    "West Bengal",
    "Andhra Pradesh",
    "Telangana",
    "Jharkhand",
    "Odisha",
    "Chhattisgarh",
    "Bihar",
}
MID_ACTIVITY = {
    "Maharashtra",
    "Tamil Nadu",
    "Karnataka",
    "Gujarat",
    "Himachal Pradesh",
    "Uttarakhand",
    "Assam",
}
SOUTH = {
    "Tamil Nadu",
    "Kerala",
    "Karnataka",
    "Andhra Pradesh",
    "Telangana",
}
EAST = {
    "West Bengal",
    "Odisha",
    "Jharkhand",
    "Bihar",
    "Assam",
}
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class MNREGAScraper:
    """Scrape district-level MNREGA R14 reports into a flat CSV.

    Rows accumulate in ``self.records`` (one dict per parsed table row) and
    are written to ``OUTPUT_PATH``. An all-India run also maintains a JSON
    checkpoint at ``CHECKPOINT_PATH`` listing the ``"state|year"`` keys that
    have been scraped, so an interrupted run can be resumed.
    """

    def __init__(self, delay=DELAY):
        """Create the HTTP session and load any existing checkpoint.

        Args:
            delay: Seconds to sleep after each district request.
        """
        self.session = requests.Session()
        self.session.headers.update(HEADERS)
        self.delay = delay
        self.records = []
        self.checkpoint = self._load_checkpoint()

    # ── Public ────────────────────────────────────────────────────────────

    def scrape_state(self, state_name: str, years: list) -> pd.DataFrame:
        """Scrape every requested financial year for a single state.

        Args:
            state_name: A key of ``STATE_CODES``.
            years: Financial years in ``"YYYY-YYYY"`` form, oldest first.

        Returns:
            DataFrame of all collected rows (also written to ``OUTPUT_PATH``).

        Raises:
            ValueError: If the state is unknown or ``years`` is empty.
        """
        code = STATE_CODES.get(state_name)
        if not code:
            raise ValueError(f"Unknown state '{state_name}'. Options: {list(STATE_CODES)}")
        if not years:
            # The banner below indexes years[0]/years[-1]; fail with a clear
            # message instead of an IndexError.
            raise ValueError("years must contain at least one financial year")
        print(f"\n{'='*60}")
        print(f"[scraper] State: {state_name} | Code: {code} | Years: {years[0]}β{years[-1]}")
        print(f"{'='*60}")
        for year in years:
            self._scrape_year(state_name, code, year)
        return self._finalize()

    def scrape_all_india(self, years: list, resume: bool = False) -> pd.DataFrame:
        """Scrape every state in ``STATE_CODES`` for every requested year.

        After each successfully fetched state-year the rows collected so far
        are flushed to ``OUTPUT_PATH`` and the checkpoint is updated, so an
        interrupted run loses at most the state-year in flight.

        Args:
            years: Financial years in ``"YYYY-YYYY"`` form.
            resume: When true, skip state-years recorded in the checkpoint
                and keep the rows a previous run already saved.

        Returns:
            DataFrame of all rows (restored plus newly scraped).
        """
        done = set(self.checkpoint.get("done", [])) if resume else set()
        if done and not self._restore_records():
            # A checkpoint without its data would skip work whose rows are
            # gone; scrape those state-years again instead.
            print("[scraper] Checkpoint found but no saved rows; starting over")
            done = set()

        total = len(STATE_CODES) * len(years)
        count = 0
        for state_name, code in STATE_CODES.items():
            for year in years:
                count += 1
                key = f"{state_name}|{year}"
                if key in done:
                    print(f"[scraper] [{count}/{total}] SKIP {key}")
                    continue
                print(f"[scraper] [{count}/{total}] {key}")
                if not self._scrape_year(state_name, code, year):
                    # Fetch failed: leave the key unmarked so that a later
                    # --resume run retries it.
                    continue
                done.add(key)
                # Write the rows before the checkpoint so the checkpoint
                # never claims work that is not on disk.
                self._write_output()
                self._save_checkpoint(sorted(done))
        return self._finalize()

    # ── Core ──────────────────────────────────────────────────────────────

    def _scrape_year(self, state_name: str, state_code: str, year: str) -> bool:
        """Fetch one state-year page and collect its district rows.

        If the page links to per-district pages, each link is followed and
        parsed; otherwise the page itself is parsed as the district table.

        Returns:
            True if the state-year page was fetched, False if that request
            failed. Failures of individual district pages are printed by
            ``_get`` and do not affect the result.
        """
        url = f"{BASE_URL}/nrega_R14.aspx?state_code={state_code}&fin_year={year}&rpt=RP"
        soup = self._get(url)
        if soup is None:
            return False

        district_links = self._find_district_links(soup)
        if not district_links:
            # State-level page may already contain the district table
            rows = self._parse_table(soup, state_name, year)
            self.records.extend(rows)
            print(f" β {len(rows)} rows (direct table)")
            return True

        print(f" β {len(district_links)} districts")
        for name, durl in district_links:
            dsoup = self._get(durl)
            if dsoup is not None:
                self.records.extend(self._parse_table(dsoup, state_name, year, name))
            time.sleep(self.delay)
        return True

    def _get(self, url: str):
        """GET ``url`` and return the parsed page, or None on a request error.

        Only ``requests`` failures (connection, timeout, HTTP status, bad
        URL) are swallowed; HTML-parser errors propagate so that a broken
        environment is not mistaken for an empty report.
        """
        try:
            response = self.session.get(url, timeout=20)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f" [ERROR] {url[:80]}... β {e}")
            return None
        return BeautifulSoup(response.text, "lxml")

    def _find_district_links(self, soup: BeautifulSoup) -> list:
        """Return ``(title-cased link text, absolute URL)`` for district links.

        A link qualifies when it has visible text and its href mentions
        ``district_code`` or ``nrega_r14`` (case-insensitive).
        """
        from urllib.parse import urljoin

        # Trailing slash makes urljoin resolve relative hrefs inside the
        # /netnrega/ directory rather than replacing its last path segment.
        base = f"{BASE_URL}/"
        links = []
        for anchor in soup.find_all("a", href=True):
            href = anchor["href"]
            text = anchor.get_text(strip=True)
            lowered = href.lower()
            if text and ("district_code" in lowered or "nrega_r14" in lowered):
                # urljoin also handles absolute URLs and root-relative
                # hrefs, which plain concatenation would mangle.
                links.append((text.title(), urljoin(base, href)))
        return links

    def _parse_table(self, soup, state_name, year, district_hint=None):
        """Map the rows of every report-like table in ``soup`` to records.

        A table counts as report-like when its header cells mention
        "person", "household" or "expenditure". The first ``<tr>`` of such a
        table is skipped.

        NOTE(review): ``find_all`` descends into nested tables, so a layout
        table wrapping the data table may match as well and yield the same
        rows twice; confirm against the live page markup.
        """
        records = []
        for table in soup.find_all("table"):
            headers = [th.get_text(" ", strip=True).lower() for th in table.find_all("th")]
            joined = " ".join(headers)
            if not any(k in joined for k in ["person", "household", "expenditure"]):
                continue
            for row in table.find_all("tr")[1:]:
                cells = [td.get_text(strip=True) for td in row.find_all("td")]
                record = self._map(cells, state_name, year, district_hint)
                if record:
                    records.append(record)
        return records

    @staticmethod
    def _to_number(value) -> float:
        """Parse a table cell as a float; blank, "-" or junk cells become 0.0.

        Thousands separators are removed. A lone dash is treated as an empty
        cell, but a leading minus sign is kept so negatives are not turned
        into positive numbers.
        """
        text = str(value).replace(",", "").strip()
        if text in ("", "-"):
            return 0.0
        try:
            return float(text)
        except ValueError:
            return 0.0

    def _map(self, cells, state_name, year, district_hint=None):
        """Convert one table row into an output record.

        Args:
            cells: Text of the row's ``<td>`` cells, addressed by position
                (1-3 households, 4 person days, 8 expenditure, 9 wage rate,
                10 works completed).
            state_name: State the row belongs to.
            year: Financial year as ``"YYYY-YYYY"``.
            district_hint: District name to use instead of ``cells[0]``.

        Returns:
            The record dict, or None for rows that are too short, have no
            usable district name, or look like total/subtotal rows.
        """
        num = self._to_number

        if len(cells) < 6:
            return None

        district = district_hint or cells[0]
        if not district or str(district).isdigit() or len(str(district)) < 3:
            return None

        # Skip subtotal/total rows
        label = district.lower()
        if any(marker in label for marker in ["total", "grand", "state"]):
            return None

        # Heuristic unit handling: large values are taken to be raw counts
        # and scaled to lakhs; small ones are assumed to be lakhs already.
        days_raw = num(cells[4])
        pd_lakhs = round(days_raw / 1e5, 3) if days_raw > 1000 else days_raw

        exp_raw = num(cells[8]) if len(cells) > 8 else 0
        exp_lakhs = round(exp_raw / 1e5, 2) if exp_raw > 1e5 else exp_raw

        # Clean year format: 2023-2024 -> 2023-24
        yr_parts = year.split("-")
        fin_year = f"{yr_parts[0]}-{yr_parts[1][2:]}" if len(yr_parts) == 2 else year

        if state_name in SOUTH:
            region = "South"
        elif state_name in EAST:
            region = "East"
        else:
            region = "Other"

        if state_name in HIGH_ACTIVITY:
            category = "high"
        elif state_name in MID_ACTIVITY:
            category = "mid"
        else:
            category = "low"

        return {
            "state": state_name,
            "district": str(district).title().strip(),
            "financial_year": fin_year,
            "region": region,
            "state_category": category,
            "person_days_lakhs": pd_lakhs,
            "expenditure_lakhs": exp_lakhs,
            "avg_wage_rate": num(cells[9]) if len(cells) > 9 else None,
            "households_demanded": num(cells[1]),
            "households_offered": num(cells[2]),
            "households_availed": num(cells[3]),
            "works_completed": num(cells[10]) if len(cells) > 10 else None,
            # Stage 2/3: fill via enrich.py with IMD/census/PMKISAN data
            "rainfall_mm": None,
            "crop_season_index": None,
            "rural_population_lakhs": None,
            "poverty_rate_pct": None,
            "pmkisan_beneficiaries": None,
            "pmkisan_amount_lakhs": None,
            "pmay_houses_sanctioned": None,
            "pmay_houses_completed": None,
            "pmay_expenditure_lakhs": None,
            "budget_allocated_lakhs": round(exp_lakhs * 1.12, 2) if exp_lakhs else None,
        }

    # ── Persistence ───────────────────────────────────────────────────────

    def _write_output(self) -> pd.DataFrame:
        """Write all collected rows to ``OUTPUT_PATH`` and return the frame."""
        df = pd.DataFrame(self.records)
        os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
        df.to_csv(OUTPUT_PATH, index=False)
        return df

    def _restore_records(self) -> bool:
        """Reload rows saved by an earlier run into ``self.records``.

        Returns:
            True if at least one row was restored from ``OUTPUT_PATH``.
        """
        if not os.path.exists(OUTPUT_PATH):
            return False
        try:
            previous = pd.read_csv(OUTPUT_PATH)
        except pd.errors.EmptyDataError:
            return False
        if previous.empty:
            return False
        self.records.extend(previous.to_dict("records"))
        print(f"[scraper] Restored {len(previous)} rows from {OUTPUT_PATH}")
        return True

    def _finalize(self) -> pd.DataFrame:
        """Write the final CSV, print a summary and return the DataFrame."""
        df = self._write_output()
        print(f"\n{'='*60}")
        print(f"[scraper] DONE: {len(df)} rows | {df['district'].nunique() if len(df) else 0} districts")
        print(f"[scraper] Saved β {OUTPUT_PATH}")
        print(f"[scraper] Next step: copy this to data/raw/mnrega_india_unified.csv")
        print(f" then run: python main.py --stage 3")
        print(f"{'='*60}")
        return df

    def _save_checkpoint(self, done):
        """Persist the list of finished ``"state|year"`` keys with a timestamp."""
        os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)
        with open(CHECKPOINT_PATH, "w", encoding="utf-8") as f:
            json.dump({"done": done, "ts": str(datetime.now())}, f)

    def _load_checkpoint(self):
        """Return the saved checkpoint dict, or ``{}`` if absent or unreadable."""
        if not os.path.exists(CHECKPOINT_PATH):
            return {}
        try:
            with open(CHECKPOINT_PATH, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors; a
            # damaged checkpoint should not prevent the scraper from starting.
            print(f"[scraper] Ignoring unreadable checkpoint {CHECKPOINT_PATH}: {e}")
            return {}
        return data if isinstance(data, dict) else {}
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
# ── CLI ───────────────────────────────────────────────────────────────────────
|
| 285 |
+
|
| 286 |
+
if __name__ == "__main__":
|
| 287 |
+
ap = argparse.ArgumentParser()
|
| 288 |
+
ap.add_argument("--state", type=str, help="Single state e.g. 'Maharashtra'")
|
| 289 |
+
ap.add_argument("--all-india", action="store_true")
|
| 290 |
+
ap.add_argument("--resume", action="store_true", help="Resume from checkpoint")
|
| 291 |
+
ap.add_argument("--years", nargs=2, default=["2014-2015", "2023-2024"],
|
| 292 |
+
metavar=("START", "END"),
|
| 293 |
+
help="e.g. --years 2018-2019 2023-2024")
|
| 294 |
+
ap.add_argument("--delay", type=float, default=1.5)
|
| 295 |
+
args = ap.parse_args()
|
| 296 |
+
|
| 297 |
+
start = int(args.years[0].split("-")[0])
|
| 298 |
+
end = int(args.years[1].split("-")[0])
|
| 299 |
+
years = [f"{y}-{y+1}" for y in range(start, end + 1)]
|
| 300 |
+
|
| 301 |
+
scraper = MNREGAScraper(delay=args.delay)
|
| 302 |
+
|
| 303 |
+
if args.state:
|
| 304 |
+
df = scraper.scrape_state(args.state, years)
|
| 305 |
+
elif args.all_india:
|
| 306 |
+
df = scraper.scrape_all_india(years, resume=args.resume)
|
| 307 |
+
else:
|
| 308 |
+
print("Usage:")
|
| 309 |
+
print(" python data/scraper/mnrega_scraper.py --state Maharashtra")
|
| 310 |
+
print(" python data/scraper/mnrega_scraper.py --all-india")
|
| 311 |
+
print(" python data/scraper/mnrega_scraper.py --all-india --resume")
|
| 312 |
+
exit(0)
|
fix_optimizer.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
fix_optimizer.py
|
| 3 |
+
----------------
|
| 4 |
+
Standalone script to re-run the two-stage proportional-LP optimizer.
|
| 5 |
+
|
| 6 |
+
Run this AFTER replacing src/optimize.py to regenerate
|
| 7 |
+
data/processed/optimized_budget_allocation.csv with realistic
|
| 8 |
+
continuous budget_change_pct values (instead of bang-bang -60%/+150%).
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
cd SchemeImpactNet/
|
| 12 |
+
python fix_optimizer.py
|
| 13 |
+
|
| 14 |
+
Then reseed the database:
|
| 15 |
+
rm data/schemeimpactnet.db
|
| 16 |
+
./start.sh
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
import sys, os
|
| 20 |
+
sys.path.insert(0, os.path.dirname(__file__))
|
| 21 |
+
|
| 22 |
+
from src.optimize import run_optimizer
|
| 23 |
+
|
| 24 |
+
if __name__ == "__main__":
|
| 25 |
+
print("=" * 60)
|
| 26 |
+
print("SchemeImpactNet β Optimizer Fix (v2 Proportional-LP)")
|
| 27 |
+
print("=" * 60)
|
| 28 |
+
|
| 29 |
+
result = run_optimizer(
|
| 30 |
+
predictions_path="data/processed/mnrega_predictions.csv",
|
| 31 |
+
raw_path="data/raw/mnrega_real_data_final_clean.csv",
|
| 32 |
+
scope_state=None, # All-India
|
| 33 |
+
target_year=2024,
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
print(f"\n✅ Done. {len(result)} districts processed.")
|
| 37 |
+
print(f" budget_change_pct range: {result['budget_change_pct'].min():.1f}% to {result['budget_change_pct'].max():.1f}%")
|
| 38 |
+
print(f" Unique values: {result['budget_change_pct'].nunique()}")
|
| 39 |
+
print("\nNext steps:")
|
| 40 |
+
print(" rm data/schemeimpactnet.db")
|
| 41 |
+
print(" ./start.sh")
|
frontend/app.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# frontend/app.py — SchemeImpactNet entry point
|
| 2 |
+
# Run from project root: streamlit run frontend/app.py
|
| 3 |
+
|
| 4 |
+
import sys, os
|
| 5 |
+
sys.path.insert(0, os.path.dirname(__file__))
|
| 6 |
+
|
| 7 |
+
import streamlit as st
|
| 8 |
+
|
| 9 |
+
st.set_page_config(
|
| 10 |
+
page_title="SchemeImpactNet",
|
| 11 |
+
page_icon="ποΈ",
|
| 12 |
+
layout="wide",
|
| 13 |
+
initial_sidebar_state="expanded",
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
# ── Inject CSS first — before anything else ───────────────────────────────────
|
| 17 |
+
# Must happen before st.navigation() so sidebar styles are present when nav renders.
|
| 18 |
+
st.markdown("""
|
| 19 |
+
<style>
|
| 20 |
+
@import url('https://fonts.googleapis.com/css2?family=Fraunces:ital,opsz,wght@0,9..144,300;0,9..144,600;0,9..144,700;1,9..144,300&family=Source+Serif+4:ital,opsz,wght@0,8..60,300;0,8..60,400;0,8..60,600&family=DM+Mono:wght@400;500&display=swap');
|
| 21 |
+
|
| 22 |
+
/* ββ Global ββ */
|
| 23 |
+
html, body, [class*="css"] { font-family: 'Source Serif 4', Georgia, serif !important; }
|
| 24 |
+
.stApp { background-color: #FAF9F7 !important; }
|
| 25 |
+
#MainMenu, footer, header { visibility: hidden; }
|
| 26 |
+
.block-container { padding: 2rem 2.5rem 3rem !important; max-width: 1320px !important; }
|
| 27 |
+
|
| 28 |
+
/* ββ Sidebar shell ββ */
|
| 29 |
+
[data-testid="stSidebar"] {
|
| 30 |
+
background: #1C1917 !important;
|
| 31 |
+
border-right: none !important;
|
| 32 |
+
min-width: 220px !important;
|
| 33 |
+
}
|
| 34 |
+
[data-testid="stSidebarContent"] {
|
| 35 |
+
background: #1C1917 !important;
|
| 36 |
+
}
|
| 37 |
+
section[data-testid="stSidebar"] > div {
|
| 38 |
+
background: #1C1917 !important;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
/* ββ Sidebar text ββ */
|
| 42 |
+
[data-testid="stSidebar"] p,
|
| 43 |
+
[data-testid="stSidebar"] span,
|
| 44 |
+
[data-testid="stSidebar"] label,
|
| 45 |
+
[data-testid="stSidebar"] div {
|
| 46 |
+
color: #A8A29E !important;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
/* ββ Nav links from st.navigation() ββ */
|
| 50 |
+
[data-testid="stSidebarNavLink"] {
|
| 51 |
+
border-radius: 5px !important;
|
| 52 |
+
padding: 0.48rem 1rem !important;
|
| 53 |
+
font-family: 'DM Mono', monospace !important;
|
| 54 |
+
font-size: 0.7rem !important;
|
| 55 |
+
letter-spacing: 0.5px !important;
|
| 56 |
+
color: #A8A29E !important;
|
| 57 |
+
border-left: 2px solid transparent !important;
|
| 58 |
+
transition: all 0.15s ease !important;
|
| 59 |
+
}
|
| 60 |
+
[data-testid="stSidebarNavLink"]:hover {
|
| 61 |
+
background: rgba(251,146,60,0.1) !important;
|
| 62 |
+
color: #FB923C !important;
|
| 63 |
+
border-left-color: rgba(251,146,60,0.5) !important;
|
| 64 |
+
}
|
| 65 |
+
[data-testid="stSidebarNavLink"][aria-current="page"] {
|
| 66 |
+
background: rgba(251,146,60,0.15) !important;
|
| 67 |
+
color: #FB923C !important;
|
| 68 |
+
border-left-color: #FB923C !important;
|
| 69 |
+
}
|
| 70 |
+
[data-testid="stSidebarNavLink"] svg { display: none !important; }
|
| 71 |
+
|
| 72 |
+
/* ββ Sidebar nav section label ββ */
|
| 73 |
+
[data-testid="stSidebarNavSeparator"] {
|
| 74 |
+
border-color: rgba(255,255,255,0.07) !important;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
/* ββ Collapse button ββ */
|
| 78 |
+
[data-testid="collapsedControl"] {
|
| 79 |
+
background: #1C1917 !important;
|
| 80 |
+
color: #A8A29E !important;
|
| 81 |
+
border-right: 1px solid #292524 !important;
|
| 82 |
+
}
|
| 83 |
+
button[kind="header"] { background: transparent !important; }
|
| 84 |
+
|
| 85 |
+
/* ββ Main area typography ββ */
|
| 86 |
+
h1, h2, h3 { font-family: 'Fraunces', serif !important; color: #1C1917 !important; }
|
| 87 |
+
h1 { font-size: 2.2rem !important; font-weight: 600 !important; line-height: 1.15 !important; }
|
| 88 |
+
h2 { font-size: 1.5rem !important; font-weight: 600 !important; }
|
| 89 |
+
h3 { font-size: 1.1rem !important; font-weight: 600 !important; }
|
| 90 |
+
p { font-family: 'Source Serif 4', serif !important; color: #292524 !important; }
|
| 91 |
+
|
| 92 |
+
/* ββ Metric cards ββ */
|
| 93 |
+
[data-testid="stMetric"] {
|
| 94 |
+
background: #FFFFFF !important; border: 1px solid #E7E5E4 !important;
|
| 95 |
+
border-radius: 8px !important; padding: 1rem 1.2rem !important;
|
| 96 |
+
}
|
| 97 |
+
[data-testid="stMetricLabel"] p {
|
| 98 |
+
font-family: 'DM Mono', monospace !important; font-size: 0.62rem !important;
|
| 99 |
+
letter-spacing: 2px !important; text-transform: uppercase !important; color: #78716C !important;
|
| 100 |
+
}
|
| 101 |
+
[data-testid="stMetricValue"] {
|
| 102 |
+
font-family: 'Fraunces', serif !important; font-size: 1.85rem !important;
|
| 103 |
+
font-weight: 600 !important; color: #1C1917 !important; line-height: 1.2 !important;
|
| 104 |
+
}
|
| 105 |
+
[data-testid="stMetricDelta"] { font-family: 'DM Mono', monospace !important; font-size: 0.7rem !important; }
|
| 106 |
+
|
| 107 |
+
/* ββ Inputs ββ */
|
| 108 |
+
[data-testid="stSelectbox"] label p,
|
| 109 |
+
[data-testid="stSlider"] label p,
|
| 110 |
+
[data-testid="stTextInput"] label p {
|
| 111 |
+
font-family: 'DM Mono', monospace !important; font-size: 0.65rem !important;
|
| 112 |
+
letter-spacing: 1.5px !important; text-transform: uppercase !important; color: #78716C !important;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
/* ββ Buttons ββ */
|
| 116 |
+
.stButton > button {
|
| 117 |
+
font-family: 'DM Mono', monospace !important; font-size: 0.7rem !important;
|
| 118 |
+
letter-spacing: 1px !important; text-transform: uppercase !important;
|
| 119 |
+
background: #1C1917 !important; color: #FAF9F7 !important;
|
| 120 |
+
border: none !important; border-radius: 6px !important; padding: 0.5rem 1.2rem !important;
|
| 121 |
+
}
|
| 122 |
+
.stButton > button:hover { background: #FB923C !important; }
|
| 123 |
+
|
| 124 |
+
/* ββ Dataframes ββ */
|
| 125 |
+
[data-testid="stDataFrame"] {
|
| 126 |
+
border: 1px solid #E7E5E4 !important; border-radius: 8px !important; overflow: hidden !important;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
/* ββ Expander ββ */
|
| 130 |
+
[data-testid="stExpander"] {
|
| 131 |
+
border: 1px solid #E7E5E4 !important; border-radius: 8px !important; background: #FFFFFF !important;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
/* ββ Caption ββ */
|
| 135 |
+
[data-testid="stCaptionContainer"] p {
|
| 136 |
+
font-family: 'DM Mono', monospace !important; font-size: 0.63rem !important;
|
| 137 |
+
color: #A8A29E !important; letter-spacing: 0.3px !important;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
/* ββ Divider ββ */
|
| 141 |
+
hr { border: none !important; border-top: 1px solid #E7E5E4 !important; margin: 1.5rem 0 !important; }
|
| 142 |
+
|
| 143 |
+
/* ββ Tabs ββ */
|
| 144 |
+
[data-testid="stTabs"] [role="tab"] {
|
| 145 |
+
font-family: 'DM Mono', monospace !important; font-size: 0.68rem !important;
|
| 146 |
+
letter-spacing: 1px !important; text-transform: uppercase !important;
|
| 147 |
+
}
|
| 148 |
+
</style>
|
| 149 |
+
""", unsafe_allow_html=True)
|
| 150 |
+
|
| 151 |
+
# ── Sidebar brand — inject before navigation ──────────────────────────────────
|
| 152 |
+
with st.sidebar:
|
| 153 |
+
st.markdown("""
|
| 154 |
+
<div style="padding:1.4rem 0.75rem 1.2rem 0.75rem;
|
| 155 |
+
border-bottom:1px solid rgba(255,255,255,0.07);
|
| 156 |
+
margin-bottom:0.75rem;">
|
| 157 |
+
<p style="font-family:'DM Mono',monospace; font-size:0.52rem; letter-spacing:4px;
|
| 158 |
+
text-transform:uppercase; color:#FB923C; margin:0 0 8px 0; line-height:1;">
|
| 159 |
+
Policy Analytics
|
| 160 |
+
</p>
|
| 161 |
+
<p style="font-family:'Fraunces',serif; font-size:1.35rem; font-weight:600;
|
| 162 |
+
color:#FAF9F7; line-height:1.1; margin:0;">
|
| 163 |
+
Scheme<br>Impact<em style="color:#FB923C;">Net</em>
|
| 164 |
+
</p>
|
| 165 |
+
<p style="font-family:'DM Mono',monospace; font-size:0.55rem; color:#57534E;
|
| 166 |
+
margin:10px 0 0 0; letter-spacing:0.4px; line-height:1.65;">
|
| 167 |
+
MNREGA Β· XGBoost Β· SciPy LP<br>
|
| 168 |
+
7,758 district-years Β· 2014β2024
|
| 169 |
+
</p>
|
| 170 |
+
</div>
|
| 171 |
+
""", unsafe_allow_html=True)
|
| 172 |
+
|
| 173 |
+
# ── Page registry ─────────────────────────────────────────────────────────────
|
| 174 |
+
pages = [
|
| 175 |
+
st.Page("pages/home.py", title="Home", icon="ποΈ", default=True),
|
| 176 |
+
st.Page("pages/overview.py", title="Overview", icon="π"),
|
| 177 |
+
st.Page("pages/districts.py", title="District Explorer", icon="π"),
|
| 178 |
+
st.Page("pages/predictions.py", title="Predictions", icon="π€"),
|
| 179 |
+
st.Page("pages/optimizer.py", title="Budget Optimizer", icon="βοΈ"),
|
| 180 |
+
st.Page("pages/spatial.py", title="Spatial Map", icon="πΊοΈ"),
|
| 181 |
+
st.Page("pages/insights.py", title="Strategic Insights", icon="π§ "),
|
| 182 |
+
]
|
| 183 |
+
|
| 184 |
+
pg = st.navigation(pages, position="sidebar")
|
| 185 |
+
pg.run()
|
frontend/pages/districts.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pages/districts.py — District deep-dive explorer.
|
| 2 |
+
|
| 3 |
+
import sys, os
|
| 4 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import plotly.graph_objects as go
|
| 8 |
+
|
| 9 |
+
from theme import inject_theme, page_header, section_label, PLOTLY_LAYOUT, SAFFRON, GREEN, RED
|
| 10 |
+
from utils.api_client import fetch_states, fetch_districts, fetch_district_history
|
| 11 |
+
|
| 12 |
+
inject_theme()
|
| 13 |
+
page_header(
|
| 14 |
+
"β Module 02",
|
| 15 |
+
"District Explorer",
|
| 16 |
+
"Full historical MNREGA performance deep-dive for any district",
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
# ── Selectors ─────────────────────────────────────────────────────────────────
|
| 20 |
+
states = fetch_states()
|
| 21 |
+
if not states:
|
| 22 |
+
st.error("β οΈ API offline β run `uvicorn backend.main:app --port 8000`")
|
| 23 |
+
st.stop()
|
| 24 |
+
|
| 25 |
+
col1, col2 = st.columns(2)
|
| 26 |
+
with col1:
|
| 27 |
+
state = st.selectbox("State", states)
|
| 28 |
+
with col2:
|
| 29 |
+
districts = fetch_districts(state)
|
| 30 |
+
if not districts:
|
| 31 |
+
st.warning("No districts found for this state.")
|
| 32 |
+
st.stop()
|
| 33 |
+
district = st.selectbox("District", districts)
|
| 34 |
+
|
| 35 |
+
# ── Fetch district history ────────────────────────────────────────────────────
|
| 36 |
+
df = fetch_district_history(state, district)
|
| 37 |
+
|
| 38 |
+
if df.empty:
|
| 39 |
+
st.warning("No historical data for this district.")
|
| 40 |
+
st.stop()
|
| 41 |
+
|
| 42 |
+
df = df.sort_values("financial_year").reset_index(drop=True)
|
| 43 |
+
|
| 44 |
+
# ── District headline ─────────────────────────────────────────────────────────
|
| 45 |
+
latest = df.iloc[-1]
|
| 46 |
+
prev = df.iloc[-2] if len(df) > 1 else latest
|
| 47 |
+
|
| 48 |
+
st.markdown(f"""
|
| 49 |
+
<div style="margin:0.5rem 0 1.5rem;">
|
| 50 |
+
<p style="font-family:'Fraunces',serif; font-size:1.65rem; font-weight:600;
|
| 51 |
+
color:#1C1917; margin:0;">
|
| 52 |
+
{district}
|
| 53 |
+
<span style="font-size:1rem; font-weight:300; color:#78716C;">Β· {state}</span>
|
| 54 |
+
</p>
|
| 55 |
+
</div>
|
| 56 |
+
""", unsafe_allow_html=True)
|
| 57 |
+
|
| 58 |
+
pd_delta = latest['person_days_lakhs'] - prev['person_days_lakhs']
|
| 59 |
+
wage_delta = latest['avg_wage_rate'] - prev['avg_wage_rate']
|
| 60 |
+
|
| 61 |
+
c1, c2, c3 = st.columns(3)
|
| 62 |
+
c1.metric(
|
| 63 |
+
"Person-Days (latest yr)",
|
| 64 |
+
f"{latest['person_days_lakhs']:.2f}L",
|
| 65 |
+
delta=f"{pd_delta:+.2f}L",
|
| 66 |
+
)
|
| 67 |
+
c2.metric(
|
| 68 |
+
"Avg Wage Rate",
|
| 69 |
+
f"βΉ{latest['avg_wage_rate']:.0f}/day",
|
| 70 |
+
delta=f"βΉ{wage_delta:+.0f}",
|
| 71 |
+
)
|
| 72 |
+
c3.metric(
|
| 73 |
+
"Years on Record",
|
| 74 |
+
f"{len(df)}",
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
st.markdown("---")
|
| 78 |
+
|
| 79 |
+
# ── Person-Days Trend ─────────────────────────────────────────────────────────
|
| 80 |
+
section_label("Person-Days Trend")
|
| 81 |
+
fig1 = go.Figure()
|
| 82 |
+
fig1.add_scatter(
|
| 83 |
+
x=df["financial_year"], y=df["person_days_lakhs"],
|
| 84 |
+
mode="lines+markers",
|
| 85 |
+
fill="tozeroy",
|
| 86 |
+
fillcolor="rgba(251,146,60,0.07)",
|
| 87 |
+
line=dict(color=SAFFRON, width=2.5),
|
| 88 |
+
marker=dict(size=6, color=SAFFRON, line=dict(width=1.5, color="#FFFFFF")),
|
| 89 |
+
name="Person-Days",
|
| 90 |
+
hovertemplate="FY%{x}<br>PD: <b>%{y:.2f}L</b><extra></extra>",
|
| 91 |
+
)
|
| 92 |
+
if 2020 in df["financial_year"].values:
|
| 93 |
+
fig1.add_vline(
|
| 94 |
+
x=2020, line_dash="dot", line_color=RED, line_width=1.5,
|
| 95 |
+
annotation_text="COVID",
|
| 96 |
+
annotation_font=dict(color=RED, size=9, family="DM Mono, monospace"),
|
| 97 |
+
)
|
| 98 |
+
if 2022 in df["financial_year"].values:
|
| 99 |
+
fig1.add_vline(
|
| 100 |
+
x=2022, line_dash="dot", line_color="#A8A29E", line_width=1,
|
| 101 |
+
annotation_text="2022 anomaly",
|
| 102 |
+
annotation_font=dict(color="#A8A29E", size=9, family="DM Mono, monospace"),
|
| 103 |
+
)
|
| 104 |
+
l1 = {**PLOTLY_LAYOUT}
|
| 105 |
+
l1.update(dict(
|
| 106 |
+
height=320,
|
| 107 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Lakh Person-Days"),
|
| 108 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Financial Year", dtick=1),
|
| 109 |
+
))
|
| 110 |
+
fig1.update_layout(**l1)
|
| 111 |
+
st.plotly_chart(fig1, use_container_width=True, config={"displayModeBar": False})
|
| 112 |
+
|
| 113 |
+
# ── YoY Change ────────────────────────────────────────────────────────────────
|
| 114 |
+
section_label("Year-on-Year Change")
|
| 115 |
+
df["yoy"] = df["person_days_lakhs"].pct_change() * 100
|
| 116 |
+
|
| 117 |
+
fig2 = go.Figure()
|
| 118 |
+
fig2.add_bar(
|
| 119 |
+
x=df["financial_year"],
|
| 120 |
+
y=df["yoy"],
|
| 121 |
+
marker=dict(
|
| 122 |
+
color=[GREEN if v >= 0 else RED for v in df["yoy"].fillna(0)],
|
| 123 |
+
opacity=0.8,
|
| 124 |
+
),
|
| 125 |
+
hovertemplate="FY%{x}<br>YoY: <b>%{y:+.1f}%</b><extra></extra>",
|
| 126 |
+
)
|
| 127 |
+
fig2.add_hline(y=0, line_dash="solid", line_color="#1C1917", line_width=1)
|
| 128 |
+
l2 = {**PLOTLY_LAYOUT}
|
| 129 |
+
l2.update(dict(
|
| 130 |
+
height=220,
|
| 131 |
+
bargap=0.3,
|
| 132 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="% Change"),
|
| 133 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Financial Year", dtick=1),
|
| 134 |
+
))
|
| 135 |
+
fig2.update_layout(**l2)
|
| 136 |
+
st.plotly_chart(fig2, use_container_width=True, config={"displayModeBar": False})
|
| 137 |
+
|
| 138 |
+
# ── Wage Rate Trend ───────────────────────────────────────────────────────────
|
| 139 |
+
section_label("Wage Rate History")
|
| 140 |
+
fig3 = go.Figure()
|
| 141 |
+
fig3.add_scatter(
|
| 142 |
+
x=df["financial_year"], y=df["avg_wage_rate"],
|
| 143 |
+
mode="lines+markers",
|
| 144 |
+
fill="tozeroy",
|
| 145 |
+
fillcolor="rgba(22,163,74,0.06)",
|
| 146 |
+
line=dict(color=GREEN, width=2),
|
| 147 |
+
marker=dict(size=6, color=GREEN),
|
| 148 |
+
hovertemplate="FY%{x}<br>βΉ%{y:.0f}/day<extra></extra>",
|
| 149 |
+
)
|
| 150 |
+
l3 = {**PLOTLY_LAYOUT}
|
| 151 |
+
l3.update(dict(
|
| 152 |
+
height=220,
|
| 153 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="βΉ/day"),
|
| 154 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Financial Year", dtick=1),
|
| 155 |
+
))
|
| 156 |
+
fig3.update_layout(**l3)
|
| 157 |
+
st.plotly_chart(fig3, use_container_width=True, config={"displayModeBar": False})
|
| 158 |
+
|
| 159 |
+
# ── Raw data ──────────────────────────────────────────────────────────────────
|
| 160 |
+
with st.expander("π Raw Data Table"):
|
| 161 |
+
display_cols = [c for c in [
|
| 162 |
+
"financial_year", "person_days_lakhs", "avg_wage_rate",
|
| 163 |
+
] if c in df.columns]
|
| 164 |
+
st.dataframe(df[display_cols].round(3), use_container_width=True, hide_index=True)
|
frontend/pages/home.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pages/home.py — Landing dashboard.
|
| 2 |
+
|
| 3 |
+
import sys, os
|
| 4 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import numpy as np
|
| 8 |
+
import plotly.graph_objects as go
|
| 9 |
+
|
| 10 |
+
from theme import (
|
| 11 |
+
inject_theme, page_header, section_label, kpi_html,
|
| 12 |
+
signal_card_html, PLOTLY_LAYOUT, SAFFRON, SAFFRON_SCALE, GREEN, RED, AMBER,
|
| 13 |
+
)
|
| 14 |
+
from utils.api_client import (
|
| 15 |
+
is_online, fetch_stats, fetch_predictions, fetch_optimizer_results,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
inject_theme()
|
| 19 |
+
|
| 20 |
+
# ── Status pill ───────────────────────────────────────────────────────────────
|
| 21 |
+
online = is_online()
|
| 22 |
+
pill_color = "#16A34A" if online else "#DC2626"
|
| 23 |
+
pill_text = "API LIVE" if online else "API OFFLINE β run `uvicorn backend.main:app --port 8000`"
|
| 24 |
+
st.markdown(
|
| 25 |
+
f'<div style="display:flex;align-items:center;gap:8px;margin-bottom:1.4rem;">'
|
| 26 |
+
f'<span style="width:7px;height:7px;border-radius:50%;background:{pill_color};display:inline-block;"></span>'
|
| 27 |
+
f'<span style="font-family:DM Mono,monospace;font-size:0.62rem;letter-spacing:2px;'
|
| 28 |
+
f'text-transform:uppercase;color:{pill_color};">{pill_text}</span></div>',
|
| 29 |
+
unsafe_allow_html=True,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
page_header(
|
| 33 |
+
"β MNREGA Β· India Β· 2014β2024",
|
| 34 |
+
"SchemeImpactNet",
|
| 35 |
+
"Predictive impact analysis and budget optimisation for India's rural employment scheme",
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
# ── Data fetch ────────────────────────────────────────────────────────────────
|
| 39 |
+
stats = fetch_stats()
|
| 40 |
+
pred_df = fetch_predictions()
|
| 41 |
+
opt_df = fetch_optimizer_results()
|
| 42 |
+
|
| 43 |
+
# Derived KPIs
|
| 44 |
+
n_dist = stats.get("total_districts", "β")
|
| 45 |
+
n_states = stats.get("total_states", "β")
|
| 46 |
+
yr_range = stats.get("year_range", "β")
|
| 47 |
+
total_pd = stats.get("total_persondays_lakhs", 0)
|
| 48 |
+
covid_pct = stats.get("covid_spike_pct", 0)
|
| 49 |
+
|
| 50 |
+
nat_gain = gain_pct = 0.0
|
| 51 |
+
if not opt_df.empty and "persondays_gain" in opt_df.columns:
|
| 52 |
+
nat_gain = opt_df["persondays_gain"].sum()
|
| 53 |
+
sq_sum = opt_df["sq_persondays"].sum() if "sq_persondays" in opt_df.columns else 1
|
| 54 |
+
gain_pct = nat_gain / sq_sum * 100 if sq_sum else 0
|
| 55 |
+
|
| 56 |
+
# ── KPI strip ─────────────────────────────────────────────────────────────────
|
| 57 |
+
c1, c2, c3, c4, c5 = st.columns(5, gap="small")
|
| 58 |
+
cards = [
|
| 59 |
+
(str(n_dist), "Districts", SAFFRON, ""),
|
| 60 |
+
(str(n_states), "States / UTs", "#1C1917", ""),
|
| 61 |
+
(f"{total_pd:,.0f}L", "Person-Days", "#1C1917", "historical total"),
|
| 62 |
+
(f"{covid_pct:+.1f}%", "COVID-20 Spike", RED, "2020 peak"),
|
| 63 |
+
(f"{gain_pct:+.2f}%", "LP Opt. Gain", GREEN, "budget-neutral"),
|
| 64 |
+
]
|
| 65 |
+
for col, (val, label, color, note) in zip([c1, c2, c3, c4, c5], cards):
|
| 66 |
+
with col:
|
| 67 |
+
st.markdown(kpi_html(val, label, color, note), unsafe_allow_html=True)
|
| 68 |
+
|
| 69 |
+
st.markdown("<div style='margin-top:2rem'></div>", unsafe_allow_html=True)
|
| 70 |
+
|
| 71 |
+
# ── Two-column layout ─────────────────────────────────────────────────────────
|
| 72 |
+
left, right = st.columns([3, 2], gap="large")
|
| 73 |
+
|
| 74 |
+
# ── LEFT: state bubble map ────────────────────────────────────────────────────
|
| 75 |
+
STATE_COORDS = {
|
| 76 |
+
"Andhra Pradesh": (15.9, 79.7), "Arunachal Pradesh": (28.2, 94.7),
|
| 77 |
+
"Assam": (26.2, 92.9), "Bihar": (25.1, 85.3),
|
| 78 |
+
"Chhattisgarh": (21.3, 81.7), "Goa": (15.3, 74.0),
|
| 79 |
+
"Gujarat": (22.3, 71.2), "Haryana": (29.1, 76.1),
|
| 80 |
+
"Himachal Pradesh": (31.1, 77.2), "Jharkhand": (23.6, 85.3),
|
| 81 |
+
"Karnataka": (15.3, 75.7), "Kerala": (10.9, 76.3),
|
| 82 |
+
"Madhya Pradesh": (22.9, 78.7), "Maharashtra": (19.7, 75.7),
|
| 83 |
+
"Manipur": (24.7, 93.9), "Meghalaya": (25.5, 91.4),
|
| 84 |
+
"Mizoram": (23.2, 92.7), "Nagaland": (26.2, 94.6),
|
| 85 |
+
"Odisha": (20.9, 85.1), "Punjab": (31.1, 75.3),
|
| 86 |
+
"Rajasthan": (27.0, 74.2), "Sikkim": (27.5, 88.5),
|
| 87 |
+
"Tamil Nadu": (11.1, 78.7), "Telangana": (17.4, 79.1),
|
| 88 |
+
"Tripura": (23.9, 91.5), "Uttar Pradesh": (26.8, 80.9),
|
| 89 |
+
"Uttarakhand": (30.1, 79.3), "West Bengal": (22.9, 87.9),
|
| 90 |
+
"Jammu and Kashmir": (33.7, 76.9), "Ladakh": (34.2, 77.6),
|
| 91 |
+
"Delhi": (28.7, 77.1), "Puducherry": (11.9, 79.8),
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
with left:
|
| 95 |
+
section_label("State-Level Employment Β· Latest Year")
|
| 96 |
+
|
| 97 |
+
if not pred_df.empty and "financial_year" in pred_df.columns:
|
| 98 |
+
ly = pred_df["financial_year"].max()
|
| 99 |
+
agg = (
|
| 100 |
+
pred_df[pred_df["financial_year"] == ly]
|
| 101 |
+
.groupby("state", as_index=False)
|
| 102 |
+
.agg(
|
| 103 |
+
pd_sum =("person_days_lakhs", "sum"),
|
| 104 |
+
pred_sum =("predicted_persondays", "sum"),
|
| 105 |
+
n_dist =("district", "count"),
|
| 106 |
+
avg_err =("prediction_error", "mean"),
|
| 107 |
+
)
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
rng = np.random.default_rng(42)
|
| 111 |
+
lats, lons, szs = [], [], []
|
| 112 |
+
for _, r in agg.iterrows():
|
| 113 |
+
lat, lon = STATE_COORDS.get(r["state"], (22.0, 78.0))
|
| 114 |
+
lats.append(lat + rng.uniform(-0.12, 0.12))
|
| 115 |
+
lons.append(lon + rng.uniform(-0.12, 0.12))
|
| 116 |
+
szs.append(float(r["pd_sum"]))
|
| 117 |
+
|
| 118 |
+
mn, mx = min(szs), max(szs)
|
| 119 |
+
bsz = [float(np.clip((v - mn) / (mx - mn + 1e-9) * 14 + 5, 5, 19)) for v in szs]
|
| 120 |
+
|
| 121 |
+
fig = go.Figure()
|
| 122 |
+
fig.add_scattergeo(
|
| 123 |
+
lat=lats, lon=lons, mode="markers",
|
| 124 |
+
marker=dict(
|
| 125 |
+
size=bsz, color=szs,
|
| 126 |
+
colorscale=SAFFRON_SCALE,
|
| 127 |
+
colorbar=dict(
|
| 128 |
+
title=dict(text="Lakh PD", font=dict(color="#78716C", size=9)),
|
| 129 |
+
tickfont=dict(color="#78716C", size=8),
|
| 130 |
+
thickness=8, len=0.45,
|
| 131 |
+
bgcolor="rgba(255,255,255,0.85)",
|
| 132 |
+
),
|
| 133 |
+
opacity=0.88,
|
| 134 |
+
line=dict(width=1, color="#FFFFFF"),
|
| 135 |
+
),
|
| 136 |
+
text=agg["state"],
|
| 137 |
+
customdata=list(zip(
|
| 138 |
+
agg["pd_sum"].round(1),
|
| 139 |
+
agg["pred_sum"].round(1),
|
| 140 |
+
agg["n_dist"],
|
| 141 |
+
agg["avg_err"].round(2),
|
| 142 |
+
)),
|
| 143 |
+
hovertemplate=(
|
| 144 |
+
"<b>%{text}</b><br>"
|
| 145 |
+
"Actual PD: <b>%{customdata[0]}L</b><br>"
|
| 146 |
+
"Predicted: <b>%{customdata[1]}L</b><br>"
|
| 147 |
+
"Districts: %{customdata[2]}<br>"
|
| 148 |
+
"Avg Model Error: %{customdata[3]}L"
|
| 149 |
+
"<extra></extra>"
|
| 150 |
+
),
|
| 151 |
+
)
|
| 152 |
+
fig.update_geos(
|
| 153 |
+
scope="asia", showland=True, landcolor="#F5F5F4",
|
| 154 |
+
showocean=True, oceancolor="#EFF6FF",
|
| 155 |
+
showcountries=True, countrycolor="#D6D3D1",
|
| 156 |
+
showsubunits=True, subunitcolor="#E7E5E4",
|
| 157 |
+
center=dict(lat=22, lon=80), projection_scale=5.2,
|
| 158 |
+
bgcolor="rgba(0,0,0,0)",
|
| 159 |
+
)
|
| 160 |
+
fig.update_layout(
|
| 161 |
+
height=420, paper_bgcolor="rgba(0,0,0,0)",
|
| 162 |
+
margin=dict(l=0, r=0, t=0, b=0),
|
| 163 |
+
font=dict(family="DM Mono, monospace", color="#1C1917"),
|
| 164 |
+
showlegend=False,
|
| 165 |
+
)
|
| 166 |
+
st.plotly_chart(fig, use_container_width=True, config={"displayModeBar": False})
|
| 167 |
+
st.caption(f"FY {ly} Β· bubble size β employment volume Β· hover for model predictions")
|
| 168 |
+
else:
|
| 169 |
+
st.info("Start the backend to load state-level data.")
|
| 170 |
+
|
| 171 |
+
# ── RIGHT: brief + signals ────────────────────────────────────────────────────
|
| 172 |
+
with right:
|
| 173 |
+
section_label("Intelligence Brief")
|
| 174 |
+
|
| 175 |
+
n_declining = n_underfunded = 0
|
| 176 |
+
top_state = "β"
|
| 177 |
+
if not pred_df.empty:
|
| 178 |
+
ly = pred_df["financial_year"].max()
|
| 179 |
+
lat = pred_df[pred_df["financial_year"] == ly]
|
| 180 |
+
prv = pred_df[pred_df["financial_year"] == ly - 1]
|
| 181 |
+
if not prv.empty:
|
| 182 |
+
mg = lat.merge(
|
| 183 |
+
prv[["state", "district", "person_days_lakhs"]].rename(
|
| 184 |
+
columns={"person_days_lakhs": "prev"}
|
| 185 |
+
),
|
| 186 |
+
on=["state", "district"], how="left",
|
| 187 |
+
)
|
| 188 |
+
n_declining = int((mg["predicted_persondays"] < mg["prev"]).sum())
|
| 189 |
+
|
| 190 |
+
if not opt_df.empty and "budget_allocated_lakhs" in opt_df.columns:
|
| 191 |
+
th = opt_df["budget_allocated_lakhs"].quantile(0.33)
|
| 192 |
+
n_underfunded = int((opt_df["budget_allocated_lakhs"] < th).sum())
|
| 193 |
+
if not opt_df.empty and "persondays_gain" in opt_df.columns:
|
| 194 |
+
top_state = opt_df.groupby("state")["persondays_gain"].sum().idxmax()
|
| 195 |
+
|
| 196 |
+
gain_str = f"{nat_gain:+,.1f}L" if nat_gain else "β"
|
| 197 |
+
ly_label = pred_df["financial_year"].max() if not pred_df.empty else "β"
|
| 198 |
+
|
| 199 |
+
st.markdown(f"""
|
| 200 |
+
<div style="background:#FFF7ED; border:1px solid #FED7AA; border-left:3px solid #FB923C;
|
| 201 |
+
border-radius:8px; padding:1.2rem 1.4rem; margin-bottom:1rem;">
|
| 202 |
+
<p style="font-family:'DM Mono',monospace; font-size:0.56rem; letter-spacing:2.5px;
|
| 203 |
+
text-transform:uppercase; color:#FB923C; margin:0 0 9px 0;">
|
| 204 |
+
β Auto-generated Β· Pipeline FY {ly_label}</p>
|
| 205 |
+
<p style="font-family:'Source Serif 4',serif; font-size:0.88rem; color:#431407;
|
| 206 |
+
line-height:1.75; margin:0;">
|
| 207 |
+
Budget-neutral LP reallocation yields a projected
|
| 208 |
+
<strong>{gain_str}</strong> of additional employment β
|
| 209 |
+
a <strong>{gain_pct:+.2f}%</strong> uplift at zero additional outlay.
|
| 210 |
+
<strong>{n_declining} districts</strong> face declining employment trajectories.
|
| 211 |
+
Highest reallocation opportunity: <strong>{top_state}</strong>.
|
| 212 |
+
<strong>{n_underfunded} districts</strong> in the bottom budget tercile show
|
| 213 |
+
above-average delivery efficiency.
|
| 214 |
+
</p>
|
| 215 |
+
</div>
|
| 216 |
+
""", unsafe_allow_html=True)
|
| 217 |
+
|
| 218 |
+
section_label("Live Signals")
|
| 219 |
+
signals = [
|
| 220 |
+
(str(n_declining), "High-Risk Districts", "Predicted employment decline", RED),
|
| 221 |
+
(str(n_underfunded), "Underfunded Β· High Eff.", "Bottom-tercile budget", AMBER),
|
| 222 |
+
(gain_str, "LP Reallocation Gain", f"Budget-neutral Β· {gain_pct:+.2f}%", GREEN),
|
| 223 |
+
(str(n_dist), "Districts in Model", "GBR Β· Walk-fwd CV RΒ²β0.91", SAFFRON),
|
| 224 |
+
]
|
| 225 |
+
for val, title, body, accent in signals:
|
| 226 |
+
st.markdown(signal_card_html(val, title, body, accent), unsafe_allow_html=True)
|
frontend/pages/insights.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pages/insights.py — Strategic Insights & Policy Brief.
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
import sys, os
|
| 5 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 6 |
+
|
| 7 |
+
import streamlit as st
|
| 8 |
+
import plotly.graph_objects as go
|
| 9 |
+
import plotly.express as px
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
from theme import inject_theme, page_header, section_label, kpi_html, signal_card_html, PLOTLY_LAYOUT, SAFFRON, GREEN, RED, AMBER, BLUE
|
| 14 |
+
from utils.api_client import fetch_states, fetch_predictions, fetch_optimizer_results, fetch_yearly_trend
|
| 15 |
+
|
| 16 |
+
inject_theme()
|
| 17 |
+
page_header(
|
| 18 |
+
"β Module 06",
|
| 19 |
+
"Strategic Insights",
|
| 20 |
+
"Auto-generated policy intelligence β high-risk districts, efficiency leaders, and reallocation priorities",
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
states = fetch_states()
|
| 24 |
+
if not states:
|
| 25 |
+
st.error("β οΈ API offline β run `uvicorn backend.main:app --port 8000`")
|
| 26 |
+
st.stop()
|
| 27 |
+
|
| 28 |
+
cs, _ = st.columns([1, 2])
|
| 29 |
+
with cs:
|
| 30 |
+
scope = st.selectbox("State Scope", ["All India"] + states)
|
| 31 |
+
state_param = None if scope == "All India" else scope
|
| 32 |
+
|
| 33 |
+
pred_df = fetch_predictions(state=state_param)
|
| 34 |
+
opt_df = fetch_optimizer_results(state=state_param)
|
| 35 |
+
trend = fetch_yearly_trend(state_param)
|
| 36 |
+
|
| 37 |
+
if pred_df.empty:
|
| 38 |
+
st.info("No data β run the pipeline first.")
|
| 39 |
+
st.stop()
|
| 40 |
+
|
| 41 |
+
st.markdown("---")
|
| 42 |
+
|
| 43 |
+
# ── Section A: Declining districts ────────────────────────────────────────────
|
| 44 |
+
section_label("A. High-Risk Districts β Declining Employment Trajectory")
|
| 45 |
+
|
| 46 |
+
ly = pred_df["financial_year"].max()
|
| 47 |
+
prv = ly - 1
|
| 48 |
+
|
| 49 |
+
lat = pred_df[pred_df["financial_year"] == ly].copy()
|
| 50 |
+
prv_df = pred_df[pred_df["financial_year"] == prv].copy()
|
| 51 |
+
|
| 52 |
+
if not prv_df.empty:
|
| 53 |
+
mg = lat.merge(
|
| 54 |
+
prv_df[["state", "district", "person_days_lakhs"]].rename(
|
| 55 |
+
columns={"person_days_lakhs": "prev_actual"}
|
| 56 |
+
),
|
| 57 |
+
on=["state", "district"], how="inner",
|
| 58 |
+
)
|
| 59 |
+
mg["predicted_chg"] = mg["predicted_persondays"] - mg["prev_actual"]
|
| 60 |
+
# Treat a zero baseline as missing so the percentage comes out NaN rather than ±inf,
# which would otherwise distort the bar colour scale and the hover text.
mg["predicted_chg_pct"] = (
    mg["predicted_chg"] / mg["prev_actual"].replace(0, np.nan) * 100
).round(2)
|
| 61 |
+
|
| 62 |
+
declining = mg[mg["predicted_chg"] < 0].copy().nsmallest(20, "predicted_chg")
|
| 63 |
+
declining["label"] = declining["district"] + " Β· " + declining["state"]
|
| 64 |
+
|
| 65 |
+
if not declining.empty:
|
| 66 |
+
col_risk, col_info = st.columns([2, 1])
|
| 67 |
+
with col_risk:
|
| 68 |
+
fig1 = go.Figure()
|
| 69 |
+
fig1.add_bar(
|
| 70 |
+
x=declining["predicted_chg"],
|
| 71 |
+
y=declining["label"],
|
| 72 |
+
orientation="h",
|
| 73 |
+
marker=dict(
|
| 74 |
+
color=declining["predicted_chg_pct"],
|
| 75 |
+
colorscale=[[0, "#7F1D1D"], [1, "#FCA5A5"]],
|
| 76 |
+
showscale=False,
|
| 77 |
+
opacity=0.85,
|
| 78 |
+
),
|
| 79 |
+
customdata=list(zip(
|
| 80 |
+
declining["state"], declining["district"],
|
| 81 |
+
declining["prev_actual"].round(2),
|
| 82 |
+
declining["predicted_persondays"].round(2),
|
| 83 |
+
declining["predicted_chg"].round(2),
|
| 84 |
+
declining["predicted_chg_pct"],
|
| 85 |
+
)),
|
| 86 |
+
hovertemplate=(
|
| 87 |
+
"<b>%{customdata[1]}</b> Β· %{customdata[0]}<br>"
|
| 88 |
+
"Actual: %{customdata[2]}L<br>"
|
| 89 |
+
"Predicted: %{customdata[3]}L<br>"
|
| 90 |
+
"Change: <b>%{customdata[4]:+.2f}L</b> (%{customdata[5]:+.1f}%)"
|
| 91 |
+
"<extra></extra>"
|
| 92 |
+
),
|
| 93 |
+
)
|
| 94 |
+
l1 = {**PLOTLY_LAYOUT}
|
| 95 |
+
l1.update(dict(
|
| 96 |
+
height=max(380, len(declining) * 26),
|
| 97 |
+
title=dict(text=f"Districts with Declining Predicted Employment Β· FY{prv}β{ly}",
|
| 98 |
+
font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
|
| 99 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Predicted Change (Lakh PD)"),
|
| 100 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"]),
|
| 101 |
+
bargap=0.28, showlegend=False,
|
| 102 |
+
))
|
| 103 |
+
fig1.update_layout(**l1)
|
| 104 |
+
st.plotly_chart(fig1, width="stretch", config={"displayModeBar": False})
|
| 105 |
+
|
| 106 |
+
with col_info:
|
| 107 |
+
st.markdown(f"""
|
| 108 |
+
<div style="background:#FEF2F2; border:1px solid #FECACA; border-left:3px solid #DC2626;
|
| 109 |
+
border-radius:8px; padding:1.1rem 1.2rem; margin-bottom:0.8rem;">
|
| 110 |
+
<p style="font-family:'DM Mono',monospace; font-size:0.56rem; letter-spacing:2px;
|
| 111 |
+
text-transform:uppercase; color:#DC2626; margin:0 0 8px 0;">Risk Alert</p>
|
| 112 |
+
<p style="font-family:'Fraunces',serif; font-size:1.6rem; font-weight:600;
|
| 113 |
+
color:#7F1D1D; margin:0 0 4px 0;">{len(declining)}</p>
|
| 114 |
+
<p style="font-family:'Source Serif 4',serif; font-size:0.82rem; color:#991B1B;
|
| 115 |
+
margin:0; line-height:1.5;">
|
| 116 |
+
Districts predicted to see employment decline next cycle.
|
| 117 |
+
Avg change: <strong>{declining['predicted_chg'].mean():+.2f}L</strong> person-days.
|
| 118 |
+
</p>
|
| 119 |
+
</div>
|
| 120 |
+
|
| 121 |
+
<div style="background:#FFFFFF; border:1px solid #E7E5E4;
|
| 122 |
+
border-radius:8px; padding:1rem 1.1rem;">
|
| 123 |
+
<p style="font-family:'DM Mono',monospace; font-size:0.56rem; letter-spacing:2px;
|
| 124 |
+
text-transform:uppercase; color:#A8A29E; margin:0 0 8px 0;">Worst Decline</p>
|
| 125 |
+
<p style="font-family:'Fraunces',serif; font-size:1.1rem; font-weight:600;
|
| 126 |
+
color:#1C1917; margin:0 0 2px 0;">{declining.iloc[0]['district']}</p>
|
| 127 |
+
<p style="font-family:'DM Mono',monospace; font-size:0.62rem; color:#78716C; margin:0;">
|
| 128 |
+
{declining.iloc[0]['state']} Β· {declining.iloc[0]['predicted_chg']:+.2f}L
|
| 129 |
+
</p>
|
| 130 |
+
</div>
|
| 131 |
+
""", unsafe_allow_html=True)
|
| 132 |
+
else:
|
| 133 |
+
# Reached when no district has a negative predicted change.
st.success("✅ No districts show predicted employment decline.")
|
| 134 |
+
else:
|
| 135 |
+
st.info("Previous year data unavailable for trend comparison.")
|
| 136 |
+
|
| 137 |
+
st.markdown("---")
|
| 138 |
+
|
| 139 |
+
# ── Section B: Efficiency leaders & laggards ──────────────────────────────────
|
| 140 |
+
section_label("B. Cost Efficiency β Leaders & Laggards")
|
| 141 |
+
|
| 142 |
+
eff_df = (
|
| 143 |
+
pred_df.groupby(["state", "district"], as_index=False)
|
| 144 |
+
.agg(
|
| 145 |
+
avg_actual =("person_days_lakhs", "mean"),
|
| 146 |
+
avg_predicted =("predicted_persondays", "mean"),
|
| 147 |
+
avg_error =("prediction_error", "mean"),
|
| 148 |
+
)
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
if not opt_df.empty and "persondays_per_lakh" in opt_df.columns:
|
| 152 |
+
eff_sub = opt_df[["state", "district", "persondays_per_lakh"]].drop_duplicates(["state", "district"])
|
| 153 |
+
eff_df = eff_df.merge(eff_sub, on=["state", "district"], how="left")
|
| 154 |
+
|
| 155 |
+
top_eff = eff_df.nlargest(12, "persondays_per_lakh")
|
| 156 |
+
bot_eff = eff_df.nsmallest(12, "persondays_per_lakh")
|
| 157 |
+
|
| 158 |
+
col_e1, col_e2 = st.columns(2)
|
| 159 |
+
for col_e, sub, title_str, c in [
|
| 160 |
+
(col_e1, top_eff, "Top 12 Most Efficient", GREEN),
|
| 161 |
+
(col_e2, bot_eff, "Bottom 12 Least Efficient", RED),
|
| 162 |
+
]:
|
| 163 |
+
with col_e:
|
| 164 |
+
sub = sub.copy()
|
| 165 |
+
sub["label"] = sub["district"] + " Β· " + sub["state"]
|
| 166 |
+
fig_e = go.Figure()
|
| 167 |
+
fig_e.add_bar(
|
| 168 |
+
x=sub["persondays_per_lakh"],
|
| 169 |
+
y=sub["label"],
|
| 170 |
+
orientation="h",
|
| 171 |
+
marker=dict(color=c, opacity=0.78),
|
| 172 |
+
hovertemplate="<b>%{y}</b><br>%{x:.4f} PD/βΉL<extra></extra>",
|
| 173 |
+
)
|
| 174 |
+
l_e = {**PLOTLY_LAYOUT}
|
| 175 |
+
l_e.update(dict(
|
| 176 |
+
height=340,
|
| 177 |
+
title=dict(text=title_str, font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
|
| 178 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="PD per βΉ Lakh"),
|
| 179 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"], autorange="reversed"),
|
| 180 |
+
bargap=0.25, showlegend=False,
|
| 181 |
+
))
|
| 182 |
+
fig_e.update_layout(**l_e)
|
| 183 |
+
st.plotly_chart(fig_e, width="stretch", config={"displayModeBar": False})
|
| 184 |
+
else:
|
| 185 |
+
st.info("Run optimizer pipeline to see efficiency rankings.")
|
| 186 |
+
|
| 187 |
+
st.markdown("---")
|
| 188 |
+
|
| 189 |
+
# ── Section C: State-level LP opportunities ───────────────────────────────────
|
| 190 |
+
section_label("C. LP Reallocation Opportunities by State")
|
| 191 |
+
|
| 192 |
+
if not opt_df.empty and "persondays_gain" in opt_df.columns:
|
| 193 |
+
state_gain = (
|
| 194 |
+
opt_df.groupby("state", as_index=False)
|
| 195 |
+
.agg(
|
| 196 |
+
total_gain =("persondays_gain", "sum"),
|
| 197 |
+
n_districts=("district", "count"),
|
| 198 |
+
avg_eff =("persondays_per_lakh", "mean"),
|
| 199 |
+
total_bud =("budget_allocated_lakhs", "sum"),
|
| 200 |
+
)
|
| 201 |
+
.sort_values("total_gain", ascending=False)
|
| 202 |
+
)
|
| 203 |
+
state_gain["gain_per_dist"] = (state_gain["total_gain"] / state_gain["n_districts"]).round(3)
|
| 204 |
+
|
| 205 |
+
fig_s = go.Figure()
|
| 206 |
+
fig_s.add_bar(
|
| 207 |
+
x=state_gain["state"],
|
| 208 |
+
y=state_gain["total_gain"],
|
| 209 |
+
marker=dict(
|
| 210 |
+
color=state_gain["total_gain"],
|
| 211 |
+
colorscale=[[0, "#FEF3C7"], [0.5, "#FB923C"], [1, "#7C2D12"]],
|
| 212 |
+
showscale=False,
|
| 213 |
+
opacity=0.85,
|
| 214 |
+
),
|
| 215 |
+
customdata=list(zip(
|
| 216 |
+
state_gain["state"],
|
| 217 |
+
state_gain["total_gain"].round(2),
|
| 218 |
+
state_gain["n_districts"],
|
| 219 |
+
state_gain["avg_eff"].round(4),
|
| 220 |
+
state_gain["total_bud"].round(0),
|
| 221 |
+
)),
|
| 222 |
+
hovertemplate=(
|
| 223 |
+
"<b>%{customdata[0]}</b><br>"
|
| 224 |
+
"Total PD Gain: <b>%{customdata[1]:+.2f}L</b><br>"
|
| 225 |
+
"Districts: %{customdata[2]}<br>"
|
| 226 |
+
"Avg Efficiency: %{customdata[3]} PD/βΉL<br>"
|
| 227 |
+
"Total Budget: βΉ%{customdata[4]:,.0f}L"
|
| 228 |
+
"<extra></extra>"
|
| 229 |
+
),
|
| 230 |
+
)
|
| 231 |
+
l_s = {**PLOTLY_LAYOUT}
|
| 232 |
+
l_s.update(dict(
|
| 233 |
+
height=360,
|
| 234 |
+
title=dict(text="Total LP Person-Day Gain by State",
|
| 235 |
+
font=dict(family="Fraunces, serif", size=14, color="#1C1917")),
|
| 236 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="State", tickangle=-35),
|
| 237 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Total PD Gain (Lakh)"),
|
| 238 |
+
bargap=0.3,
|
| 239 |
+
))
|
| 240 |
+
fig_s.update_layout(**l_s)
|
| 241 |
+
st.plotly_chart(fig_s, width="stretch", config={"displayModeBar": False})
|
| 242 |
+
|
| 243 |
+
with st.expander("π State-Level Summary Table"):
|
| 244 |
+
st.dataframe(state_gain.round(3), width="stretch", hide_index=True)
|
| 245 |
+
else:
|
| 246 |
+
st.info("No optimizer data β run `python main.py --stage 3`.")
|
| 247 |
+
|
| 248 |
+
st.markdown("---")
|
| 249 |
+
|
| 250 |
+
# ── Section D: National trend analysis ────────────────────────────────────────
|
| 251 |
+
section_label("D. National Employment Trend & COVID Impact")
|
| 252 |
+
|
| 253 |
+
if not trend.empty:
|
| 254 |
+
fig_t = go.Figure()
|
| 255 |
+
fig_t.add_scatter(
|
| 256 |
+
x=trend["financial_year"], y=trend["total_persondays"],
|
| 257 |
+
name="Total PD (Lakh)", mode="lines+markers",
|
| 258 |
+
fill="tozeroy", fillcolor="rgba(251,146,60,0.07)",
|
| 259 |
+
line=dict(color=SAFFRON, width=2.5),
|
| 260 |
+
marker=dict(size=7, color=SAFFRON),
|
| 261 |
+
)
|
| 262 |
+
if 2020 in trend["financial_year"].values:
|
| 263 |
+
fig_t.add_vline(
|
| 264 |
+
x=2020, line_dash="dot", line_color=RED, line_width=1.5,
|
| 265 |
+
annotation_text="COVID surge",
|
| 266 |
+
annotation_font=dict(color=RED, size=9, family="DM Mono, monospace"),
|
| 267 |
+
)
|
| 268 |
+
l_t = {**PLOTLY_LAYOUT}
|
| 269 |
+
l_t.update(dict(
|
| 270 |
+
height=260,
|
| 271 |
+
title=dict(text="National Person-Days Trend",
|
| 272 |
+
font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
|
| 273 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Financial Year", dtick=1),
|
| 274 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Lakh PD"),
|
| 275 |
+
showlegend=False,
|
| 276 |
+
))
|
| 277 |
+
fig_t.update_layout(**l_t)
|
| 278 |
+
st.plotly_chart(fig_t, width="stretch", config={"displayModeBar": False})
|
frontend/pages/optimizer.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pages/optimizer.py — Budget reallocation optimizer results and live LP runner.
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
import sys, os
|
| 5 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 6 |
+
|
| 7 |
+
import streamlit as st
|
| 8 |
+
import plotly.graph_objects as go
|
| 9 |
+
import plotly.express as px
|
| 10 |
+
import pandas as pd
|
| 11 |
+
|
| 12 |
+
from theme import inject_theme, page_header, section_label, kpi_html, PLOTLY_LAYOUT, SAFFRON, GREEN, RED, AMBER
|
| 13 |
+
from utils.api_client import fetch_states, fetch_optimizer_results, run_optimizer_live
|
| 14 |
+
|
| 15 |
+
inject_theme()
|
| 16 |
+
page_header(
|
| 17 |
+
"β Module 04",
|
| 18 |
+
"Budget Optimizer",
|
| 19 |
+
"SciPy LP two-stage proportional reallocation β maximize employment at zero additional cost",
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
# ββ Tabs: pre-computed vs live ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 23 |
+
tab1, tab2 = st.tabs(["Pre-Computed Results", "Run Live Optimizer"])
|
| 24 |
+
|
| 25 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
+
# TAB 1 — Pre-computed results
|
| 27 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 28 |
+
with tab1:
|
| 29 |
+
states = fetch_states()
|
| 30 |
+
if not states:
|
| 31 |
+
st.error("β οΈ API offline β run `uvicorn backend.main:app --port 8000`")
|
| 32 |
+
st.stop()
|
| 33 |
+
|
| 34 |
+
cs, _ = st.columns([1, 2])
|
| 35 |
+
with cs:
|
| 36 |
+
scope = st.selectbox("State Filter", ["All-India"] + states, key="pre_scope")
|
| 37 |
+
state_param = None if scope == "All-India" else scope
|
| 38 |
+
|
| 39 |
+
df = fetch_optimizer_results(state_param)
|
| 40 |
+
|
| 41 |
+
if df.empty:
|
| 42 |
+
st.info("No optimizer results β run the pipeline first: `python main.py --stage 3`")
|
| 43 |
+
else:
|
| 44 |
+
# ββ Summary KPIs ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 45 |
+
sq_total = df["sq_persondays"].sum()
|
| 46 |
+
opt_total = df["opt_persondays"].sum() if "opt_persondays" in df.columns else sq_total + df["persondays_gain"].sum()
|
| 47 |
+
gain = df["persondays_gain"].sum()
|
| 48 |
+
gain_pct = gain / sq_total * 100 if sq_total else 0
|
| 49 |
+
tot_bud = df["budget_allocated_lakhs"].sum() if "budget_allocated_lakhs" in df.columns else 0
|
| 50 |
+
n_gain = int((df["persondays_gain"] > 0).sum())
|
| 51 |
+
n_cut = int((df["persondays_gain"] <= 0).sum())
|
| 52 |
+
|
| 53 |
+
kc1, kc2, kc3, kc4, kc5 = st.columns(5)
|
| 54 |
+
with kc1: st.markdown(kpi_html(f"{sq_total:,.0f}L", "Status Quo PD", "#1C1917"), unsafe_allow_html=True)
|
| 55 |
+
with kc2: st.markdown(kpi_html(f"{opt_total:,.0f}L", "Optimized PD", GREEN), unsafe_allow_html=True)
|
| 56 |
+
with kc3: st.markdown(kpi_html(f"{gain:+,.1f}L", "Net Gain", GREEN, "lakh person-days"), unsafe_allow_html=True)
|
| 57 |
+
with kc4: st.markdown(kpi_html(f"{gain_pct:+.2f}%", "% Uplift", GREEN, "budget-neutral"), unsafe_allow_html=True)
|
| 58 |
+
with kc5: st.markdown(kpi_html(f"βΉ{tot_bud:,.0f}L", "Total Budget", "#1C1917", "unchanged"), unsafe_allow_html=True)
|
| 59 |
+
|
| 60 |
+
st.markdown("<div style='margin-top:1.5rem'></div>", unsafe_allow_html=True)
|
| 61 |
+
|
| 62 |
+
# ββ Budget change waterfall β top movers ββββββββββββββββββββββββββββββ
|
| 63 |
+
section_label("Top Budget Movers")
|
| 64 |
+
|
| 65 |
+
top_gain = df.nlargest(10, "persondays_gain").copy()
|
| 66 |
+
top_cut = df.nsmallest(10, "persondays_gain").copy()
|
| 67 |
+
show = pd.concat([top_gain, top_cut]).drop_duplicates().sort_values("persondays_gain")
|
| 68 |
+
show["label"] = show["district"] + " Β· " + show["state"]
|
| 69 |
+
|
| 70 |
+
fig1 = go.Figure()
|
| 71 |
+
fig1.add_bar(
|
| 72 |
+
x=show["persondays_gain"],
|
| 73 |
+
y=show["label"],
|
| 74 |
+
orientation="h",
|
| 75 |
+
marker=dict(
|
| 76 |
+
color=[GREEN if v > 0 else RED for v in show["persondays_gain"]],
|
| 77 |
+
opacity=0.8,
|
| 78 |
+
),
|
| 79 |
+
customdata=list(zip(
|
| 80 |
+
show["state"], show["district"],
|
| 81 |
+
show["budget_allocated_lakhs"].round(0) if "budget_allocated_lakhs" in show else [0]*len(show),
|
| 82 |
+
show.get("budget_change_pct", pd.Series([0]*len(show))).round(1),
|
| 83 |
+
show["persondays_gain"].round(2),
|
| 84 |
+
show.get("persondays_per_lakh", pd.Series([0]*len(show))).round(4),
|
| 85 |
+
)),
|
| 86 |
+
hovertemplate=(
|
| 87 |
+
"<b>%{customdata[1]}</b> Β· %{customdata[0]}<br>"
|
| 88 |
+
"Budget: βΉ%{customdata[2]:,.0f}L β %{customdata[3]:+.1f}%<br>"
|
| 89 |
+
"PD Gain: <b>%{customdata[4]:+.2f}L</b><br>"
|
| 90 |
+
"Efficiency: %{customdata[5]} PD/βΉL"
|
| 91 |
+
"<extra></extra>"
|
| 92 |
+
),
|
| 93 |
+
)
|
| 94 |
+
fig1.add_vline(x=0, line_dash="solid", line_color="#1C1917", line_width=1)
|
| 95 |
+
l1 = {**PLOTLY_LAYOUT}
|
| 96 |
+
l1.update(dict(
|
| 97 |
+
height=520,
|
| 98 |
+
title=dict(text="Person-Day Gain by District (Top 10 + Bottom 10)",
|
| 99 |
+
font=dict(family="Fraunces, serif", size=14, color="#1C1917")),
|
| 100 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Person-Day Gain (Lakh)"),
|
| 101 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"]),
|
| 102 |
+
showlegend=False,
|
| 103 |
+
bargap=0.3,
|
| 104 |
+
))
|
| 105 |
+
fig1.update_layout(**l1)
|
| 106 |
+
st.plotly_chart(fig1, use_container_width=True, config={"displayModeBar": False})
|
| 107 |
+
|
| 108 |
+
# ββ Efficiency vs budget change scatter βββββββββββββββββββββββββββββββ
|
| 109 |
+
section_label("Efficiency vs Budget Reallocation")
|
| 110 |
+
|
| 111 |
+
if "persondays_per_lakh" in df.columns and "budget_change_pct" in df.columns:
|
| 112 |
+
fig2 = go.Figure()
|
| 113 |
+
fig2.add_scatter(
|
| 114 |
+
x=df["persondays_per_lakh"],
|
| 115 |
+
y=df["budget_change_pct"],
|
| 116 |
+
mode="markers",
|
| 117 |
+
marker=dict(
|
| 118 |
+
color=df["persondays_gain"],
|
| 119 |
+
colorscale=[[0, RED], [0.5, "#FED7AA"], [1, GREEN]],
|
| 120 |
+
size=5, opacity=0.65,
|
| 121 |
+
colorbar=dict(
|
| 122 |
+
title=dict(text="PD Gain", font=dict(color="#78716C", size=9)),
|
| 123 |
+
tickfont=dict(color="#78716C", size=8),
|
| 124 |
+
thickness=8, len=0.5,
|
| 125 |
+
),
|
| 126 |
+
),
|
| 127 |
+
customdata=list(zip(
|
| 128 |
+
df["state"], df["district"],
|
| 129 |
+
df["budget_change_pct"].round(1),
|
| 130 |
+
df["persondays_gain"].round(2),
|
| 131 |
+
)),
|
| 132 |
+
hovertemplate=(
|
| 133 |
+
"<b>%{customdata[1]}</b> Β· %{customdata[0]}<br>"
|
| 134 |
+
"Budget Ξ: %{customdata[2]:+.1f}%<br>"
|
| 135 |
+
"PD Gain: %{customdata[3]:+.2f}L"
|
| 136 |
+
"<extra></extra>"
|
| 137 |
+
),
|
| 138 |
+
)
|
| 139 |
+
fig2.add_hline(y=0, line_dash="dot", line_color="#1C1917", line_width=1)
|
| 140 |
+
l2 = {**PLOTLY_LAYOUT}
|
| 141 |
+
l2.update(dict(
|
| 142 |
+
height=340,
|
| 143 |
+
title=dict(text="Efficiency (PD/βΉ Lakh) vs Budget Change %",
|
| 144 |
+
font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
|
| 145 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="PD per βΉ Lakh"),
|
| 146 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Budget Change (%)"),
|
| 147 |
+
showlegend=False,
|
| 148 |
+
))
|
| 149 |
+
fig2.update_layout(**l2)
|
| 150 |
+
st.plotly_chart(fig2, use_container_width=True, config={"displayModeBar": False})
|
| 151 |
+
|
| 152 |
+
# ββ Full table ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 153 |
+
with st.expander("π Full Reallocation Table"):
|
| 154 |
+
show_cols = [c for c in [
|
| 155 |
+
"state", "district", "budget_allocated_lakhs", "optimized_budget",
|
| 156 |
+
"budget_change_pct", "sq_persondays", "opt_persondays",
|
| 157 |
+
"persondays_gain", "persondays_gain_pct", "persondays_per_lakh",
|
| 158 |
+
] if c in df.columns]
|
| 159 |
+
styled = df[show_cols].round(3).sort_values("persondays_gain", ascending=False)
|
| 160 |
+
st.dataframe(styled, use_container_width=True, hide_index=True)
|
| 161 |
+
|
| 162 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 163 |
+
# TAB 2 — Live optimizer
|
| 164 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 165 |
+
with tab2:
|
| 166 |
+
st.markdown("""
|
| 167 |
+
<p style="font-family:'Source Serif 4',serif; font-size:0.9rem; color:#57534E;
|
| 168 |
+
line-height:1.7; margin-bottom:1.5rem;">
|
| 169 |
+
Run the SciPy linear-programming optimizer live with custom parameters.
|
| 170 |
+
Results are computed in real-time using the latest district predictions from the database.
|
| 171 |
+
</p>
|
| 172 |
+
""", unsafe_allow_html=True)
|
| 173 |
+
|
| 174 |
+
ca, cb = st.columns(2)
|
| 175 |
+
states2 = fetch_states() or []
|
| 176 |
+
with ca:
|
| 177 |
+
scope2 = st.selectbox("State (or All-India)", ["All-India"] + states2, key="live_scope")
|
| 178 |
+
budget_scale = st.slider("Budget Scale", 0.8, 1.5, 1.0, 0.05,
|
| 179 |
+
help="1.0 = same total budget; 1.1 = +10% more funds")
|
| 180 |
+
with cb:
|
| 181 |
+
min_frac = st.slider("Min Allocation (floor)", 0.10, 0.60, 0.40, 0.05,
|
| 182 |
+
help="No district drops below this fraction of its current budget")
|
| 183 |
+
max_frac = st.slider("Max Allocation (cap)", 1.5, 3.0, 2.5, 0.1,
|
| 184 |
+
help="No district exceeds this multiple of its current budget")
|
| 185 |
+
|
| 186 |
+
if st.button("οΏ½οΏ½ Run Optimizer", type="primary"):
|
| 187 |
+
with st.spinner("Running LP optimizationβ¦"):
|
| 188 |
+
result = run_optimizer_live(
|
| 189 |
+
state=None if scope2 == "All-India" else scope2,
|
| 190 |
+
budget_scale=budget_scale,
|
| 191 |
+
min_fraction=min_frac,
|
| 192 |
+
max_fraction=max_frac,
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
if result:
|
| 196 |
+
st.success(
|
| 197 |
+
f"✅ Optimization complete — "
|
| 198 |
+
f"Gain: **{result['gain_lakhs']:+,.2f}L** person-days "
|
| 199 |
+
f"({result['gain_pct']:+.2f}%) Β· "
|
| 200 |
+
f"Total budget: βΉ{result['total_budget_lakhs']:,.0f}L"
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
# Summary metrics
|
| 204 |
+
m1, m2, m3, m4 = st.columns(4)
|
| 205 |
+
m1.metric("SQ Person-Days", f"{result['sq_persondays_total']:,.1f}L")
|
| 206 |
+
m2.metric("Opt Person-Days", f"{result['opt_persondays_total']:,.1f}L")
|
| 207 |
+
m3.metric("Net Gain", f"{result['gain_lakhs']:+,.2f}L")
|
| 208 |
+
m4.metric("% Uplift", f"{result['gain_pct']:+.2f}%")
|
| 209 |
+
|
| 210 |
+
# District breakdown
|
| 211 |
+
if result.get("districts"):
|
| 212 |
+
dist_df = pd.DataFrame(result["districts"])
|
| 213 |
+
|
| 214 |
+
section_label("District Reallocation Details")
|
| 215 |
+
# Copy before the label column is added so the assignment never targets a
# slice of dist_df (matches how top_gain/top_cut are built in the first tab).
top10 = dist_df.nlargest(10, "persondays_gain").copy()
|
| 216 |
+
top10["label"] = top10["district"] + " Β· " + top10["state"]
|
| 217 |
+
|
| 218 |
+
fig_live = go.Figure()
|
| 219 |
+
fig_live.add_bar(
|
| 220 |
+
x=top10["persondays_gain"], y=top10["label"],
|
| 221 |
+
orientation="h",
|
| 222 |
+
marker=dict(color=GREEN, opacity=0.8),
|
| 223 |
+
hovertemplate=(
|
| 224 |
+
"<b>%{y}</b><br>PD Gain: <b>%{x:+.2f}L</b><extra></extra>"
|
| 225 |
+
),
|
| 226 |
+
)
|
| 227 |
+
l_live = {**PLOTLY_LAYOUT}
|
| 228 |
+
l_live.update(dict(
|
| 229 |
+
height=380, showlegend=False, bargap=0.3,
|
| 230 |
+
title=dict(text="Top 10 Districts to Increase",
|
| 231 |
+
font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
|
| 232 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="PD Gain (Lakh)"),
|
| 233 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"]),
|
| 234 |
+
))
|
| 235 |
+
fig_live.update_layout(**l_live)
|
| 236 |
+
st.plotly_chart(fig_live, use_container_width=True,
|
| 237 |
+
config={"displayModeBar": False})
|
| 238 |
+
|
| 239 |
+
with st.expander("π Full Live Results Table"):
|
| 240 |
+
st.dataframe(dist_df.round(3), use_container_width=True, hide_index=True)
|
frontend/pages/overview.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pages/overview.py — National MNREGA trend overview.
|
| 2 |
+
|
| 3 |
+
import sys, os
|
| 4 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import plotly.graph_objects as go
|
| 8 |
+
|
| 9 |
+
from theme import inject_theme, page_header, section_label, PLOTLY_LAYOUT, SAFFRON, GREEN, RED
|
| 10 |
+
from utils.api_client import fetch_stats, fetch_states, fetch_yearly_trend, fetch_top_districts
|
| 11 |
+
|
| 12 |
+
inject_theme()
|
| 13 |
+
page_header(
|
| 14 |
+
"β Module 01",
|
| 15 |
+
"Overview",
|
| 16 |
+
"Longitudinal MNREGA performance across India β employment and wage trends",
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
# ββ Stats KPIs ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 20 |
+
stats = fetch_stats()
|
| 21 |
+
if stats:
|
| 22 |
+
c1, c2, c3, c4, c5 = st.columns(5)
|
| 23 |
+
c1.metric("States", stats.get("total_states", "β"))
|
| 24 |
+
c2.metric("Districts", stats.get("total_districts", "β"))
|
| 25 |
+
c3.metric("Period", stats.get("year_range", "β"))
|
| 26 |
+
c4.metric("Total PD", f"{stats.get('total_persondays_lakhs', 0):,.0f}L")
|
| 27 |
+
c5.metric("COVID Spike", f"{stats.get('covid_spike_pct', 0):.1f}%", delta="2020 peak")
|
| 28 |
+
else:
|
| 29 |
+
st.warning("β οΈ Backend offline β run `uvicorn backend.main:app --port 8000`")
|
| 30 |
+
st.stop()
|
| 31 |
+
|
| 32 |
+
st.markdown("---")
|
| 33 |
+
|
| 34 |
+
# ββ Scope selector ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
# Fall back to an empty list so the selectbox option concatenation below
# cannot fail if the states request returns nothing.
states_list = fetch_states() or []
|
| 36 |
+
col_sel, _ = st.columns([1, 2])
|
| 37 |
+
with col_sel:
|
| 38 |
+
scope = st.selectbox("Geographic Scope", ["All-India"] + states_list)
|
| 39 |
+
state_param = None if scope == "All-India" else scope
|
| 40 |
+
|
| 41 |
+
# ββ Trend chart βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 42 |
+
section_label("Employment Trend")
|
| 43 |
+
df_trend = fetch_yearly_trend(state_param)
|
| 44 |
+
|
| 45 |
+
if not df_trend.empty:
|
| 46 |
+
fig = go.Figure()
|
| 47 |
+
|
| 48 |
+
fig.add_bar(
|
| 49 |
+
x=df_trend["financial_year"],
|
| 50 |
+
y=df_trend["total_persondays"],
|
| 51 |
+
name="Person-Days (lakh)",
|
| 52 |
+
marker=dict(color=SAFFRON, opacity=0.78),
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
# Wage on secondary axis if available
|
| 56 |
+
if "avg_wage" in df_trend.columns:
|
| 57 |
+
fig.add_scatter(
|
| 58 |
+
x=df_trend["financial_year"],
|
| 59 |
+
y=df_trend["avg_wage"],
|
| 60 |
+
name="Avg Wage Rate (βΉ/day)",
|
| 61 |
+
yaxis="y2",
|
| 62 |
+
mode="lines+markers",
|
| 63 |
+
line=dict(color=GREEN, width=2.5),
|
| 64 |
+
marker=dict(size=6, color=GREEN),
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
if 2020 in df_trend["financial_year"].values:
|
| 68 |
+
fig.add_vline(
|
| 69 |
+
x=2020, line_dash="dot", line_color=RED, line_width=1.5,
|
| 70 |
+
annotation_text="COVID-19",
|
| 71 |
+
annotation_font=dict(color=RED, size=9, family="DM Mono, monospace"),
|
| 72 |
+
annotation_position="top right",
|
| 73 |
+
)
|
| 74 |
+
if 2022 in df_trend["financial_year"].values:
|
| 75 |
+
fig.add_vline(
|
| 76 |
+
x=2022, line_dash="dot", line_color="#A8A29E", line_width=1,
|
| 77 |
+
annotation_text="WB anomaly",
|
| 78 |
+
annotation_font=dict(color="#A8A29E", size=9, family="DM Mono, monospace"),
|
| 79 |
+
annotation_position="top left",
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
layout = {**PLOTLY_LAYOUT}
|
| 83 |
+
layout.update(dict(
|
| 84 |
+
title=dict(
|
| 85 |
+
text=f"MNREGA Employment Trend β {scope}",
|
| 86 |
+
font=dict(family="Fraunces, serif", size=15, color="#1C1917"),
|
| 87 |
+
),
|
| 88 |
+
hovermode="x unified",
|
| 89 |
+
height=420,
|
| 90 |
+
bargap=0.35,
|
| 91 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Person-Days (lakh)"),
|
| 92 |
+
yaxis2=dict(
|
| 93 |
+
title="Avg Wage Rate (βΉ/day)", overlaying="y", side="right",
|
| 94 |
+
gridcolor="rgba(0,0,0,0)",
|
| 95 |
+
tickfont=dict(color="#78716C", size=10),
|
| 96 |
+
title_font=dict(color="#57534E", size=11),
|
| 97 |
+
),
|
| 98 |
+
legend=dict(**PLOTLY_LAYOUT["legend"], orientation="h", y=1.08, x=0),
|
| 99 |
+
))
|
| 100 |
+
fig.update_layout(**layout)
|
| 101 |
+
st.plotly_chart(fig, use_container_width=True, config={"displayModeBar": False})
|
| 102 |
+
st.caption("Source: MNREGA MIS Β· Ministry of Rural Development Β· Annual district-level aggregates")
|
| 103 |
+
else:
|
| 104 |
+
st.info("No trend data β API offline or pipeline not yet run.")
|
| 105 |
+
|
| 106 |
+
st.markdown("---")
|
| 107 |
+
|
| 108 |
+
# ββ District ranking ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 109 |
+
section_label("District Performance Benchmarking")
|
| 110 |
+
|
| 111 |
+
cm, cn = st.columns([2, 1])
|
| 112 |
+
with cm:
|
| 113 |
+
# V3: only person_days_lakhs is a real non-synthetic column
|
| 114 |
+
metric = "person_days_lakhs"
|
| 115 |
+
st.markdown(
|
| 116 |
+
'<p style="font-family:\'DM Mono\',monospace; font-size:0.65rem; '
|
| 117 |
+
'letter-spacing:1.5px; text-transform:uppercase; color:#78716C; margin-bottom:4px;">'
|
| 118 |
+
'Ranking Metric</p>'
|
| 119 |
+
'<p style="font-size:0.9rem; color:#1C1917; margin:0;">Employment Volume (Lakh Person-Days)</p>',
|
| 120 |
+
unsafe_allow_html=True
|
| 121 |
+
)
|
| 122 |
+
with cn:
|
| 123 |
+
n_top = st.slider("Top N Districts", 5, 30, 15)
|
| 124 |
+
|
| 125 |
+
df_top = fetch_top_districts(state_param, metric, n_top)
|
| 126 |
+
|
| 127 |
+
if not df_top.empty:
|
| 128 |
+
df_top["label"] = df_top["district"] + " Β· " + df_top["state"]
|
| 129 |
+
|
| 130 |
+
fig2 = go.Figure()
|
| 131 |
+
fig2.add_bar(
|
| 132 |
+
x=df_top["avg_persondays"],
|
| 133 |
+
y=df_top["label"],
|
| 134 |
+
orientation="h",
|
| 135 |
+
marker=dict(
|
| 136 |
+
color=df_top["avg_persondays"],
|
| 137 |
+
colorscale=[[0, "#FED7AA"], [1, "#9A3412"]],
|
| 138 |
+
showscale=False,
|
| 139 |
+
),
|
| 140 |
+
customdata=list(zip(df_top["state"], df_top["district"], df_top["avg_persondays"].round(2))),
|
| 141 |
+
hovertemplate=(
|
| 142 |
+
"<b>%{customdata[1]}</b> Β· %{customdata[0]}<br>"
|
| 143 |
+
"Avg Person-Days: <b>%{customdata[2]}L</b><extra></extra>"
|
| 144 |
+
),
|
| 145 |
+
)
|
| 146 |
+
layout2 = {**PLOTLY_LAYOUT}
|
| 147 |
+
layout2.update(dict(
|
| 148 |
+
title=dict(
|
| 149 |
+
text=f"Top {n_top} Districts β Employment Volume",
|
| 150 |
+
font=dict(family="Fraunces, serif", size=14, color="#1C1917"),
|
| 151 |
+
),
|
| 152 |
+
height=max(380, n_top * 30),
|
| 153 |
+
xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Avg Lakh Person-Days"),
|
| 154 |
+
yaxis=dict(**PLOTLY_LAYOUT["yaxis"], autorange="reversed"),
|
| 155 |
+
))
|
| 156 |
+
fig2.update_layout(**layout2)
|
| 157 |
+
st.plotly_chart(fig2, use_container_width=True, config={"displayModeBar": False})
|
| 158 |
+
else:
|
| 159 |
+
st.info("No ranking data available.")
|
frontend/pages/predictions.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pages/predictions.py — GBR V3 model predictions and error analysis.
|
| 2 |
+
|
| 3 |
+
import sys, os
|
| 4 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import plotly.graph_objects as go
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
from theme import inject_theme, page_header, section_label, kpi_html, PLOTLY_LAYOUT, SAFFRON, GREEN, RED
|
| 11 |
+
from utils.api_client import fetch_states, fetch_districts, fetch_predictions
|
| 12 |
+
|
| 13 |
+
# Apply the shared theme, then render the page banner
# (module tag, title, subtitle).
inject_theme()
page_header(
    # NOTE(review): the glyph before "Module 03" was garbled in extraction and
    # its original symbol cannot be recovered from this file — confirm and restore.
    "β Module 03",
    "Predictions",
    "GBR V3 district-level employment forecasts — walk-forward CV R²≈0.91 (excl. 2022 anomaly)",
)
|
| 19 |
+
|
| 20 |
+
# ── Filters ──────────────────────────────────────────────────────────────────
# Without a state list the page cannot build its filters, so stop early with
# a hint on how to start the backend.
states = fetch_states()
if not states:
    st.error("⚠️ API offline — run `uvicorn backend.main:app --port 8000`")
    st.stop()

c1, c2, c3 = st.columns(3)
with c1:
    scope = st.selectbox("State", ["All States"] + states)
with c2:
    # "All States" maps to None, i.e. no state filter.
    state_param = None if scope == "All States" else scope
    # Districts are only fetched once a specific state is chosen; the
    # parentheses make the (unchanged) evaluation order explicit.
    districts = (
        (["All Districts"] + fetch_districts(state_param))
        if state_param
        else ["All Districts"]
    )
    dist_sel = st.selectbox("District", districts)
with c3:
    # State-level fetch used only to discover which years can be offered.
    df_all = fetch_predictions(state=state_param)
    years = sorted(df_all["financial_year"].unique().tolist()) if not df_all.empty else []
    yr_sel = st.selectbox("Year", ["All Years"] + years)

# Apply filters: each "All ..." choice is passed as None.
df = fetch_predictions(
    state=state_param,
    district=None if dist_sel == "All Districts" else dist_sel,
    year=None if yr_sel == "All Years" else int(yr_sel),
)

# Everything below needs at least one prediction row.
if df.empty:
    st.info("No prediction data for selected filters.")
    st.stop()
|
| 48 |
+
|
| 49 |
+
# ── Model KPIs ───────────────────────────────────────────────────────────────
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore")

# Headline error metrics for the currently filtered rows.
try:
    rmse = np.sqrt(mean_squared_error(df["person_days_lakhs"], df["predicted_persondays"]))
    mae = mean_absolute_error(df["person_days_lakhs"], df["predicted_persondays"])
    r2 = r2_score(df["person_days_lakhs"], df["predicted_persondays"])
    # Positive bias means the model over-predicts on average (predicted − actual).
    bias = (df["predicted_persondays"] - df["person_days_lakhs"]).mean()
except Exception as exc:
    # Best-effort section: keep the page alive, but tell the user the KPI row
    # is missing instead of silently skipping it.
    st.warning(f"Model metrics could not be computed for this selection: {exc}")
else:
    c1, c2, c3, c4 = st.columns(4)
    c1.metric("R² Score", f"{r2:.4f}")
    c2.metric("RMSE", f"{rmse:.3f}L")
    c3.metric("MAE", f"{mae:.3f}L")
    c4.metric("Mean Bias", f"{bias:+.3f}L")
|
| 67 |
+
|
| 68 |
+
# ── Model info callout ───────────────────────────────────────────────────────
# Static HTML note describing the V3 model; rendered as raw HTML, so the text
# must stay free of untrusted input. Mis-encoded symbols (·, ², −) are restored.
st.markdown("""
<div style="background:#F0FDF4; border:1px solid #BBF7D0; border-left:3px solid #16A34A;
border-radius:8px; padding:0.9rem 1.1rem; margin:1rem 0;">
<p style="font-family:'DM Mono',monospace; font-size:0.56rem; letter-spacing:2px;
text-transform:uppercase; color:#16A34A; margin:0 0 6px 0;">V3 Leak-Free Model</p>
<p style="font-family:'Source Serif 4',serif; font-size:0.85rem; color:#14532D;
line-height:1.65; margin:0;">
GradientBoostingRegressor · 17 lag-based features · Walk-forward CV
· R²=0.91 excl. 2022 · Previous R²=0.9963 was data leakage
(<code>works_completed</code> r=1.0 with target).
2022 West Bengal reporting anomaly (−93 to −98% drop) is structurally unpredictable.
</p>
</div>
""", unsafe_allow_html=True)
|
| 83 |
+
|
| 84 |
+
st.markdown("---")

# Two side-by-side panels: scatter on the left, error histogram on the right.
col_left, col_right = st.columns(2)

# ── Actual vs Predicted scatter ──────────────────────────────────────────────
with col_left:
    section_label("Actual vs Predicted")

    fig1 = go.Figure()

    # Both axes share one range so the diagonal really is y = x.
    lo = min(df["person_days_lakhs"].min(), df["predicted_persondays"].min())
    hi = max(df["person_days_lakhs"].max(), df["predicted_persondays"].max())
    # Pad away from the data on both ends. Scaling a negative bound by a
    # factor below 1 would move it towards zero and clip points, so the
    # factor is chosen by sign; non-negative data keeps the 0.92 / 1.06 padding.
    lim_mn = lo * 0.92 if lo >= 0 else lo * 1.08
    lim_mx = hi * 1.06 if hi >= 0 else hi * 0.94

    # Reference diagonal: points on this line are predicted exactly.
    fig1.add_scatter(
        x=[lim_mn, lim_mx], y=[lim_mn, lim_mx],
        mode="lines",
        line=dict(color="#E7E5E4", width=1.5, dash="dot"),
        name="Perfect prediction",
        hoverinfo="skip",
    )
    # One marker per row, coloured by absolute prediction error.
    fig1.add_scatter(
        x=df["person_days_lakhs"],
        y=df["predicted_persondays"],
        mode="markers",
        marker=dict(
            color=df["prediction_error"].abs(),
            colorscale=[[0, SAFFRON], [1, RED]],
            size=5, opacity=0.65,
            colorbar=dict(
                title=dict(text="|Error|L", font=dict(color="#78716C", size=9)),
                tickfont=dict(color="#78716C", size=8),
                thickness=8, len=0.5,
            ),
        ),
        # customdata order: state, district, year, actual, predicted, error.
        customdata=list(zip(
            df["state"], df["district"],
            df["financial_year"],
            df["person_days_lakhs"].round(2),
            df["predicted_persondays"].round(2),
            df["prediction_error"].round(2),
        )),
        hovertemplate=(
            "<b>%{customdata[1]}</b> · %{customdata[0]}<br>"
            "FY: %{customdata[2]}<br>"
            "Actual: <b>%{customdata[3]}L</b><br>"
            "Predicted: <b>%{customdata[4]}L</b><br>"
            "Error: %{customdata[5]}L"
            "<extra></extra>"
        ),
        name="Districts",
    )

    # Shared layout plus the overrides specific to this chart.
    l1 = {**PLOTLY_LAYOUT}
    l1.update(dict(
        height=370,
        title=dict(text="Actual vs Predicted Person-Days",
                   font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
        xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Actual (Lakh PD)", range=[lim_mn, lim_mx]),
        yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Predicted (Lakh PD)", range=[lim_mn, lim_mx]),
        showlegend=False,
    ))
    fig1.update_layout(**l1)
    st.plotly_chart(fig1, use_container_width=True, config={"displayModeBar": False})
|
| 146 |
+
|
| 147 |
+
# ── Error distribution ───────────────────────────────────────────────────────
with col_right:
    section_label("Prediction Error Distribution")

    errors = df["prediction_error"]
    fig2 = go.Figure()
    fig2.add_histogram(
        x=errors, nbinsx=40,
        marker=dict(color=SAFFRON, opacity=0.75, line=dict(color="#FFFFFF", width=0.5)),
        hovertemplate="Error: %{x:.2f}L<br>Count: %{y}<extra></extra>",
    )
    # Dotted line marks zero error; dashed red line marks the mean error.
    fig2.add_vline(x=0, line_dash="dot", line_color="#1C1917", line_width=1.5)
    fig2.add_vline(x=errors.mean(), line_dash="dash", line_color=RED, line_width=1,
                   annotation_text=f"Mean={errors.mean():+.2f}",
                   annotation_font=dict(color=RED, size=9, family="DM Mono, monospace"))

    # NOTE(review): the title states "Actual − Predicted", while the Mean Bias
    # KPI on this page is computed as predicted − actual. The sign convention
    # of the `prediction_error` column is not visible here — confirm it matches.
    l2 = {**PLOTLY_LAYOUT}
    l2.update(dict(
        height=370,
        title=dict(text="Error Distribution (Actual − Predicted)",
                   font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
        xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Error (Lakh PD)"),
        yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Count"),
        showlegend=False, bargap=0.05,
    ))
    fig2.update_layout(**l2)
    st.plotly_chart(fig2, use_container_width=True, config={"displayModeBar": False})
|
| 174 |
+
|
| 175 |
+
st.markdown("---")

# ── Year-on-year prediction vs actual trend ──────────────────────────────────
section_label("Year-on-Year Prediction Accuracy")

# Collapse the filtered rows to one total per financial year.
trend = df.groupby("financial_year", as_index=False).agg(
    actual=("person_days_lakhs", "sum"),
    predicted=("predicted_persondays", "sum"),
)

# Grey bars carry the actual totals; the saffron line overlays the predictions.
fig3 = go.Figure()
fig3.add_trace(go.Bar(
    x=trend["financial_year"],
    y=trend["actual"],
    name="Actual",
    marker=dict(color="#E7E5E4", opacity=0.9),
))
fig3.add_trace(go.Scatter(
    x=trend["financial_year"],
    y=trend["predicted"],
    name="Predicted",
    mode="lines+markers",
    line=dict(color=SAFFRON, width=2.5),
    marker=dict(size=7, color=SAFFRON, line=dict(width=1.5, color="#FFFFFF")),
))

# Shared layout first, chart-specific keys after so they take precedence.
l3 = {
    **PLOTLY_LAYOUT,
    "height": 300,
    "barmode": "overlay",
    "bargap": 0.35,
    "title": dict(
        text="Aggregated Actual vs Predicted by Year",
        font=dict(family="Fraunces, serif", size=13, color="#1C1917"),
    ),
    "xaxis": dict(**PLOTLY_LAYOUT["xaxis"], title="Financial Year", dtick=1),
    "yaxis": dict(**PLOTLY_LAYOUT["yaxis"], title="Total Lakh PD"),
    "legend": dict(**PLOTLY_LAYOUT["legend"], orientation="h", y=1.08, x=0),
}

# Annotate known anomalies, but only for years present in the data.
# Each entry: (year, label, line/label colour, line width).
years_present = trend["financial_year"].values
for anomaly_year, anomaly_label, anomaly_color, anomaly_width in (
    (2020, "COVID", RED, 1.5),
    (2022, "WB anomaly", "#A8A29E", 1),
):
    if anomaly_year in years_present:
        fig3.add_vline(
            x=anomaly_year,
            line_dash="dot",
            line_color=anomaly_color,
            line_width=anomaly_width,
            annotation_text=anomaly_label,
            annotation_font=dict(color=anomaly_color, size=9, family="DM Mono, monospace"),
        )

fig3.update_layout(**l3)
st.plotly_chart(fig3, use_container_width=True, config={"displayModeBar": False})
|
| 220 |
+
|
| 221 |
+
st.markdown("---")

# ── Walk-forward CV summary ──────────────────────────────────────────────────
section_label("Walk-Forward CV Performance (Honest Evaluation)")

# Hard-coded per-year results; the lists are parallel, one entry per year.
cv_data = {
    "Year":        [2018,  2019,  2020,   2021,  2022,   2023,  2024],
    "R²":          [0.916, 0.926, 0.835,  0.926, 0.510,  0.909, 0.935],
    "MAE":         [6.639, 6.380, 12.681, 7.150, 13.954, 7.403, 5.673],
    "vs Naive R²": ["+0.004", "+0.061", "+0.083", "−0.012", "+0.330", "−0.014", "+0.065"],
    "Note":        ["", "", "COVID spike", "", "WB reporting anomaly", "", ""],
}
import pandas as pd
cv_df = pd.DataFrame(cv_data)
st.dataframe(cv_df, use_container_width=True, hide_index=True)

# Derive the caption figures from the table so the two cannot drift apart;
# with the values above this renders 0.851 and 0.908.
mean_r2 = cv_df["R²"].mean()
mean_r2_excl_2022 = cv_df.loc[cv_df["Year"] != 2022, "R²"].mean()
st.caption(
    "Walk-forward CV: model trained on years before test year only. "
    f"Mean R²={mean_r2:.3f}, excl. 2022: R²={mean_r2_excl_2022:.3f}."
)
|
| 237 |
+
|
| 238 |
+
st.markdown("---")

# ── Worst predictions table ──────────────────────────────────────────────────
section_label("Largest Prediction Errors")

# Columns shown in the table, and the shorter headers they are displayed under.
worst_columns = [
    "state", "district", "financial_year",
    "person_days_lakhs", "predicted_persondays", "prediction_error",
]
worst_headers = {
    "person_days_lakhs": "actual_L",
    "predicted_persondays": "predicted_L",
    "prediction_error": "error_L",
}

# Rank by absolute error and keep the 20 largest; the helper column is
# dropped again by the column selection.
ranked_by_error = df.assign(abs_error=df["prediction_error"].abs()).nlargest(20, "abs_error")
worst = ranked_by_error[worst_columns].rename(columns=worst_headers).round(3)

st.dataframe(worst, use_container_width=True, hide_index=True)
|
frontend/pages/spatial.py
ADDED
|
@@ -0,0 +1,491 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pages/spatial.py — Spatial Overview Map
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
import sys, os
|
| 5 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 6 |
+
|
| 7 |
+
import streamlit as st
|
| 8 |
+
import plotly.graph_objects as go
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
from theme import inject_theme, page_header, section_label, kpi_html, PLOTLY_LAYOUT, SAFFRON, SAFFRON_SCALE, GREEN, RED, AMBER
|
| 13 |
+
from utils.api_client import fetch_states, fetch_predictions, fetch_optimizer_results, fetch_district_history
|
| 14 |
+
|
| 15 |
+
# Apply the shared theme, then render the page banner
# (module tag, title, subtitle).
inject_theme()
page_header(
    # NOTE(review): the glyph before "Module 05" was garbled in extraction and
    # its original symbol cannot be recovered from this file — confirm and restore.
    "β Module 05",
    "Spatial Overview",
    "District-level employment prediction map — hover any bubble for full model details",
)
|
| 21 |
+
|
| 22 |
+
# ββ District coordinates (approximate centroids for all major districts) ββββββ
|
| 23 |
+
# Covers all 36 states/UTs across India's 700+ districts.
|
| 24 |
+
# Format: "District|State": (lat, lon)
|
| 25 |
+
DISTRICT_COORDS: dict[str, tuple[float, float]] = {
|
| 26 |
+
# ββ Andhra Pradesh βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 27 |
+
"Srikakulam|Andhra Pradesh": (18.30, 83.90), "Vizianagaram|Andhra Pradesh": (18.12, 83.41),
|
| 28 |
+
"Visakhapatnam|Andhra Pradesh": (17.69, 83.22), "East Godavari|Andhra Pradesh":(17.00, 82.00),
|
| 29 |
+
"West Godavari|Andhra Pradesh": (16.92, 81.34), "Krishna|Andhra Pradesh": (16.61, 80.83),
|
| 30 |
+
"Guntur|Andhra Pradesh": (16.31, 80.44), "Prakasam|Andhra Pradesh": (15.35, 79.57),
|
| 31 |
+
"Nellore|Andhra Pradesh": (14.44, 79.99), "Kurnool|Andhra Pradesh": (15.83, 78.05),
|
| 32 |
+
"Kadapa|Andhra Pradesh": (14.47, 78.82), "Anantapur|Andhra Pradesh": (14.68, 77.60),
|
| 33 |
+
"Chittoor|Andhra Pradesh": (13.22, 79.10),
|
| 34 |
+
|
| 35 |
+
# ββ Assam βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 36 |
+
"Kamrup|Assam": (26.14, 91.77), "Barpeta|Assam": (26.32, 91.00),
|
| 37 |
+
"Dhubri|Assam": (26.02, 89.98), "Goalpara|Assam": (26.17, 90.62),
|
| 38 |
+
"Nagaon|Assam": (26.35, 92.68), "Cachar|Assam": (24.81, 92.86),
|
| 39 |
+
"Lakhimpur|Assam": (27.24, 94.10), "Dibrugarh|Assam": (27.49, 95.00),
|
| 40 |
+
"Sonitpur|Assam": (26.63, 92.80), "Jorhat|Assam": (26.75, 94.22),
|
| 41 |
+
|
| 42 |
+
# ββ Bihar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 43 |
+
"Patna|Bihar": (25.59, 85.13), "Gaya|Bihar": (24.80, 84.99),
|
| 44 |
+
"Muzaffarpur|Bihar": (26.12, 85.38), "Bhagalpur|Bihar": (25.24, 86.98),
|
| 45 |
+
"Darbhanga|Bihar": (26.16, 85.90), "Purnea|Bihar": (25.78, 87.47),
|
| 46 |
+
"Rohtas|Bihar": (24.98, 83.98), "Siwan|Bihar": (26.22, 84.36),
|
| 47 |
+
"Saran|Bihar": (25.92, 84.74), "Nalanda|Bihar": (25.10, 85.44),
|
| 48 |
+
"Madhubani|Bihar": (26.37, 86.07), "Champaran East|Bihar": (26.65, 84.92),
|
| 49 |
+
"Champaran West|Bihar": (27.02, 84.46),
|
| 50 |
+
|
| 51 |
+
# ββ Chhattisgarh ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
+
"Raipur|Chhattisgarh": (21.25, 81.63), "Bilaspur|Chhattisgarh": (22.09, 82.15),
|
| 53 |
+
"Durg|Chhattisgarh": (21.19, 81.28), "Rajnandgaon|Chhattisgarh": (21.10, 81.03),
|
| 54 |
+
"Bastar|Chhattisgarh": (19.10, 81.95), "Sarguja|Chhattisgarh": (23.12, 83.19),
|
| 55 |
+
"Korba|Chhattisgarh": (22.35, 82.72), "Raigarh|Chhattisgarh": (21.90, 83.40),
|
| 56 |
+
|
| 57 |
+
# ββ Gujarat βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 58 |
+
"Ahmedabad|Gujarat": (23.03, 72.58), "Surat|Gujarat": (21.17, 72.83),
|
| 59 |
+
"Vadodara|Gujarat": (22.31, 73.18), "Rajkot|Gujarat": (22.30, 70.80),
|
| 60 |
+
"Bhavnagar|Gujarat": (21.77, 72.15), "Jamnagar|Gujarat": (22.47, 70.06),
|
| 61 |
+
"Junagadh|Gujarat": (21.52, 70.46), "Anand|Gujarat": (22.56, 72.93),
|
| 62 |
+
"Mehsana|Gujarat": (23.59, 72.37), "Banaskantha|Gujarat": (24.17, 72.42),
|
| 63 |
+
"Kutch|Gujarat": (23.73, 69.86), "Dahod|Gujarat": (22.83, 74.25),
|
| 64 |
+
"Narmada|Gujarat": (21.87, 73.49), "Valsad|Gujarat": (20.59, 72.93),
|
| 65 |
+
"Dang|Gujarat": (20.75, 73.69),
|
| 66 |
+
|
| 67 |
+
# ββ Haryana βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 68 |
+
"Hisar|Haryana": (29.15, 75.72), "Sirsa|Haryana": (29.53, 75.03),
|
| 69 |
+
"Bhiwani|Haryana": (28.79, 76.13), "Rohtak|Haryana": (28.89, 76.61),
|
| 70 |
+
"Sonipat|Haryana": (28.99, 77.01), "Karnal|Haryana": (29.68, 76.99),
|
| 71 |
+
"Ambala|Haryana": (30.37, 76.78), "Kurukshetra|Haryana": (29.97, 76.85),
|
| 72 |
+
"Mahendragarh|Haryana": (28.27, 76.15),
|
| 73 |
+
|
| 74 |
+
# ββ Jharkhand βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 75 |
+
"Ranchi|Jharkhand": (23.35, 85.33), "Dhanbad|Jharkhand": (23.80, 86.45),
|
| 76 |
+
"Bokaro|Jharkhand": (23.67, 86.15), "Giridih|Jharkhand": (24.19, 86.30),
|
| 77 |
+
"Hazaribagh|Jharkhand": (23.99, 85.36), "Dumka|Jharkhand": (24.27, 87.25),
|
| 78 |
+
"Palamu|Jharkhand": (24.03, 84.08), "Gumla|Jharkhand": (23.05, 84.54),
|
| 79 |
+
"Pakur|Jharkhand": (24.63, 87.84), "Lohardaga|Jharkhand": (23.44, 84.68),
|
| 80 |
+
|
| 81 |
+
# ββ Karnataka βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 82 |
+
"Bangalore Rural|Karnataka": (13.01, 77.57), "Tumkur|Karnataka": (13.34, 77.10),
|
| 83 |
+
"Kolar|Karnataka": (13.14, 78.13), "Mysore|Karnataka": (12.30, 76.65),
|
| 84 |
+
"Mandya|Karnataka": (12.52, 76.90), "Hassan|Karnataka": (13.00, 76.10),
|
| 85 |
+
"Chikmagalur|Karnataka": (13.32, 75.78), "Shimoga|Karnataka": (13.93, 75.57),
|
| 86 |
+
"Dakshina Kannada|Karnataka": (12.85, 75.24), "Uttara Kannada|Karnataka": (14.79, 74.68),
|
| 87 |
+
"Raichur|Karnataka": (16.21, 77.36), "Koppal|Karnataka": (15.35, 76.15),
|
| 88 |
+
"Gadag|Karnataka": (15.42, 75.62), "Dharwad|Karnataka": (15.46, 75.01),
|
| 89 |
+
"Bagalkot|Karnataka": (16.18, 75.70), "Bijapur|Karnataka": (16.83, 75.72),
|
| 90 |
+
"Gulbarga|Karnataka": (17.34, 76.82), "Bidar|Karnataka": (17.91, 77.52),
|
| 91 |
+
"Bellary|Karnataka": (15.14, 76.92), "Chitradurga|Karnataka": (14.23, 76.40),
|
| 92 |
+
"Davangere|Karnataka": (14.46, 75.92), "Udupi|Karnataka": (13.34, 74.75),
|
| 93 |
+
|
| 94 |
+
# ββ Kerala ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 95 |
+
"Thiruvananthapuram|Kerala": (8.52, 76.94), "Kollam|Kerala": (8.88, 76.61),
|
| 96 |
+
"Pathanamthitta|Kerala": (9.27, 76.77), "Alappuzha|Kerala": (9.49, 76.32),
|
| 97 |
+
"Kottayam|Kerala": (9.59, 76.52), "Idukki|Kerala": (9.85, 77.10),
|
| 98 |
+
"Ernakulam|Kerala": (10.01, 76.31), "Thrissur|Kerala": (10.52, 76.22),
|
| 99 |
+
"Palakkad|Kerala": (10.77, 76.65), "Malappuram|Kerala": (11.07, 76.07),
|
| 100 |
+
"Kozhikode|Kerala": (11.25, 75.78), "Wayanad|Kerala": (11.61, 76.08),
|
| 101 |
+
"Kannur|Kerala": (11.87, 75.37), "Kasaragod|Kerala": (12.50, 74.99),
|
| 102 |
+
|
| 103 |
+
# ββ Madhya Pradesh ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 104 |
+
"Bhopal|Madhya Pradesh": (23.26, 77.41), "Indore|Madhya Pradesh": (22.72, 75.86),
|
| 105 |
+
"Jabalpur|Madhya Pradesh": (23.18, 79.99), "Gwalior|Madhya Pradesh": (26.22, 78.18),
|
| 106 |
+
"Sagar|Madhya Pradesh": (23.84, 78.74), "Rewa|Madhya Pradesh": (24.53, 81.30),
|
| 107 |
+
"Satna|Madhya Pradesh": (24.60, 80.83), "Ujjain|Madhya Pradesh": (23.18, 75.78),
|
| 108 |
+
"Chhindwara|Madhya Pradesh": (22.06, 78.94), "Shivpuri|Madhya Pradesh": (25.42, 77.66),
|
| 109 |
+
"Morena|Madhya Pradesh": (26.50, 78.00), "Bhind|Madhya Pradesh": (26.56, 78.78),
|
| 110 |
+
"Datia|Madhya Pradesh": (25.67, 78.46), "Chhatarpur|Madhya Pradesh": (24.92, 79.58),
|
| 111 |
+
"Tikamgarh|Madhya Pradesh": (24.74, 78.83), "Raisen|Madhya Pradesh": (22.99, 77.79),
|
| 112 |
+
"Vidisha|Madhya Pradesh": (23.52, 77.81), "Hoshangabad|Madhya Pradesh": (22.75, 77.73),
|
| 113 |
+
"Harda|Madhya Pradesh": (22.34, 77.09), "Betul|Madhya Pradesh": (21.91, 77.90),
|
| 114 |
+
"Balaghat|Madhya Pradesh": (21.81, 80.19), "Seoni|Madhya Pradesh": (22.09, 79.55),
|
| 115 |
+
"Mandla|Madhya Pradesh": (22.60, 80.38), "Dindori|Madhya Pradesh": (22.95, 81.08),
|
| 116 |
+
"Shahdol|Madhya Pradesh": (23.30, 81.36), "Anuppur|Madhya Pradesh": (23.10, 81.69),
|
| 117 |
+
"Umaria|Madhya Pradesh": (23.53, 80.84), "Katni|Madhya Pradesh": (23.83, 80.39),
|
| 118 |
+
"Panna|Madhya Pradesh": (24.72, 80.19), "Damoh|Madhya Pradesh": (23.83, 79.45),
|
| 119 |
+
"Narsinghpur|Madhya Pradesh": (22.95, 79.19), "Niwari|Madhya Pradesh": (25.01, 78.76),
|
| 120 |
+
|
| 121 |
+
# ββ Maharashtra βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 122 |
+
"Ahmednagar|Maharashtra": (19.10, 74.74), "Akola|Maharashtra": (20.71, 77.00),
|
| 123 |
+
"Amravati|Maharashtra": (20.93, 77.75), "Aurangabad|Maharashtra": (19.88, 75.34),
|
| 124 |
+
"Beed|Maharashtra": (18.99, 75.75), "Bhandara|Maharashtra": (21.17, 79.65),
|
| 125 |
+
"Buldhana|Maharashtra": (20.53, 76.18), "Chandrapur|Maharashtra": (19.96, 79.30),
|
| 126 |
+
"Dhule|Maharashtra": (20.90, 74.78), "Gadchiroli|Maharashtra": (20.18, 80.00),
|
| 127 |
+
"Gondia|Maharashtra": (21.46, 80.20), "Hingoli|Maharashtra": (19.72, 77.15),
|
| 128 |
+
"Jalgaon|Maharashtra": (21.00, 75.57), "Jalna|Maharashtra": (19.84, 75.89),
|
| 129 |
+
"Kolhapur|Maharashtra": (16.70, 74.24), "Latur|Maharashtra": (18.40, 76.57),
|
| 130 |
+
"Mumbai City|Maharashtra": (18.96, 72.82), "Mumbai Suburban|Maharashtra": (19.17, 72.96),
|
| 131 |
+
"Nagpur|Maharashtra": (21.15, 79.09), "Nanded|Maharashtra": (19.15, 77.32),
|
| 132 |
+
"Nandurbar|Maharashtra": (21.37, 74.24), "Nashik|Maharashtra": (19.99, 73.79),
|
| 133 |
+
"Osmanabad|Maharashtra": (18.18, 76.04), "Palghar|Maharashtra": (19.70, 72.77),
|
| 134 |
+
"Parbhani|Maharashtra": (19.27, 76.77), "Pune|Maharashtra": (18.52, 73.86),
|
| 135 |
+
"Raigad|Maharashtra": (18.52, 73.18), "Ratnagiri|Maharashtra": (16.99, 73.30),
|
| 136 |
+
"Sangli|Maharashtra": (16.86, 74.56), "Satara|Maharashtra": (17.69, 74.00),
|
| 137 |
+
"Sindhudurg|Maharashtra": (16.35, 73.74), "Solapur|Maharashtra": (17.69, 75.91),
|
| 138 |
+
"Thane|Maharashtra": (19.22, 72.98), "Wardha|Maharashtra": (20.75, 78.60),
|
| 139 |
+
"Washim|Maharashtra": (20.11, 77.15), "Yavatmal|Maharashtra": (20.39, 78.13),
|
| 140 |
+
|
| 141 |
+
# ββ Odisha ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 142 |
+
"Bhubaneswar|Odisha": (20.30, 85.84), "Cuttack|Odisha": (20.46, 85.88),
|
| 143 |
+
"Balasore|Odisha": (21.49, 86.93), "Mayurbhanj|Odisha": (21.92, 86.73),
|
| 144 |
+
"Keonjhar|Odisha": (21.63, 85.58), "Sundargarh|Odisha": (22.12, 84.03),
|
| 145 |
+
"Sambalpur|Odisha": (21.47, 83.97), "Bargarh|Odisha": (21.33, 83.62),
|
| 146 |
+
"Bolangir|Odisha": (20.71, 83.49), "Kalahandi|Odisha": (19.91, 83.17),
|
| 147 |
+
"Koraput|Odisha": (18.81, 82.71), "Rayagada|Odisha": (19.17, 83.41),
|
| 148 |
+
"Ganjam|Odisha": (19.39, 84.70), "Puri|Odisha": (19.81, 85.83),
|
| 149 |
+
"Khordha|Odisha": (20.18, 85.62), "Jagatsinghpur|Odisha": (20.25, 86.18),
|
| 150 |
+
"Kendrapara|Odisha": (20.50, 86.42), "Jajpur|Odisha": (20.85, 86.33),
|
| 151 |
+
|
| 152 |
+
# ββ Rajasthan βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 153 |
+
"Jaipur|Rajasthan": (26.92, 75.79), "Jodhpur|Rajasthan": (26.29, 73.03),
|
| 154 |
+
"Udaipur|Rajasthan": (24.58, 73.69), "Kota|Rajasthan": (25.18, 75.84),
|
| 155 |
+
"Ajmer|Rajasthan": (26.45, 74.64), "Bikaner|Rajasthan": (28.02, 73.31),
|
| 156 |
+
"Alwar|Rajasthan": (27.57, 76.61), "Bharatpur|Rajasthan": (27.22, 77.49),
|
| 157 |
+
"Sikar|Rajasthan": (27.61, 75.14), "Nagaur|Rajasthan": (27.21, 73.74),
|
| 158 |
+
"Pali|Rajasthan": (25.77, 73.33), "Barmer|Rajasthan": (25.75, 71.39),
|
| 159 |
+
"Jaisalmer|Rajasthan": (26.92, 70.91), "Churu|Rajasthan": (28.30, 74.96),
|
| 160 |
+
"Jhunjhunu|Rajasthan": (28.13, 75.40), "Sirohi|Rajasthan": (24.89, 72.86),
|
| 161 |
+
"Banswara|Rajasthan": (23.54, 74.44), "Dungarpur|Rajasthan": (23.84, 73.71),
|
| 162 |
+
"Baran|Rajasthan": (25.10, 76.52), "Jhalawar|Rajasthan": (24.60, 76.16),
|
| 163 |
+
"Tonk|Rajasthan": (26.17, 75.79), "Sawai Madhopur|Rajasthan": (26.01, 76.35),
|
| 164 |
+
"Dausa|Rajasthan": (26.89, 76.34), "Karauli|Rajasthan": (26.50, 77.02),
|
| 165 |
+
|
| 166 |
+
# ββ Tamil Nadu ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 167 |
+
"Chennai|Tamil Nadu": (13.08, 80.27), "Coimbatore|Tamil Nadu": (11.02, 76.97),
|
| 168 |
+
"Madurai|Tamil Nadu": (9.93, 78.12), "Tiruchirappalli|Tamil Nadu": (10.80, 78.69),
|
| 169 |
+
"Salem|Tamil Nadu": (11.65, 78.16), "Tirunelveli|Tamil Nadu": (8.73, 77.70),
|
| 170 |
+
"Vellore|Tamil Nadu": (12.92, 79.13), "Erode|Tamil Nadu": (11.34, 77.73),
|
| 171 |
+
"Thanjavur|Tamil Nadu": (10.79, 79.14), "Virudhunagar|Tamil Nadu": (9.58, 77.96),
|
| 172 |
+
"Ramanathapuram|Tamil Nadu": (9.37, 78.83), "Pudukkottai|Tamil Nadu": (10.38, 78.82),
|
| 173 |
+
"Dindigul|Tamil Nadu": (10.36, 77.98), "Dharmapuri|Tamil Nadu": (12.13, 78.16),
|
| 174 |
+
"Krishnagiri|Tamil Nadu": (12.52, 78.21), "Namakkal|Tamil Nadu": (11.22, 78.17),
|
| 175 |
+
"Nilgiris|Tamil Nadu": (11.47, 76.73), "Tiruppur|Tamil Nadu": (11.11, 77.34),
|
| 176 |
+
"Cuddalore|Tamil Nadu": (11.75, 79.77), "Villupuram|Tamil Nadu": (11.94, 79.49),
|
| 177 |
+
"Kancheepuram|Tamil Nadu": (12.83, 79.70), "Thiruvallur|Tamil Nadu": (13.15, 79.91),
|
| 178 |
+
"Tiruvannamalai|Tamil Nadu": (12.23, 79.07),
|
| 179 |
+
|
| 180 |
+
# ββ Telangana βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 181 |
+
"Hyderabad|Telangana": (17.38, 78.47), "Medchal|Telangana": (17.62, 78.48),
|
| 182 |
+
"Rangareddy|Telangana": (17.25, 78.38), "Nalgonda|Telangana": (17.05, 79.27),
|
| 183 |
+
"Warangal|Telangana": (17.97, 79.59), "Karimnagar|Telangana": (18.44, 79.13),
|
| 184 |
+
"Khammam|Telangana": (17.25, 80.15), "Nizamabad|Telangana": (18.67, 78.10),
|
| 185 |
+
"Adilabad|Telangana": (19.67, 78.53), "Mahabubnagar|Telangana": (16.74, 77.99),
|
| 186 |
+
|
| 187 |
+
# ββ Uttar Pradesh βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 188 |
+
"Lucknow|Uttar Pradesh": (26.85, 80.95), "Kanpur Nagar|Uttar Pradesh": (26.45, 80.35),
|
| 189 |
+
"Agra|Uttar Pradesh": (27.18, 78.02), "Varanasi|Uttar Pradesh": (25.32, 83.01),
|
| 190 |
+
"Allahabad|Uttar Pradesh": (25.44, 81.85), "Meerut|Uttar Pradesh": (28.98, 77.71),
|
| 191 |
+
"Bareilly|Uttar Pradesh": (28.35, 79.43), "Gorakhpur|Uttar Pradesh": (26.76, 83.37),
|
| 192 |
+
"Mathura|Uttar Pradesh": (27.49, 77.67), "Muzaffarnagar|Uttar Pradesh": (29.47, 77.70),
|
| 193 |
+
"Shahjahanpur|Uttar Pradesh": (27.88, 79.91), "Sitapur|Uttar Pradesh": (27.57, 80.68),
|
| 194 |
+
"Lakhimpur Kheri|Uttar Pradesh": (27.94, 80.78), "Hardoi|Uttar Pradesh": (27.40, 80.13),
|
| 195 |
+
"Unnao|Uttar Pradesh": (26.54, 80.49), "Rae Bareli|Uttar Pradesh": (26.22, 81.24),
|
| 196 |
+
"Pratapgarh|Uttar Pradesh": (25.89, 81.99), "Jaunpur|Uttar Pradesh": (25.73, 82.69),
|
| 197 |
+
"Ghazipur|Uttar Pradesh": (25.58, 83.57), "Ballia|Uttar Pradesh": (25.75, 84.15),
|
| 198 |
+
"Azamgarh|Uttar Pradesh": (26.07, 83.18), "Mau|Uttar Pradesh": (25.94, 83.56),
|
| 199 |
+
"Deoria|Uttar Pradesh": (26.50, 83.78), "Basti|Uttar Pradesh": (26.79, 82.73),
|
| 200 |
+
"Siddharthnagar|Uttar Pradesh": (27.29, 83.07), "Maharajganj|Uttar Pradesh": (27.15, 83.56),
|
| 201 |
+
"Gonda|Uttar Pradesh": (27.13, 81.97), "Bahraich|Uttar Pradesh": (27.57, 81.60),
|
| 202 |
+
"Shravasti|Uttar Pradesh": (27.72, 81.87), "Balrampur|Uttar Pradesh": (27.43, 82.19),
|
| 203 |
+
"Barabanki|Uttar Pradesh": (26.94, 81.19), "Faizabad|Uttar Pradesh": (26.77, 82.14),
|
| 204 |
+
"Ambedkar Nagar|Uttar Pradesh": (26.43, 82.62), "Sultanpur|Uttar Pradesh": (26.26, 82.06),
|
| 205 |
+
"Banda|Uttar Pradesh": (25.48, 80.34), "Chitrakoot|Uttar Pradesh": (25.20, 80.90),
|
| 206 |
+
"Hamirpur|Uttar Pradesh": (25.95, 80.15), "Mahoba|Uttar Pradesh": (25.29, 79.87),
|
| 207 |
+
"Lalitpur|Uttar Pradesh": (24.69, 78.41), "Jhansi|Uttar Pradesh": (25.45, 78.57),
|
| 208 |
+
"Jalaun|Uttar Pradesh": (26.14, 79.34), "Etawah|Uttar Pradesh": (26.78, 79.02),
|
| 209 |
+
"Auraiya|Uttar Pradesh": (26.47, 79.51), "Kannauj|Uttar Pradesh": (27.05, 79.92),
|
| 210 |
+
"Farrukhabad|Uttar Pradesh": (27.38, 79.57), "Mainpuri|Uttar Pradesh": (27.23, 79.02),
|
| 211 |
+
"Firozabad|Uttar Pradesh": (27.15, 78.39), "Etah|Uttar Pradesh": (27.65, 78.67),
|
| 212 |
+
"Kasganj|Uttar Pradesh": (27.81, 78.65), "Hathras|Uttar Pradesh": (27.60, 78.06),
|
| 213 |
+
"Aligarh|Uttar Pradesh": (27.88, 78.07), "Bulandshahr|Uttar Pradesh": (28.41, 77.85),
|
| 214 |
+
"Hapur|Uttar Pradesh": (28.72, 77.78), "Gautam Buddha Nagar|Uttar Pradesh": (28.54, 77.39),
|
| 215 |
+
"Ghaziabad|Uttar Pradesh": (28.67, 77.44), "Bagpat|Uttar Pradesh": (28.94, 77.22),
|
| 216 |
+
"Bijnor|Uttar Pradesh": (29.37, 78.13), "Amroha|Uttar Pradesh": (28.91, 78.47),
|
| 217 |
+
"Sambhal|Uttar Pradesh": (28.59, 78.56), "Moradabad|Uttar Pradesh": (28.84, 78.77),
|
| 218 |
+
"Rampur|Uttar Pradesh": (28.81, 79.03), "Pilibhit|Uttar Pradesh": (28.64, 79.81),
|
| 219 |
+
"Budaun|Uttar Pradesh": (28.04, 79.13),
|
| 220 |
+
|
| 221 |
+
# ββ West Bengal βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 222 |
+
"Kolkata|West Bengal": (22.57, 88.37), "Howrah|West Bengal": (22.59, 88.31),
|
| 223 |
+
"North 24 Parganas|West Bengal": (22.86, 88.54), "South 24 Parganas|West Bengal":(22.15, 88.27),
|
| 224 |
+
"Bardhaman|West Bengal": (23.23, 87.86), "Birbhum|West Bengal": (23.90, 87.53),
|
| 225 |
+
"Murshidabad|West Bengal": (24.18, 88.27), "Nadia|West Bengal": (23.47, 88.55),
|
| 226 |
+
"Hooghly|West Bengal": (22.96, 88.38), "Midnapore West|West Bengal": (22.43, 86.92),
|
| 227 |
+
"Midnapore East|West Bengal": (22.11, 87.67), "Bankura|West Bengal": (23.23, 87.07),
|
| 228 |
+
"Purulia|West Bengal": (23.33, 86.36), "Malda|West Bengal": (25.00, 88.14),
|
| 229 |
+
"Dinajpur North|West Bengal": (25.62, 88.43), "Dinajpur South|West Bengal": (25.29, 88.68),
|
| 230 |
+
"Jalpaiguri|West Bengal": (26.54, 88.73), "Darjeeling|West Bengal": (27.04, 88.26),
|
| 231 |
+
"Cooch Behar|West Bengal": (26.32, 89.45),
|
| 232 |
+
|
| 233 |
+
# ββ Himachal Pradesh ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 234 |
+
"Shimla|Himachal Pradesh": (31.10, 77.17), "Kangra|Himachal Pradesh": (32.10, 76.27),
|
| 235 |
+
"Mandi|Himachal Pradesh": (31.71, 76.93), "Hamirpur|Himachal Pradesh": (31.69, 76.52),
|
| 236 |
+
"Una|Himachal Pradesh": (31.46, 76.27), "Chamba|Himachal Pradesh": (32.55, 76.13),
|
| 237 |
+
"Solan|Himachal Pradesh": (30.91, 77.10), "Sirmaur|Himachal Pradesh": (30.56, 77.46),
|
| 238 |
+
"Bilaspur|Himachal Pradesh": (31.34, 76.76), "Kinnaur|Himachal Pradesh": (31.59, 78.45),
|
| 239 |
+
"Kullu|Himachal Pradesh": (31.96, 77.11), "Lahul Spiti|Himachal Pradesh":(32.77, 77.67),
|
| 240 |
+
|
| 241 |
+
# ββ Uttarakhand βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 242 |
+
"Dehradun|Uttarakhand": (30.32, 78.03), "Haridwar|Uttarakhand": (29.96, 78.16),
|
| 243 |
+
"Nainital|Uttarakhand": (29.38, 79.46), "Udham Singh Nagar|Uttarakhand":(29.00, 79.52),
|
| 244 |
+
"Almora|Uttarakhand": (29.60, 79.66), "Pauri Garhwal|Uttarakhand": (29.78, 79.01),
|
| 245 |
+
"Tehri Garhwal|Uttarakhand": (30.39, 78.48), "Chamoli|Uttarakhand": (30.41, 79.32),
|
| 246 |
+
"Rudraprayag|Uttarakhand": (30.28, 78.98), "Uttarkashi|Uttarakhand": (30.73, 78.44),
|
| 247 |
+
"Bageshwar|Uttarakhand": (29.84, 79.77), "Pithoragarh|Uttarakhand": (29.58, 80.22),
|
| 248 |
+
"Champawat|Uttarakhand": (29.33, 80.09),
|
| 249 |
+
|
| 250 |
+
# ββ Punjab ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 251 |
+
"Amritsar|Punjab": (31.63, 74.87), "Ludhiana|Punjab": (30.90, 75.85),
|
| 252 |
+
"Jalandhar|Punjab": (31.33, 75.58), "Patiala|Punjab": (30.34, 76.39),
|
| 253 |
+
"Bathinda|Punjab": (30.21, 74.95), "Gurdaspur|Punjab": (32.04, 75.41),
|
| 254 |
+
"Firozpur|Punjab": (30.93, 74.61), "Hoshiarpur|Punjab": (31.53, 75.91),
|
| 255 |
+
"Rupnagar|Punjab": (30.96, 76.53), "Sangrur|Punjab": (30.25, 75.84),
|
| 256 |
+
"Moga|Punjab": (30.82, 75.17), "Faridkot|Punjab": (30.67, 74.76),
|
| 257 |
+
"Muktsar|Punjab": (30.48, 74.52), "Fazilka|Punjab": (30.40, 74.02),
|
| 258 |
+
"Nawanshahr|Punjab": (31.12, 76.12), "Kapurthala|Punjab": (31.38, 75.38),
|
| 259 |
+
|
| 260 |
+
# ββ Jharkhand extra βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 261 |
+
"Chatra|Jharkhand": (24.21, 84.88), "Koderma|Jharkhand": (24.47, 85.60),
|
| 262 |
+
"Simdega|Jharkhand": (22.61, 84.51), "Khunti|Jharkhand": (23.07, 85.28),
|
| 263 |
+
"Ramgarh|Jharkhand": (23.63, 85.51), "Jamtara|Jharkhand": (23.96, 86.80),
|
| 264 |
+
"Sahibganj|Jharkhand": (24.96, 87.63), "Godda|Jharkhand": (24.83, 87.21),
|
| 265 |
+
"Deoghar|Jharkhand": (24.48, 86.70),
|
| 266 |
+
|
| 267 |
+
# ββ Generic fallback centroids for states βββββββββββββββββββββββββββββββββ
|
| 268 |
+
"Unknown|Andhra Pradesh": (15.9, 79.7),
|
| 269 |
+
"Unknown|Assam": (26.2, 92.9),
|
| 270 |
+
"Unknown|Bihar": (25.1, 85.3),
|
| 271 |
+
"Unknown|Chhattisgarh": (21.3, 81.7),
|
| 272 |
+
"Unknown|Gujarat": (22.3, 71.2),
|
| 273 |
+
"Unknown|Haryana": (29.1, 76.1),
|
| 274 |
+
"Unknown|Jharkhand": (23.6, 85.3),
|
| 275 |
+
"Unknown|Karnataka": (15.3, 75.7),
|
| 276 |
+
"Unknown|Kerala": (10.9, 76.3),
|
| 277 |
+
"Unknown|Madhya Pradesh": (22.9, 78.7),
|
| 278 |
+
"Unknown|Maharashtra": (19.7, 75.7),
|
| 279 |
+
"Unknown|Odisha": (20.9, 85.1),
|
| 280 |
+
"Unknown|Rajasthan": (27.0, 74.2),
|
| 281 |
+
"Unknown|Tamil Nadu": (11.1, 78.7),
|
| 282 |
+
"Unknown|Telangana": (17.4, 79.1),
|
| 283 |
+
"Unknown|Uttar Pradesh": (26.8, 80.9),
|
| 284 |
+
"Unknown|West Bengal": (22.9, 87.9),
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def get_coords(district: str, state: str) -> tuple[float, float]:
    """Return a jittered ``(lat, lon)`` for a district.

    Looks up ``"<district>|<state>"`` in ``DISTRICT_COORDS``.  When the
    district is unknown, falls back to the ``"Unknown|<state>"`` centroid and
    finally to ``(22.0, 78.0)``.  A small pseudo-random offset is added so
    that markers sharing a coordinate do not overlap exactly.

    Args:
        district: District name as it appears in the data.
        state: State name as it appears in the data.

    Returns:
        ``(lat, lon)`` with jitter of at most 0.08 degrees for a known
        district, or at most 1.2 degrees when a fallback centroid is used.
    """
    import zlib  # function-scope import keeps this fix self-contained

    key = f"{district}|{state}"

    # Seed from a CRC32 of the key rather than built-in hash(): str hashes are
    # salted per interpreter process, which made every marker move on restart.
    rng = np.random.default_rng(zlib.crc32(key.encode("utf-8")))

    coords = DISTRICT_COORDS.get(key)
    if coords is not None:
        spread = 0.08
    else:
        # Fallback: state centroid (or the hard-coded default) + wider jitter.
        coords = DISTRICT_COORDS.get(f"Unknown|{state}", (22.0, 78.0))
        spread = 1.2

    lat, lon = coords
    lat += rng.uniform(-spread, spread)
    lon += rng.uniform(-spread, spread)
    return lat, lon
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
# ββ Controls ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 306 |
+
states = fetch_states()
if not states:
    st.error("β οΈ API offline β run `uvicorn backend.main:app --port 8000`")
    st.stop()

# Display label -> dataframe column used for the bubble colour.  Defined
# before the widgets so the selectbox options and the lookup below can never
# drift apart.
COLOR_MAP = {
    "Predicted Person-Days": "predicted_persondays",
    "Prediction Error": "prediction_error",
    "Budget Gain (LP Optimizer)": "persondays_gain",
    "Actual Person-Days": "person_days_lakhs",
}

cc1, cc2, cc3 = st.columns(3)
state_filter = cc1.selectbox("State Filter", ["All India", *states])
map_metric = cc2.selectbox("Bubble Color / Size", list(COLOR_MAP))
with cc3:
    # Unfiltered fetch is only used to discover which financial years exist.
    _df_raw = fetch_predictions()
    year_opts = (
        [] if _df_raw.empty
        else sorted(_df_raw["financial_year"].unique().tolist())
    )
    # The placeholder entry keeps the widget renderable when no years exist.
    selected_year = st.selectbox("Financial Year", year_opts or ["β"])

# Fetch the filtered predictions and optimizer output.
state_arg = None if state_filter == "All India" else state_filter
year_arg = int(selected_year) if selected_year != "β" else None

pred_df = fetch_predictions(state=state_arg, year=year_arg)
opt_df = fetch_optimizer_results(state=state_arg)

if pred_df.empty:
    st.info("No prediction data for selected filters. Ensure the pipeline has run.")
    st.stop()

# Left-join optimizer columns onto the predictions when they are available.
if not opt_df.empty:
    merge_cols = ["state", "district"]
    optimizer_cols = (
        "persondays_gain",
        "budget_change_pct",
        "persondays_per_lakh",
        "budget_allocated_lakhs",
        "optimized_budget",
    )
    # Skip columns pred_df already carries: merging them again would create
    # "_x"/"_y" suffixed copies and the un-suffixed name used further down
    # would silently disappear.
    extra_cols = [
        c for c in optimizer_cols
        if c in opt_df.columns and c not in pred_df.columns
    ]
    opt_sub = opt_df[merge_cols + extra_cols].drop_duplicates(subset=merge_cols)
    pred_df = pred_df.merge(opt_sub, on=merge_cols, how="left")

# Column to colour by; fall back to predictions when the column is absent
# (e.g. optimizer results were not merged).
color_col = COLOR_MAP[map_metric]
if color_col not in pred_df.columns:
    color_col = "predicted_persondays"
|
| 361 |
+
|
| 362 |
+
# ββ Build map data ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 363 |
+
def _as_float(value, default: float = 0.0) -> float:
    """Coerce *value* to ``float``; None, NaN and unparseable values give *default*."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    # NaN is the only float that compares unequal to itself.
    return default if number != number else number


# Per-row marker attributes plus the hover payload.
# customdata columns: 0=district, 1=state, 2=fy, 3=actual, 4=predicted,
#                     5=error, 6=persondays_gain, 7=budget_chg_pct,
#                     8=persondays_per_lakh, 9=budget_allocated
lats, lons, colors, sizes = [], [], [], []
hover_data = []
custom = []

for _, row in pred_df.iterrows():
    lat, lon = get_coords(str(row["district"]), str(row["state"]))
    lats.append(lat)
    lons.append(lon)
    # `x or 0` does not catch NaN, which is what a left merge leaves behind;
    # _as_float maps NaN to the default so one bad row cannot poison the
    # min/max normalisation below.
    colors.append(_as_float(row.get(color_col)))
    sizes.append(max(_as_float(row.get("predicted_persondays"), 1.0) or 1.0, 0.1))
    hover_data.append(row)
    custom.append([
        str(row.get("district", "")),
        str(row.get("state", "")),
        int(_as_float(row.get("financial_year"))),
        _as_float(row.get("person_days_lakhs")),
        _as_float(row.get("predicted_persondays")),
        _as_float(row.get("prediction_error")),
        _as_float(row.get("persondays_gain")),
        _as_float(row.get("budget_change_pct")),
        _as_float(row.get("persondays_per_lakh")),
        _as_float(row.get("budget_allocated_lakhs")),
    ])

# Normalize sizes to a bubble radius between 4 and 17; the epsilon avoids a
# division by zero when every row has the same value.
sz_arr = np.array(sizes)
sz_min, sz_max = sz_arr.min(), sz_arr.max()
norm_sz = np.clip((sz_arr - sz_min) / (sz_max - sz_min + 1e-9) * 13 + 4, 4, 17).tolist()

# Signed metrics get a red -> neutral -> green scale, everything else the
# sequential saffron scale.  (A first, immediately overwritten assignment
# for "prediction_error" was dead code and has been removed.)
if color_col == "prediction_error":
    cscale = [[0, RED], [0.5, "#FAFAF9"], [1, GREEN]]
elif color_col == "persondays_gain":
    cscale = [[0, RED], [0.5, "#FFF7ED"], [1, GREEN]]
else:
    cscale = SAFFRON_SCALE

hover_tmpl = (
    "<b>%{customdata[0]}</b><br>"
    "<span style='color:#A8A29E'>%{customdata[1]}</span><br>"
    "<br>"
    "<b>FY:</b> %{customdata[2]}<br>"
    "<b>Actual PD:</b> %{customdata[3]:.2f}L<br>"
    "<b>Predicted PD:</b> %{customdata[4]:.2f}L<br>"
    "<b>Model Error:</b> %{customdata[5]:+.2f}L<br>"
    "<br>"
    "<b>LP Optimizer</b><br>"
    "<b>PD Gain:</b> %{customdata[6]:+.2f}L<br>"
    "<b>Budget Ξ:</b> %{customdata[7]:+.1f}%<br>"
    "<b>Efficiency:</b> %{customdata[8]:.4f} PD/βΉL<br>"
    "<b>Budget:</b> βΉ%{customdata[9]:,.0f}L"
    "<extra></extra>"
)
|
| 423 |
+
|
| 424 |
+
# Bubble map.
# NOTE(review): the red/neutral/green scales are not pinned to zero (no
# `cmid`), so the neutral colour sits at the data midpoint — confirm intended.
marker_style = {
    "size": norm_sz,
    "color": colors,
    "colorscale": cscale,
    "colorbar": {
        "title": {"text": map_metric[:12], "font": {"color": "#78716C", "size": 9}},
        "tickfont": {"color": "#78716C", "size": 8},
        "thickness": 10,
        "len": 0.55,
        "bgcolor": "rgba(255,255,255,0.88)",
    },
    "opacity": 0.80,
    "line": {"width": 0.8, "color": "rgba(255,255,255,0.7)"},
}

fig = go.Figure()
fig.add_scattergeo(
    lat=lats,
    lon=lons,
    mode="markers",
    marker=marker_style,
    customdata=custom,
    hovertemplate=hover_tmpl,
)

geo_style = {
    "scope": "asia",
    "showland": True, "landcolor": "#F5F5F4",
    "showocean": True, "oceancolor": "#EFF6FF",
    "showcountries": True, "countrycolor": "#D6D3D1",
    "showsubunits": True, "subunitcolor": "#E7E5E4",
    "showrivers": True, "rivercolor": "#DBEAFE",
    "center": {"lat": 22, "lon": 80},
    "projection_scale": 5.0,
    "bgcolor": "rgba(0,0,0,0)",
}
fig.update_geos(**geo_style)

layout_style = {
    "height": 620,
    "paper_bgcolor": "rgba(0,0,0,0)",
    "margin": {"l": 0, "r": 0, "t": 10, "b": 0},
    "font": {"family": "DM Mono, monospace", "color": "#1C1917"},
    "showlegend": False,
    "hoverlabel": {
        "bgcolor": "#1C1917",
        "bordercolor": "#1C1917",
        "font": {"family": "DM Mono, monospace", "size": 11, "color": "#FAF9F7"},
    },
}
fig.update_layout(**layout_style)
st.plotly_chart(fig, use_container_width=True, config={"displayModeBar": False})

# Caption: counts the rows whose district name is non-empty.
n_mapped = sum(1 for entry in custom if entry[0])
if selected_year != "β":
    year_label = selected_year
else:
    year_label = "all years"
st.caption(
    f"{n_mapped} districts Β· FY {year_label} Β· Bubble size β predicted person-days Β· Hover for full model details"
)

# Summary cards below the map.
st.markdown("<div style='margin-top:1.5rem'></div>", unsafe_allow_html=True)
section_label("Prediction Summary for Filtered View")

summary_columns = st.columns(4)
total_pred = pred_df["predicted_persondays"].sum()
total_act = pred_df["person_days_lakhs"].sum()
mean_err = pred_df["prediction_error"].mean()
if "persondays_gain" in pred_df.columns:
    gain_total = pred_df["persondays_gain"].sum()
else:
    # Optimizer results were not merged in.
    gain_total = 0

summary_cards = [
    ("Total Predicted PD", f"{total_pred:,.1f}L"),
    ("Total Actual PD", f"{total_act:,.1f}L"),
    ("Mean Model Error", f"{mean_err:+.3f}L"),
    ("Total LP Gain", f"{gain_total:+,.1f}L"),
]
for column, (label, value) in zip(summary_columns, summary_cards):
    column.metric(label, value)
|
frontend/theme.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
theme.py — SchemeImpactNet shared design system
|
| 3 |
+
Editorial / policy-brief aesthetic.
|
| 4 |
+
Fonts: Fraunces (display) + Source Serif 4 (body) + DM Mono (data/labels)
|
| 5 |
+
Palette: warm off-white #FAF9F7, deep stone #1C1917, saffron accent #FB923C
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
THEME_CSS = """
|
| 9 |
+
<style>
|
| 10 |
+
@import url('https://fonts.googleapis.com/css2?family=Fraunces:ital,opsz,wght@0,9..144,300;0,9..144,600;0,9..144,700;1,9..144,300&family=Source+Serif+4:ital,opsz,wght@0,8..60,300;0,8..60,400;0,8..60,600&family=DM+Mono:wght@400;500&display=swap');
|
| 11 |
+
|
| 12 |
+
html, body, [class*="css"] {
|
| 13 |
+
font-family: 'Source Serif 4', Georgia, serif !important;
|
| 14 |
+
}
|
| 15 |
+
.stApp {
|
| 16 |
+
background-color: #FAF9F7 !important;
|
| 17 |
+
}
|
| 18 |
+
#MainMenu, footer, header { visibility: hidden; }
|
| 19 |
+
|
| 20 |
+
.block-container {
|
| 21 |
+
padding: 2rem 2.5rem 3rem !important;
|
| 22 |
+
max-width: 1320px !important;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
/* ββ Sidebar ββ */
|
| 26 |
+
[data-testid="stSidebar"] {
|
| 27 |
+
background: #1C1917 !important;
|
| 28 |
+
border-right: none !important;
|
| 29 |
+
}
|
| 30 |
+
[data-testid="stSidebarContent"] {
|
| 31 |
+
background: #1C1917 !important;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
/* Nav links generated by st.navigation */
|
| 35 |
+
[data-testid="stSidebarNavLink"] {
|
| 36 |
+
border-radius: 5px !important;
|
| 37 |
+
padding: 0.5rem 1rem !important;
|
| 38 |
+
font-family: 'DM Mono', monospace !important;
|
| 39 |
+
font-size: 0.7rem !important;
|
| 40 |
+
letter-spacing: 0.5px !important;
|
| 41 |
+
color: #A8A29E !important;
|
| 42 |
+
text-decoration: none !important;
|
| 43 |
+
transition: all 0.15s ease !important;
|
| 44 |
+
border-left: 2px solid transparent !important;
|
| 45 |
+
}
|
| 46 |
+
[data-testid="stSidebarNavLink"]:hover {
|
| 47 |
+
background: rgba(251,146,60,0.1) !important;
|
| 48 |
+
color: #FB923C !important;
|
| 49 |
+
border-left-color: rgba(251,146,60,0.4) !important;
|
| 50 |
+
}
|
| 51 |
+
[data-testid="stSidebarNavLink"][aria-current="page"] {
|
| 52 |
+
background: rgba(251,146,60,0.15) !important;
|
| 53 |
+
color: #FB923C !important;
|
| 54 |
+
border-left-color: #FB923C !important;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
/* ββ Typography ββ */
|
| 58 |
+
h1, h2, h3 {
|
| 59 |
+
font-family: 'Fraunces', serif !important;
|
| 60 |
+
color: #1C1917 !important;
|
| 61 |
+
}
|
| 62 |
+
h1 { font-size: 2.2rem !important; font-weight: 600 !important; line-height: 1.15 !important; }
|
| 63 |
+
h2 { font-size: 1.5rem !important; font-weight: 600 !important; }
|
| 64 |
+
h3 { font-size: 1.1rem !important; font-weight: 600 !important; }
|
| 65 |
+
p { font-family: 'Source Serif 4', serif !important; color: #292524 !important; }
|
| 66 |
+
|
| 67 |
+
/* ββ Metric cards ββ */
|
| 68 |
+
[data-testid="stMetric"] {
|
| 69 |
+
background: #FFFFFF !important;
|
| 70 |
+
border: 1px solid #E7E5E4 !important;
|
| 71 |
+
border-radius: 8px !important;
|
| 72 |
+
padding: 1rem 1.2rem !important;
|
| 73 |
+
}
|
| 74 |
+
[data-testid="stMetricLabel"] p {
|
| 75 |
+
font-family: 'DM Mono', monospace !important;
|
| 76 |
+
font-size: 0.62rem !important;
|
| 77 |
+
letter-spacing: 2px !important;
|
| 78 |
+
text-transform: uppercase !important;
|
| 79 |
+
color: #78716C !important;
|
| 80 |
+
}
|
| 81 |
+
[data-testid="stMetricValue"] {
|
| 82 |
+
font-family: 'Fraunces', serif !important;
|
| 83 |
+
font-size: 1.85rem !important;
|
| 84 |
+
font-weight: 600 !important;
|
| 85 |
+
color: #1C1917 !important;
|
| 86 |
+
line-height: 1.2 !important;
|
| 87 |
+
}
|
| 88 |
+
[data-testid="stMetricDelta"] {
|
| 89 |
+
font-family: 'DM Mono', monospace !important;
|
| 90 |
+
font-size: 0.7rem !important;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
/* ββ Inputs ββ */
|
| 94 |
+
[data-testid="stSelectbox"] label p,
|
| 95 |
+
[data-testid="stSlider"] label p,
|
| 96 |
+
[data-testid="stTextInput"] label p,
|
| 97 |
+
[data-testid="stMultiSelect"] label p {
|
| 98 |
+
font-family: 'DM Mono', monospace !important;
|
| 99 |
+
font-size: 0.65rem !important;
|
| 100 |
+
letter-spacing: 1.5px !important;
|
| 101 |
+
text-transform: uppercase !important;
|
| 102 |
+
color: #78716C !important;
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
/* ββ Buttons ββ */
|
| 106 |
+
.stButton > button {
|
| 107 |
+
font-family: 'DM Mono', monospace !important;
|
| 108 |
+
font-size: 0.7rem !important;
|
| 109 |
+
letter-spacing: 1px !important;
|
| 110 |
+
text-transform: uppercase !important;
|
| 111 |
+
background: #1C1917 !important;
|
| 112 |
+
color: #FAF9F7 !important;
|
| 113 |
+
border: none !important;
|
| 114 |
+
border-radius: 6px !important;
|
| 115 |
+
padding: 0.5rem 1.2rem !important;
|
| 116 |
+
}
|
| 117 |
+
.stButton > button:hover {
|
| 118 |
+
background: #FB923C !important;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
/* ββ Dataframes ββ */
|
| 122 |
+
[data-testid="stDataFrame"] {
|
| 123 |
+
border: 1px solid #E7E5E4 !important;
|
| 124 |
+
border-radius: 8px !important;
|
| 125 |
+
overflow: hidden !important;
|
| 126 |
+
}
|
| 127 |
+
[data-testid="stDataFrame"] th {
|
| 128 |
+
font-family: 'DM Mono', monospace !important;
|
| 129 |
+
font-size: 0.65rem !important;
|
| 130 |
+
letter-spacing: 1px !important;
|
| 131 |
+
text-transform: uppercase !important;
|
| 132 |
+
background: #F5F5F4 !important;
|
| 133 |
+
color: #57534E !important;
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
/* ββ Expander ββ */
|
| 137 |
+
[data-testid="stExpander"] {
|
| 138 |
+
border: 1px solid #E7E5E4 !important;
|
| 139 |
+
border-radius: 8px !important;
|
| 140 |
+
background: #FFFFFF !important;
|
| 141 |
+
}
|
| 142 |
+
details summary p {
|
| 143 |
+
font-family: 'DM Mono', monospace !important;
|
| 144 |
+
font-size: 0.72rem !important;
|
| 145 |
+
letter-spacing: 0.5px !important;
|
| 146 |
+
color: #57534E !important;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
/* ββ Alerts ββ */
|
| 150 |
+
[data-testid="stAlert"] {
|
| 151 |
+
border-radius: 8px !important;
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
/* ββ Caption ββ */
|
| 155 |
+
[data-testid="stCaptionContainer"] p {
|
| 156 |
+
font-family: 'DM Mono', monospace !important;
|
| 157 |
+
font-size: 0.63rem !important;
|
| 158 |
+
color: #A8A29E !important;
|
| 159 |
+
letter-spacing: 0.3px !important;
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
/* ββ Divider ββ */
|
| 163 |
+
hr {
|
| 164 |
+
border: none !important;
|
| 165 |
+
border-top: 1px solid #E7E5E4 !important;
|
| 166 |
+
margin: 1.5rem 0 !important;
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
/* ββ Tab strip ββ */
|
| 170 |
+
[data-testid="stTabs"] [role="tab"] {
|
| 171 |
+
font-family: 'DM Mono', monospace !important;
|
| 172 |
+
font-size: 0.68rem !important;
|
| 173 |
+
letter-spacing: 1px !important;
|
| 174 |
+
text-transform: uppercase !important;
|
| 175 |
+
}
|
| 176 |
+
</style>
|
| 177 |
+
"""
|
| 178 |
+
|
| 179 |
+
# ββ Plotly shared layout (light, editorial) βββββββββββββββββββββββββββββββββββ
|
| 180 |
+
def _axis_style() -> dict:
    """Return a fresh copy of the axis styling shared by the x and y axes."""
    return {
        "gridcolor": "#F5F5F4",
        "linecolor": "#E7E5E4",
        "tickfont": {"color": "#78716C", "size": 10},
        "title_font": {"color": "#57534E", "size": 11},
        "zerolinecolor": "#E7E5E4",
    }


# Shared Plotly layout: white paper, near-white plot area, DM Mono labels.
PLOTLY_LAYOUT = {
    "paper_bgcolor": "#FFFFFF",
    "plot_bgcolor": "#FAFAF9",
    "font": {"family": "DM Mono, monospace", "color": "#292524", "size": 10.5},
    "margin": {"l": 0, "r": 0, "t": 44, "b": 0},
    "legend": {
        "bgcolor": "rgba(255,255,255,0.92)",
        "bordercolor": "#E7E5E4",
        "borderwidth": 1,
        "font": {"size": 10},
    },
    # Separate dict objects per axis, so mutating one never affects the other.
    "xaxis": _axis_style(),
    "yaxis": _axis_style(),
}
|
| 203 |
+
|
| 204 |
+
# ββ Colour tokens βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 205 |
+
# Accents
SAFFRON, SAFFRON_D = "#FB923C", "#EA580C"   # primary accent / darker saffron
# Neutrals
SLATE, STONE = "#1C1917", "#78716C"         # near-black / muted label
BORDER, BG, WHITE = "#E7E5E4", "#FAF9F7", "#FFFFFF"
# Status colours
GREEN, RED, AMBER, BLUE = "#16A34A", "#DC2626", "#D97706", "#2563EB"

# Sequential saffron colour scale (light -> dark) as [stop, colour] pairs,
# for choropleth / sequential maps.
SAFFRON_SCALE = [
    [stop, shade]
    for stop, shade in zip(
        (0.0, 0.25, 0.5, 0.75, 1.0),
        ("#FFF7ED", "#FED7AA", "#FB923C", "#EA580C", "#7C2D12"),
    )
]
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
# ββ Helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 228 |
+
def inject_theme():
    """Render ``THEME_CSS`` into the current Streamlit page.

    NOTE: a second ``inject_theme`` defined at the bottom of this module
    rebinds the name to a no-op, so this implementation is shadowed once the
    module has finished importing.
    """
    from streamlit import markdown as render_markdown

    render_markdown(THEME_CSS, unsafe_allow_html=True)
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def page_header(eyebrow: str, title: str, subtitle: str = ""):
    """Render the page heading: eyebrow label, title and optional subtitle.

    The three strings are interpolated into raw HTML without escaping and
    rendered with ``unsafe_allow_html=True``.

    Args:
        eyebrow: Small upper-case label shown above the title.
        title: Main heading text.
        subtitle: Optional paragraph below the title; omitted when empty.
    """
    import streamlit as st

    if subtitle:
        sub_html = (
            f'<p style="font-family:\'Source Serif 4\',serif; font-size:0.92rem; '
            f'color:#78716C; margin:6px 0 0 0; line-height:1.5;">{subtitle}</p>'
        )
    else:
        sub_html = ""

    header_html = f"""
<div style="margin-bottom:1.75rem; padding-bottom:1.25rem; border-bottom:2px solid #E7E5E4;">
<p style="font-family:'DM Mono',monospace; font-size:0.58rem; letter-spacing:3.5px;
text-transform:uppercase; color:#FB923C; margin:0 0 7px 0;">{eyebrow}</p>
<h1 style="font-family:'Fraunces',serif; font-size:2.1rem; font-weight:600;
color:#1C1917; margin:0; line-height:1.15;">{title}</h1>
{sub_html}
</div>"""
    st.markdown(header_html, unsafe_allow_html=True)
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def section_label(text: str):
    """Render *text* as a small upper-case section label with a bottom rule.

    *text* is interpolated into raw HTML without escaping.
    """
    import streamlit as st

    paragraph_style = (
        'font-family:\'DM Mono\',monospace; font-size:0.58rem; '
        'letter-spacing:3px; text-transform:uppercase; color:#A8A29E; '
        'margin:0 0 10px 0; padding-bottom:8px; border-bottom:1px solid #F5F5F4;'
    )
    label_html = f'<p style="{paragraph_style}">{text}</p>'
    st.markdown(label_html, unsafe_allow_html=True)
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def kpi_html(value: str, label: str, color: str = "#1C1917", note: str = "") -> str:
    """Build the HTML for a KPI card.

    Args:
        value: Headline figure, shown large in *color*.
        label: Small upper-case caption above the value.
        color: CSS colour applied to the value.
        note: Optional footnote below the value; omitted when empty.

    Returns:
        The card markup as a string; arguments are inserted unescaped.
    """
    if note:
        note_html = (
            f'<p style="font-family:\'DM Mono\',monospace; font-size:0.62rem; '
            f'color:#A8A29E; margin:3px 0 0 0;">{note}</p>'
        )
    else:
        note_html = ""

    card = f"""
<div style="background:#FFFFFF; border:1px solid #E7E5E4; border-radius:8px; padding:1rem 1.25rem;">
<p style="font-family:'DM Mono',monospace; font-size:0.58rem; letter-spacing:2.5px;
text-transform:uppercase; color:#A8A29E; margin:0 0 5px 0;">{label}</p>
<p style="font-family:'Fraunces',serif; font-size:1.9rem; font-weight:600;
color:{color}; line-height:1; margin:0;">{value}</p>
{note_html}
</div>"""
    return card
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def signal_card_html(value: str, title: str, body: str, accent: str = "#FB923C") -> str:
    """Build the HTML for a signal card with an accent-coloured left border.

    Args:
        value: Figure shown on the left in the accent colour.
        title: Small upper-case heading.
        body: Descriptive text below the heading.
        accent: CSS colour for the left border and the value.

    Returns:
        The card markup as a string; arguments are inserted unescaped.
    """
    template = """
<div style="background:#FFFFFF; border:1px solid #E7E5E4; border-left:3px solid {accent};
border-radius:8px; padding:0.85rem 1rem; margin-bottom:7px;
display:flex; align-items:center; gap:0.9rem;">
<span style="font-family:'Fraunces',serif; font-size:1.55rem; font-weight:600;
color:{accent}; min-width:56px; text-align:right; flex-shrink:0;">{value}</span>
<div>
<p style="font-family:'DM Mono',monospace; font-size:0.6rem; letter-spacing:1.2px;
text-transform:uppercase; color:#57534E; margin:0 0 2px 0;">{title}</p>
<p style="font-family:'Source Serif 4',serif; font-size:0.78rem;
color:#A8A29E; margin:0; line-height:1.4;">{body}</p>
</div>
</div>"""
    return template.format(value=value, title=title, body=body, accent=accent)
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
# NOTE: inject_theme() is now a no-op for page files.
|
| 294 |
+
# All CSS is injected once in app.py before st.navigation() runs,
|
| 295 |
+
# which means it persists across every page automatically.
|
| 296 |
+
def inject_theme():
    """Do nothing; kept so page modules that still call it keep working."""
    return None  # CSS already injected globally by app.py
|
frontend/utils/api_client.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
utils/api_client.py
|
| 3 |
+
--------------------
|
| 4 |
+
Centralized, cached API wrappers.
|
| 5 |
+
|
| 6 |
+
HF Spaces compatible: reads API_URL from environment variable so the
|
| 7 |
+
same code works locally (localhost:8000) and on HuggingFace (localhost:8000
|
| 8 |
+
since both services run in the same container).
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import requests
|
| 13 |
+
import pandas as pd
|
| 14 |
+
import streamlit as st
|
| 15 |
+
|
| 16 |
+
# HF Spaces: backend always on localhost:8000 inside the container
|
| 17 |
+
API = os.environ.get("API_URL", "http://localhost:8000")
|
| 18 |
+
TIMEOUT = 15
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@st.cache_data(ttl=300)
def _get_cached(endpoint: str, params: dict | None = None):
    """GET ``API + endpoint`` and return the decoded JSON body.

    Raises on connection errors, timeouts, non-2xx responses and invalid
    JSON.  Raising (instead of returning ``None``) matters: a cached function
    only stores values it returns, so failures are never cached.
    """
    response = requests.get(f"{API}{endpoint}", params=params or {}, timeout=TIMEOUT)
    response.raise_for_status()
    return response.json()


def _get(endpoint: str, params: dict | None = None):
    """Cached GET that returns the JSON payload, or ``None`` on failure.

    Successful responses are cached for 300 seconds.  Failures are not
    cached, so the next call retries instead of reporting the backend as
    offline until the TTL expires.

    Args:
        endpoint: Path appended to ``API`` (e.g. ``"/districts/stats"``).
        params: Optional query-string parameters.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the server
        answers with an error status, or the body is not valid JSON.
    """
    try:
        return _get_cached(endpoint, params)
    except (requests.exceptions.RequestException, ValueError):
        # RequestException covers connection errors, timeouts and HTTP error
        # statuses; ValueError covers an undecodable JSON body.
        return None


# Keep the cache-clearing hook that the decorated function used to expose.
_get.clear = _get_cached.clear
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def _df(data) -> pd.DataFrame:
|
| 37 |
+
if not data:
|
| 38 |
+
return pd.DataFrame()
|
| 39 |
+
if isinstance(data, list):
|
| 40 |
+
return pd.DataFrame(data)
|
| 41 |
+
if isinstance(data, dict):
|
| 42 |
+
return pd.DataFrame([data])
|
| 43 |
+
return pd.DataFrame()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# ββ Health βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 47 |
+
def is_online() -> bool:
    """Return True only if the backend's /health endpoint answers successfully.

    A response with an error status (4xx/5xx) counts as offline; before,
    any response at all — even a 500 — was reported as online.
    """
    try:
        response = requests.get(f"{API}/health", timeout=5)
    except Exception:
        # Connection refused, timeout, bad URL, ...: backend not reachable.
        return False
    return response.ok
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# ββ /districts/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 56 |
+
def fetch_stats() -> dict:
    """Return the /districts/stats payload, or {} when nothing usable came back."""
    stats = _get("/districts/stats")
    return stats if stats else {}
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def fetch_states() -> list[str]:
    """Return the /districts/states payload, or [] when nothing usable came back."""
    states = _get("/districts/states")
    return states if states else []
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def fetch_districts(state: str) -> list[str]:
    """Return the /districts/list payload for ``state``, or [] when nothing usable came back."""
    query = {"state": state}
    districts = _get("/districts/list", query)
    return districts if districts else []
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def fetch_district_history(state: str, district: str) -> pd.DataFrame:
    """Return the /districts/history payload for one district as a DataFrame."""
    query = {"state": state, "district": district}
    payload = _get("/districts/history", query)
    return _df(payload)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def fetch_top_districts(
    state: str | None = None,
    metric: str = "person_days_lakhs",
    n: int = 12,
) -> pd.DataFrame:
    """Return the /districts/top payload as a DataFrame.

    ``metric`` and ``n`` are always sent; ``state`` is sent only when truthy.
    """
    state_filter = {"state": state} if state else {}
    query = {"metric": metric, "n": n, **state_filter}
    return _df(_get("/districts/top", query))
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def fetch_yearly_trend(state: str | None = None) -> pd.DataFrame:
    """Return the /districts/trend payload as a DataFrame, filtered by ``state`` when given."""
    query = {}
    if state:
        query["state"] = state
    return _df(_get("/districts/trend", query))
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# ββ /predictions/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 89 |
+
def fetch_predictions(
    state: str | None = None,
    district: str | None = None,
    year: int | None = None,
) -> pd.DataFrame:
    """Return the /predictions/ payload as a DataFrame.

    Only truthy filters are forwarded as query parameters.
    """
    candidates = (("state", state), ("district", district), ("year", year))
    query = {key: value for key, value in candidates if value}
    return _df(_get("/predictions/", query))
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ββ /optimizer/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 102 |
+
def fetch_optimizer_results(state: str | None = None) -> pd.DataFrame:
    """Return the /optimizer/results payload as a DataFrame, filtered by ``state`` when given."""
    query = {}
    if state:
        query["state"] = state
    return _df(_get("/optimizer/results", query))
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def run_optimizer_live(
    state: str | None = None,
    budget_scale: float = 1.0,
    min_fraction: float = 0.40,
    max_fraction: float = 2.50,
) -> dict | None:
    """POST the given settings to /optimizer/run and return the decoded JSON.

    The call is not cached and uses a 60-second timeout instead of the
    module-wide TIMEOUT. On failure an error is shown in the Streamlit UI
    and None is returned.
    """
    payload = {
        "state": state,
        "budget_scale": budget_scale,
        "min_fraction": min_fraction,
        "max_fraction": max_fraction,
    }
    try:
        r = requests.post(f"{API}/optimizer/run", json=payload, timeout=60)
        r.raise_for_status()
        return r.json()
    except requests.exceptions.ConnectionError:
        # Message previously contained a mis-encoded dash.
        st.error("Cannot reach API — backend may still be starting up, refresh in a moment.")
        return None
    except Exception as e:
        st.error(f"Optimizer error: {e}")
        return None
|
hf_start.sh
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# hf_start.sh — SchemeImpactNet HuggingFace Spaces entrypoint
# Runs pipeline (if needed), starts FastAPI on 8000, Streamlit on 7860

set -euo pipefail

echo "============================================================"
echo " SchemeImpactNet — HuggingFace Spaces Startup"
echo "============================================================"

cd /app

# ── Step 1: Generate / verify processed data ─────────────────────────────────
echo ""
echo "→ Checking processed data..."

# Any missing output triggers a full pipeline run.
NEEDS_PIPELINE=false
for f in data/processed/mnrega_cleaned.csv \
         data/processed/mnrega_predictions.csv \
         data/processed/optimized_budget_allocation.csv; do
    if [[ ! -f "$f" ]]; then
        echo "  Missing: $f"
        NEEDS_PIPELINE=true
    fi
done

if [[ "$NEEDS_PIPELINE" == true ]]; then
    echo "→ Running data pipeline (Stage 3)..."
    python main.py --stage 3
    echo "✓ Pipeline complete"
else
    echo "✓ Processed data found — skipping pipeline"
fi

# ── Step 2: Start FastAPI backend on port 8000 (background) ──────────────────
echo ""
echo "→ Starting FastAPI backend on port 8000..."
python -m uvicorn backend.main:app \
    --host 0.0.0.0 \
    --port 8000 \
    --log-level warning &
BACKEND_PID=$!

# Wait for backend health (at most MAX_WAIT probes, one second apart).
MAX_WAIT=20
WAITED=0
BACKEND_READY=false
while (( WAITED < MAX_WAIT )); do
    if curl -sf "http://localhost:8000/health" >/dev/null 2>&1; then
        BACKEND_READY=true
        break
    fi
    # Stop waiting early if uvicorn has already exited (import error, port in use, ...).
    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
        echo "  ⚠ Backend process exited during startup — continuing without API"
        break
    fi
    sleep 1
    WAITED=$((WAITED + 1))
done

# Only claim the backend is live when the health probe actually succeeded.
if [[ "$BACKEND_READY" == true ]]; then
    echo "✓ Backend live"
elif (( WAITED >= MAX_WAIT )); then
    echo "  ⚠ Backend health timeout — continuing"
fi

# ── Step 3: Start Streamlit on HF port 7860 (foreground) ─────────────────────
echo ""
echo "→ Starting Streamlit frontend on port 7860..."
echo "→ Dashboard: https://huggingface.co/spaces/{YOUR_SPACE}"
echo ""

# exec replaces this shell so Streamlit receives container signals directly.
exec python -m streamlit run frontend/app.py \
    --server.port 7860 \
    --server.address 0.0.0.0 \
    --server.headless true \
    --server.enableCORS false \
    --server.enableXsrfProtection false \
    --browser.gatherUsageStats false
|
main.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
main.py
-------
Entry point for SchemeImpactNet.

Usage:
    python main.py                                # Stage 1 — Maharashtra
    python main.py --stage 2                      # Stage 2 — All-India
    python main.py --stage 3                      # Stage 3 — All-India + optimize
    python main.py --stage 3 --state Maharashtra  # Stage 3, one state
    python main.py --optimize-only                # Run optimizer on existing predictions
"""

import argparse
import sys

from src.pipeline import run_pipeline, run_optimizer_step

# Financial year whose predictions are printed as the "top 10" summary.
REPORT_YEAR = 2023


def _parse_args(argv):
    """Parse the command-line flags documented in the module docstring."""
    parser = argparse.ArgumentParser(description="SchemeImpactNet pipeline runner")
    parser.add_argument("--stage", type=int, default=1,
                        help="pipeline stage to run (default: 1)")
    parser.add_argument("--state", default=None,
                        help="state passed to the optimizer step as scope_state")
    parser.add_argument("--optimize-only", action="store_true",
                        help="run the optimizer on existing predictions only")
    # The previous hand-rolled parser ignored unknown flags; keep doing so.
    args, _unknown = parser.parse_known_args(argv)
    return args


def main(argv=None):
    """Run the optimizer only, or the pipeline followed by the summary.

    A missing or non-integer ``--stage`` value now ends with an argparse
    usage error instead of an IndexError / ValueError traceback.
    """
    args = _parse_args(sys.argv[1:] if argv is None else argv)

    if args.optimize_only:
        print("\nRunning optimizer on existing predictions...")
        run_optimizer_step(scope_state=args.state)
        return

    predictions = run_pipeline(stage=args.stage)

    print(f"\nTop 10 predicted districts ({REPORT_YEAR}):")
    latest = predictions[predictions["financial_year"] == REPORT_YEAR]
    print(
        latest[["state", "district", "person_days_lakhs", "predicted_persondays"]]
        .sort_values("predicted_persondays", ascending=False)
        .head(10)
        .to_string(index=False)
    )

    # Stage 3: automatically run optimizer after model
    if args.stage == 3:
        print("\n" + "─" * 60)
        print(" Running Stage 3 Budget Optimizer...")
        print("─" * 60)
        run_optimizer_step(scope_state=args.state)


if __name__ == "__main__":
    main()
|
overview.txt
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Project Overview: SchemeImpactNet - A Machine Learning Framework for Predictive Impact Analysis and Optimization of Indian Government Schemes
|
| 2 |
+
|
| 3 |
+
SchemeImpactNet is an innovative, machine learning-powered platform designed to revolutionize how Indian government schemes are analyzed, predicted, and optimized. Building at the intersection of AI and public policy, this system addresses the limitations of traditional scheme management tools by shifting from reactive administration to proactive, data-driven decision-making. It leverages publicly available datasets from Indian government portals (such as data.gov.in, mospi.gov.in, and scheme-specific sites like pmkisan.gov.in) to forecast the socio-economic impacts of schemes, identify inefficiencies, and recommend optimized resource allocations. This makes it particularly suitable for a final-year engineering project, as it combines real-world data integration, advanced ML algorithms, and practical visualizations, demonstrating technical depth while solving a pressing national issue: enhancing the effectiveness of welfare programs that affect millions.
|
| 4 |
+
|
| 5 |
+
The core inspiration stems from the vast, underutilized data on schemes like Pradhan Mantri Kisan Samman Nidhi (PM-KISAN), Mahatma Gandhi National Rural Employment Guarantee Act (MNREGA), Pradhan Mantri Awas Yojana (PMAY), and others. These datasets include beneficiary demographics, budget expenditures, regional implementations, and outcome metrics (e.g., employment generated, houses built, income uplifts). By applying ML, SchemeImpactNet not only manages this data but transforms it into actionable insights—predicting future outcomes and simulating policy tweaks to maximize benefits like poverty alleviation or rural development. Unlike basic management systems (which might have led to your initial rejection), this framework emphasizes predictive analytics and optimization, making it unique, scalable, and aligned with India's National AI Strategy for governance.
|
| 6 |
+
|
| 7 |
+
#### Key Features and Benefits
|
| 8 |
+
- **Predictive Impact Analysis**: Uses historical data to forecast scheme performance. For instance, it could predict how MNREGA's job creation in a Maharashtra district might reduce migration rates over the next 5 years, factoring in variables like rainfall, population density, and overlapping schemes.
|
| 9 |
+
- **Optimization Engine**: Recommends budget reallocations or scheme integrations to minimize waste. E.g., if PMAY is underperforming in urban slums due to labor shortages, the system might suggest diverting funds from less critical areas, using optimization algorithms to ensure equitable distribution.
|
| 10 |
+
- **Interconnected Scheme Network**: Models schemes as a graph, revealing dependencies (e.g., how agricultural schemes like PM-KISAN influence health outcomes via better nutrition), enabling holistic policy simulations.
|
| 11 |
+
- **User-Friendly Dashboard**: An interactive interface for stakeholders (policymakers, researchers, or even citizens) to query predictions, visualize data, and explore "what-if" scenarios.
|
| 12 |
+
- **Ethical and Bias-Aware Design**: Incorporates fairness checks to avoid regional biases in predictions, ensuring the system promotes inclusive growth.
|
| 13 |
+
- **Benefits for India-Specific Context**: With over 400 central schemes and trillions in annual budgets, inefficiencies like duplication (e.g., multiple housing aids) cost billions. SchemeImpactNet could help save resources, improve targeting (e.g., to underserved tribal areas), and support evidence-based policymaking, aligning with Sustainable Development Goals (SDGs) like No Poverty and Decent Work.
|
| 14 |
+
|
| 15 |
+
This project stands out for examiners because it tackles a real problem with measurable impact: You can quantify improvements (e.g., 15-20% better resource utilization in simulations) using metrics from ML models. It's feasible with open data, requires no proprietary tools, and has extension potential (e.g., integrating real-time APIs from government sites).
|
| 16 |
+
|
| 17 |
+
#### System Architecture
|
| 18 |
+
To visualize the high-level structure, here's a text-based diagram representing the end-to-end architecture. (If a graphical diagram is preferred, imagine this as a flowchart: Data sources feed into preprocessing, which branches to ML modules, converging at the optimization and UI layers.)
|
| 19 |
+
|
| 20 |
+
```
|
| 21 |
+
+-------------------+ +-------------------+ +-------------------+
|
| 22 |
+
| Data Sources | | Data Preprocessing| | ML Core Engine |
|
| 23 |
+
| - data.gov.in | --> | - Cleaning | --> | - Predictive Models|
|
| 24 |
+
| - mospi.gov.in | | - Integration | | (XGBoost, LSTM) |
|
| 25 |
+
| - Scheme APIs | | - Feature Eng. | | - Clustering (K-Means)|
|
| 26 |
+
+-------------------+ +-------------------+ | - Graph Analysis (GNN)|
|
| 27 |
+
| - Optimization (RL/PuLP)|
|
| 28 |
+
+-------------------+
|
| 29 |
+
|
|
| 30 |
+
v
|
| 31 |
+
+-------------------+ +-------------------+ +-------------------+
|
| 32 |
+
| Impact Simulation| <-- | Visualization | <-- | User Interface |
|
| 33 |
+
| - What-If Scenarios| | - Dashboards | | - Web App (Streamlit)|
|
| 34 |
+
| - Network Propagation| | - Geospatial Maps| | - Interactive Queries|
|
| 35 |
+
+-------------------+ +-------------------+ +-------------------+
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
- **Data Layer**: Ingests raw datasets (e.g., CSV files on beneficiary counts, budgets by state/year). Handles challenges like missing values or inconsistent formats using Pandas.
|
| 39 |
+
- **Preprocessing Layer**: Normalizes data, engineers features (e.g., deriving "impact score" from outcomes), and merges datasets (e.g., linking MNREGA jobs to PMAY completions via district codes).
|
| 40 |
+
- **ML Core**:
|
| 41 |
+
- Predictive: Time-series models (LSTM for forecasting beneficiary growth) and regression (XGBoost for impact scores).
|
| 42 |
+
- Unsupervised: Clustering districts into performance groups.
|
| 43 |
+
- Graph-Based: Represents schemes as nodes/edges in a network (using NetworkX/PyTorch Geometric) to model ripple effects.
|
| 44 |
+
- Optimization: Solves allocation problems (e.g., maximize total impact under budget constraints) with linear programming or reinforcement learning.
|
| 45 |
+
- **Simulation Layer**: Runs scenarios, e.g., "Increase PM-KISAN funding by 10% in drought areas—predict GDP lift."
|
| 46 |
+
- **Visualization & UI Layer**: Outputs charts, maps, and reports. E.g., a heatmap showing predicted poverty reduction across India.
|
| 47 |
+
|
| 48 |
+
#### Data Flow and Workflow
|
| 49 |
+
1. **Ingestion**: Automatically pull or upload data from gov sites (focus on 5-10 schemes initially, like agriculture and rural development ones relevant to Maharashtra, given your location).
|
| 50 |
+
2. **Analysis Pipeline**: Feed cleaned data into models. Train on 80% historical data (e.g., 2015-2023), test on recent years.
|
| 51 |
+
3. **Output Generation**: Generate predictions (e.g., "MNREGA in Pimpri-Chinchwad could generate 50,000 jobs by 2027, reducing unemployment by 8%"), optimizations, and visuals.
|
| 52 |
+
4. **Iteration**: Users refine inputs via the dashboard, re-running simulations for refined insights.
|
| 53 |
+
|
| 54 |
+
#### Implementation Considerations
|
| 55 |
+
- **Scalability**: Start with subsets (e.g., Maharashtra-focused data for local relevance), expand nationwide.
|
| 56 |
+
- **Challenges and Solutions**: Data privacy (use anonymized aggregates); model accuracy (cross-validate with real outcomes); computational needs (run on local machines with GPU for GNNs if available).
|
| 57 |
+
- **Evaluation**: Measure success via metrics like prediction accuracy (RMSE < 0.1 for forecasts) and optimization gains (e.g., 10-25% efficiency boost in simulations). Compare against baselines like non-ML rule-based allocators.
|
| 58 |
+
- **Future Enhancements**: Integrate NLP for scheme document analysis (e.g., extracting eligibility rules) or blockchain for transparent tracking.
|
| 59 |
+
|
| 60 |
+
This elaborate overview positions SchemeImpactNet as a cutting-edge project that impresses not only with its technical sophistication but also with its potential societal impact. If diagrams are crucial, I can suggest generating one via tools like Draw.io or Python's Matplotlib in your code—e.g., a simple flowchart script you can run locally. Let me know if you'd like code snippets for that!
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
The core problem
|
| 64 |
+
India spends ₹70,000–90,000 crore per year on MNREGA alone. But nobody can reliably answer questions like:
|
| 65 |
+
|
| 66 |
+
Will this district generate enough employment next year, or will it fall short?
|
| 67 |
+
Is this district spending efficiently, or is money being wasted?
|
| 68 |
+
Where should we prioritize resources to get the most impact?
|
| 69 |
+
|
| 70 |
+
Right now, decisions are made reactively — officials look at last year's numbers and make gut calls. There's no forecasting, no early warning system, no optimization.
|
| 71 |
+
|
| 72 |
+
What SchemeImpactNet actually tries to solve
|
| 73 |
+
At its core, three concrete questions:
|
| 74 |
+
1. Prediction — "What will this district's MNREGA performance look like next year?"
|
| 75 |
+
Given historical persondays, expenditure, and other factors — predict future performance. Early warning if a district is going to underperform.
|
| 76 |
+
2. Efficiency Analysis — "Is this district getting good value for money?"
|
| 77 |
+
Some districts generate 50 persondays per ₹1000 spent. Others generate 20. Why? What separates high-performers from low-performers?
|
| 78 |
+
3. Resource Optimization — "Where should budget go to maximize employment generated?"
|
| 79 |
+
Given a fixed budget, which districts should get more funding to maximize total persondays across Maharashtra?
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
-----------------------------------------------------------------------------------------------------
|
| 84 |
+
|
| 85 |
+
3. Data Sources and Processing
|
| 86 |
+
The dataset used in this study combines real government data with domain-informed estimates to produce a comprehensive district-level MNREGA dataset spanning 2014-15 to 2024-25 across 759 districts and 34 states.
|
| 87 |
+
Primary Source β MNREGA Employment Data: Person days generated and households engaged in work were sourced from the Ministry of Rural Development's official MIS portal via Dataful.in (Dataset ID: 20063), which aggregates monthly district-level records from nreganarep.nic.in. Monthly figures were summed to produce annual totals. Person days were converted from absolute numbers to lakh units by dividing by 100,000.
|
| 88 |
+
Wage and Expenditure Derivation: District-level expenditure was derived using MoRD's officially notified state wage rates, which are revised annually. Expenditure in Rs. lakhs was computed as the product of person days (lakhs) and the prevailing wage rate (Rs./day). Budget allocated was estimated as expenditure divided by 0.89, reflecting the national average budget utilization rate of approximately 89%.
|
| 89 |
+
Rainfall Data: Annual subdivision-level rainfall data was sourced from the India Meteorological Department (IMD) historical dataset covering 1901-2017. IMD meteorological subdivisions were mapped to states. For years 2018-2024 where IMD data was unavailable, rainfall was estimated using each subdivision's 2000-2017 average with ±5% stochastic variation using a fixed random seed for reproducibility.
|
| 90 |
+
Demographic and Poverty Data: Rural population figures were derived from Census of India 2011 state-level rural headcounts, distributed across districts proportional to each district's share of state-level person days. A 1.2% annual growth rate was applied to project values from 2011 to 2024. Poverty rates were sourced from NITI Aayog's National Multidimensional Poverty Index (MPI) 2021 report, with district-level variation introduced based on relative MNREGA activity.
|
| 91 |
+
Scheme Interdependency Features: PM-KISAN beneficiary estimates were computed from rural population assuming 35% farmer household penetration from 2019-20 onwards (scheme launch year), scaled by district activity. PMAY-G housing figures were estimated from 2016-17 onwards using poverty-weighted rural population ratios, with completion rates linearly interpolated from 30% (2016-17) to 85% (2023-24) based on reported national progress.
|
| 92 |
+
Final Dataset: 7,758 district-year observations, 22 features, zero missing values.
|
reports/model_comparison.csv
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model,rmse,mae,r2,selected
|
| 2 |
+
XGBoost,2.3301,1.3795,0.9963,β
|
| 3 |
+
GradientBoostingRegressor,1.9383,1.1863,0.9975,
|
| 4 |
+
RandomForestRegressor,2.2926,1.0879,0.9965,
|
reports/model_report.txt
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
SchemeImpactNet — V4 Model Selection Report
|
| 2 |
+
============================================================
|
| 3 |
+
|
| 4 |
+
Best Model : GradientBoosting
|
| 5 |
+
Selection : max mean R² excl. 2022 (walk-forward CV)
|
| 6 |
+
Features : 17
|
| 7 |
+
Evaluation : Walk-forward CV (2018–2024)
|
| 8 |
+
|
| 9 |
+
Algorithm Comparison:
|
| 10 |
+
Model R² R² ex22 MAE RMSE
|
| 11 |
+
------------------------------------------------------------
|
| 12 |
+
GradientBoosting 0.8510 0.9078 8.554 16.334 β BEST
|
| 13 |
+
RandomForest 0.8417 0.9063 8.739 16.679
|
| 14 |
+
Ridge 0.8018 0.8824 9.975 18.545
|
| 15 |
+
ElasticNet 0.7982 0.8811 9.890 18.678
|
| 16 |
+
XGBoost 0.8533 0.9034 8.457 16.409
|
| 17 |
+
|
| 18 |
+
Best Model (GradientBoosting) Walk-Forward CV:
|
| 19 |
+
Mean R² : 0.8510
|
| 20 |
+
excl.2022 R²: 0.9078
|
| 21 |
+
Mean MAE : 8.554 lakh
|
| 22 |
+
Mean RMSE : 16.334 lakh
|
| 23 |
+
R² gain : +0.0737 vs naive lag-1
|
| 24 |
+
|
| 25 |
+
Previous (leaked) R²: 0.9963
|
| 26 |
+
Leakage source: works_completed (r=1.0 with target)
|
| 27 |
+
|
| 28 |
+
2022 anomaly: West Bengal -93 to -98% reporting drop. Excl. R²=0.9078
|
| 29 |
+
|
| 30 |
+
Feature Importances:
|
| 31 |
+
lag1_pd 0.5270
|
| 32 |
+
lag1_adj 0.2512
|
| 33 |
+
state_lag1_zscore 0.0837
|
| 34 |
+
roll2_mean 0.0612
|
| 35 |
+
blended_capacity 0.0199
|
| 36 |
+
roll3_mean 0.0188
|
| 37 |
+
is_covid 0.0067
|
| 38 |
+
lag3_pd 0.0065
|
| 39 |
+
state_lag1_norm 0.0056
|
| 40 |
+
lag2_pd 0.0054
|
| 41 |
+
relative_to_state 0.0034
|
| 42 |
+
state_enc 0.0033
|
| 43 |
+
roll3_std 0.0030
|
| 44 |
+
avg_wage_rate 0.0015
|
| 45 |
+
lag1_vs_capacity 0.0014
|
| 46 |
+
wage_yoy 0.0013
|
| 47 |
+
lag1_is_covid 0.0000
|
| 48 |
+
|
| 49 |
+
Year-by-year CV (GradientBoosting):
|
| 50 |
+
year n r2 mae rmse mape naive_r2 naive_mae r2_gain mae_gain
|
| 51 |
+
2018 689 0.9160 6.639 13.168 1.996413e+09 0.9124 7.556 0.0036 0.916
|
| 52 |
+
2019 701 0.9262 6.380 11.111 1.571437e+10 0.8651 7.484 0.0611 1.104
|
| 53 |
+
2020 695 0.8354 12.681 23.825 1.346619e+09 0.7526 18.279 0.0828 5.598
|
| 54 |
+
2021 698 0.9261 7.150 14.966 6.480334e+08 0.9384 7.988 -0.0122 0.839
|
| 55 |
+
2022 713 0.5101 13.954 28.022 2.442193e+08 0.1804 14.288 0.3297 0.334
|
| 56 |
+
2023 709 0.9089 7.403 13.336 3.815669e+10 0.9227 6.984 -0.0139 -0.419
|
| 57 |
+
2024 727 0.9345 5.673 9.911 2.038457e+10 0.8697 7.278 0.0648 1.605
|
requirements.txt
CHANGED
|
@@ -1,3 +1,30 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
contourpy==1.3.3
|
| 2 |
+
cycler==0.12.1
|
| 3 |
+
fonttools==4.61.1
|
| 4 |
+
joblib==1.5.3
|
| 5 |
+
kiwisolver==1.4.9
|
| 6 |
+
matplotlib==3.10.8
|
| 7 |
+
numpy==2.4.2
|
| 8 |
+
nvidia-nccl-cu12==2.29.3
|
| 9 |
+
packaging==26.0
|
| 10 |
+
pandas==3.0.1
|
| 11 |
+
pillow==12.1.1
|
| 12 |
+
pyparsing==3.3.2
|
| 13 |
+
python-dateutil==2.9.0.post0
|
| 14 |
+
scikit-learn==1.8.0
|
| 15 |
+
scipy==1.17.0
|
| 16 |
+
seaborn==0.13.2
|
| 17 |
+
six==1.17.0
|
| 18 |
+
threadpoolctl==3.6.0
|
| 19 |
+
xgboost==3.2.0
|
| 20 |
+
# Backend
|
| 21 |
+
fastapi>=0.104.0
|
| 22 |
+
uvicorn[standard]>=0.24.0
|
| 23 |
+
sqlalchemy>=2.0.0
|
| 24 |
+
pydantic>=2.0.0
|
| 25 |
+
|
| 26 |
+
# Frontend
|
| 27 |
+
streamlit
|
| 28 |
+
plotly>=5.17.0
|
| 29 |
+
requests>=2.31.0
|
| 30 |
+
|
src/__init__.py
ADDED
|
File without changes
|
src/clean.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
clean.py
|
| 3 |
+
--------
|
| 4 |
+
Cleans and standardizes the unified MNREGA dataset.
|
| 5 |
+
Works for Stage 1 (Maharashtra) through Stage 3 (All-India + scheme data).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
CRITICAL_COLS = ["person_days_lakhs", "expenditure_lakhs", "avg_wage_rate"]
|
| 12 |
+
|
| 13 |
+
NON_CRITICAL_COLS = [
|
| 14 |
+
"households_demanded", "households_offered", "households_availed",
|
| 15 |
+
"works_completed", "rainfall_mm", "crop_season_index",
|
| 16 |
+
"rural_population_lakhs", "poverty_rate_pct",
|
| 17 |
+
"pmkisan_beneficiaries", "pmkisan_amount_lakhs",
|
| 18 |
+
"pmay_houses_sanctioned", "pmay_houses_completed",
|
| 19 |
+
"pmay_expenditure_lakhs", "budget_allocated_lakhs"
|
| 20 |
+
]
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def clean(df: pd.DataFrame) -> pd.DataFrame:
    """Run every cleaning stage in order and return the cleaned frame.

    Order: strip strings, parse financial_year, cast numerics, handle
    missing values, enforce logical constraints.
    """
    print("[clean] Starting cleaning pipeline...")
    stages = (
        _strip_strings,
        _parse_financial_year,
        _cast_numerics,
        _handle_missing,
        _enforce_logical_constraints,
    )
    for stage in stages:
        df = stage(df)
    print(f"[clean] Done. Shape: {df.shape}")
    return df
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _strip_strings(df: pd.DataFrame) -> pd.DataFrame:
    """Strip leading/trailing whitespace from every text column in place.

    Covers both legacy ``object`` columns and the dedicated string dtype
    (the default for text in pandas 3.x, which ``include="object"`` no
    longer selects). In object columns only actual ``str`` values are
    touched, so mixed columns keep their non-string entries instead of
    having them replaced by NaN.
    """
    for col in df.columns:
        series = df[col]
        if pd.api.types.is_object_dtype(series.dtype):
            df[col] = series.map(lambda v: v.strip() if isinstance(v, str) else v)
        elif pd.api.types.is_string_dtype(series.dtype):
            df[col] = series.str.strip()
    return df
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _parse_financial_year(df: pd.DataFrame) -> pd.DataFrame:
    """Convert '2018-19' → integer 2018.

    Plain years ('2018', 2018) pass through, and float-formatted years
    ('2018.0', which is what an integer column looks like after pandas
    upcasts it to float) are accepted instead of raising ValueError.
    Prints the resulting year range.
    """
    def _parse(val):
        text = str(val).strip()
        # '2018-19' -> '2018'; values without a dash are used as-is.
        head = text.split("-")[0] if "-" in text else text
        return int(float(head)) if "." in head else int(head)

    df["financial_year"] = df["financial_year"].apply(_parse)
    print(f"[clean] financial_year range: {df['financial_year'].min()} → {df['financial_year'].max()}")
    return df
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _cast_numerics(df: pd.DataFrame) -> pd.DataFrame:
    """Coerce every known metric column that is present to a numeric dtype.

    Values that cannot be parsed become NaN (``errors="coerce"``).
    """
    present = [name for name in CRITICAL_COLS + NON_CRITICAL_COLS if name in df.columns]
    for name in present:
        df[name] = pd.to_numeric(df[name], errors="coerce")
    return df
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _handle_missing(df: pd.DataFrame) -> pd.DataFrame:
    """
    Fill gaps per district, then drop rows that are still unusable.

    Critical cols     : forward-fill within district, drop if still null.
    Non-critical cols : forward-fill within district, leave remaining NaN.
    """
    district_keys = ["state", "district"]
    df = df.sort_values(district_keys + ["financial_year"])

    present = [name for name in CRITICAL_COLS + NON_CRITICAL_COLS if name in df.columns]
    for col in present:
        missing_before = df[col].isna().sum()
        if not missing_before > 0:
            continue
        df[col] = df.groupby(district_keys)[col].transform(lambda s: s.ffill())
        filled = missing_before - df[col].isna().sum()
        if filled > 0:
            print(f"[clean] '{col}': forward-filled {filled} value(s)")

    rows_before = len(df)
    df = df.dropna(subset=CRITICAL_COLS).reset_index(drop=True)
    rows_after = len(df)
    if rows_after < rows_before:
        print(f"[clean] Dropped {rows_before - rows_after} rows with unresolvable critical nulls")

    return df
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _enforce_logical_constraints(df: pd.DataFrame) -> pd.DataFrame:
    """Clip any constraint violations that slipped through generation.

    Enforces, in order, households_offered <= households_demanded and then
    households_availed <= households_offered (using the already-clipped
    offered values). Only rows that actually violate a constraint are
    changed: the previous row-wise ``min`` skipped NaN and therefore
    silently filled a missing child value with the parent value whenever
    any violation existed. A pair is skipped if either column is absent.
    """
    pairs = (
        ("households_offered", "households_demanded"),
        ("households_availed", "households_offered"),
    )
    for child, parent in pairs:
        if child not in df.columns or parent not in df.columns:
            continue
        # Comparisons against NaN are False, so missing values are never flagged.
        over = df[child] > df[parent]
        violations = over.sum()
        if violations:
            df[child] = df[child].mask(over, df[parent])
            print(f"[clean] Fixed {violations} {child} > {parent}")

    return df
|
src/eda.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
eda.py
|
| 3 |
+
------
|
| 4 |
+
Exploratory Data Analysis for MNREGA unified dataset.
|
| 5 |
+
Automatically adapts to Maharashtra-only or All-India data.
|
| 6 |
+
|
| 7 |
+
Figures produced:
|
| 8 |
+
01_statewide_trend.png
|
| 9 |
+
02_district_performance_ranking.png
|
| 10 |
+
03_efficiency_ranking.png
|
| 11 |
+
04_covid_impact.png
|
| 12 |
+
05_correlation_heatmap.png
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import pandas as pd
|
| 17 |
+
import numpy as np
|
| 18 |
+
import matplotlib.pyplot as plt
|
| 19 |
+
import matplotlib.font_manager as fm
|
| 20 |
+
import seaborn as sns
|
| 21 |
+
|
| 22 |
+
# Every figure produced by this module is written below this directory.
# It is created at import time, before any plotting function can run.
FIGURES_DIR = os.path.join("reports", "figures")
os.makedirs(FIGURES_DIR, exist_ok=True)

# Plot defaults shared by all figures.
sns.set_theme(palette="muted", style="whitegrid")
plt.rcParams.update({"font.size": 10, "figure.dpi": 120})
|
| 27 |
+
|
| 28 |
+
# Use a font that supports the rupee symbol if available, else fallback
|
| 29 |
+
def _get_font():
    """Return the first preferred font family registered with matplotlib.

    The candidates are tried in a fixed priority order. ``None`` is returned
    when none of them is registered, in which case the caller leaves
    matplotlib's default font family untouched.
    """
    installed = {entry.name for entry in fm.fontManager.ttflist}
    preferred = ("DejaVu Sans", "FreeSans", "Liberation Sans", "Arial")
    return next((name for name in preferred if name in installed), None)


# Resolve the font once at import time and apply it globally when found.
FONT = _get_font()
if FONT:
    plt.rcParams["font.family"] = FONT
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def run_eda(df: pd.DataFrame, scope: str = "Maharashtra") -> None:
    """Run the complete EDA pass: console summary, then each figure in order.

    Args:
        df: Feature/cleaned MNREGA frame handed unchanged to every step.
        scope: Label inserted into the titles of the scope-aware figures.
    """
    print(f"\n[eda] Starting EDA β scope: {scope}")
    _summary_stats(df)

    # Figures whose titles mention the scope.
    for scoped_step in (_plot_trend, _plot_top_bottom_districts, _plot_efficiency_ranking):
        scoped_step(df, scope)

    # Figures that take only the data.
    for step in (_plot_covid_impact, _plot_correlation_heatmap):
        step(df)

    print(f"[eda] All figures saved to: {FIGURES_DIR}/")
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# ββ 1. Summary ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 53 |
+
|
| 54 |
+
def _summary_stats(df: pd.DataFrame) -> None:
|
| 55 |
+
print(f"\n[eda] {'β'*50}")
|
| 56 |
+
print(f"[eda] Rows : {len(df)}")
|
| 57 |
+
print(f"[eda] States : {df['state'].nunique()}")
|
| 58 |
+
print(f"[eda] Districts : {df['district'].nunique()}")
|
| 59 |
+
print(f"[eda] Years : {df['financial_year'].min()} β {df['financial_year'].max()}")
|
| 60 |
+
print(f"[eda] Total persondays: {df['person_days_lakhs'].sum():,.1f} lakh")
|
| 61 |
+
if "expenditure_lakhs" in df.columns:
|
| 62 |
+
print(f"[eda] Total expenditure: Rs. {df['expenditure_lakhs'].sum():,.1f} lakh")
|
| 63 |
+
|
| 64 |
+
print(f"\n[eda] Person days by year (state-aggregated mean):")
|
| 65 |
+
by_year = df.groupby("financial_year")["person_days_lakhs"].mean()
|
| 66 |
+
max_val = by_year.max()
|
| 67 |
+
for yr, val in by_year.items():
|
| 68 |
+
bar = "β" * int(val / max_val * 28)
|
| 69 |
+
print(f" {yr}: {bar} {val:.2f}")
|
| 70 |
+
print(f"[eda] {'β'*50}")
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# ββ 2. Trend ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 74 |
+
|
| 75 |
+
def _plot_trend(df: pd.DataFrame, scope: str) -> None:
    """Draw total person-days per financial year as a bar chart and save it.

    Args:
        df: Frame with ``financial_year`` and ``person_days_lakhs`` columns.
        scope: Label inserted into the figure title.
    """
    totals = (
        df.groupby("financial_year")["person_days_lakhs"]
        .sum()
        .rename("total_persondays")
        .reset_index()
    )

    bar_color = "#2196F3"
    fig, ax1 = plt.subplots(figsize=(11, 5))
    ax1.bar(
        totals["financial_year"],
        totals["total_persondays"],
        color=bar_color,
        alpha=0.75,
        label="Person Days (lakh)",
    )
    ax1.set_ylabel("Total Person Days (lakh)", color=bar_color)
    ax1.tick_params(axis="y", labelcolor=bar_color)
    ax1.set_xlabel("Financial Year")
    plt.title(f"MNREGA Trend β {scope} (Person Days)")
    fig.tight_layout()
    _save("01_statewide_trend.png")
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# ββ 3. District rankings ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 92 |
+
|
| 93 |
+
def _plot_top_bottom_districts(df: pd.DataFrame, scope: str) -> None:
    """Plot the best and worst districts by mean person-days, side by side.

    Each panel shows ``n = min(10, number_of_districts // 2)`` districts, so
    the two panels never overlap. The top five and bottom five districts are
    also printed to stdout.

    Args:
        df: Frame with ``district`` and ``person_days_lakhs`` columns.
        scope: Label inserted into the figure super-title.
    """
    ranked = (
        df.groupby("district")["person_days_lakhs"]
        .mean()
        .sort_values(ascending=False)
    )
    n = min(10, len(ranked) // 2)

    # (series to draw, bar colour, panel title) for the left and right panels.
    panels = (
        (ranked.head(n), "#4CAF50", f"Top {n} Districts"),
        (ranked.tail(n).sort_values(), "#FF7043", f"Bottom {n} Districts"),
    )

    fig, axes = plt.subplots(1, 2, figsize=(14, max(5, n * 0.55)))
    for ax, (series, color, title) in zip(axes, panels):
        ax.barh(series.index, series.values, color=color)
        ax.set_title(title)
        ax.set_xlabel("Avg Person Days (lakh)")
        ax.invert_yaxis()

    plt.suptitle(f"MNREGA District Performance β {scope}", fontsize=13)
    plt.tight_layout()
    _save("02_district_performance_ranking.png")

    print("\n[eda] Top 5 districts:")
    for name, value in ranked.head(5).items():
        print(f" {name:35s}: {value:.2f} lakh")
    print("[eda] Bottom 5 districts:")
    for name, value in ranked.tail(5).items():
        print(f" {name:35s}: {value:.2f} lakh")
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
# ββ 4. Efficiency ranking βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 123 |
+
|
| 124 |
+
def _plot_efficiency_ranking(df: pd.DataFrame, scope: str) -> None:
|
| 125 |
+
if "expenditure_per_personday" not in df.columns:
|
| 126 |
+
print("[eda] Skipping efficiency ranking β expenditure_per_personday not in V3 features")
|
| 127 |
+
return
|
| 128 |
+
eff = (
|
| 129 |
+
df.groupby("district")["expenditure_per_personday"]
|
| 130 |
+
.mean().sort_values().dropna()
|
| 131 |
+
)
|
| 132 |
+
if len(eff) > 30:
|
| 133 |
+
eff = pd.concat([eff.head(15), eff.tail(15)])
|
| 134 |
+
fig, ax = plt.subplots(figsize=(10, max(6, len(eff) * 0.3)))
|
| 135 |
+
colors = ["#43A047" if v <= eff.median() else "#EF5350" for v in eff.values]
|
| 136 |
+
ax.barh(eff.index, eff.values, color=colors)
|
| 137 |
+
ax.axvline(eff.median(), color="navy", linestyle="--",
|
| 138 |
+
linewidth=1.5, label=f"Median: {eff.median():.1f}")
|
| 139 |
+
ax.set_title(f"Cost Efficiency β {scope}\n(Rs. expenditure per lakh persondays β lower is better)")
|
| 140 |
+
ax.set_xlabel("Rs. lakh per lakh persondays")
|
| 141 |
+
ax.legend()
|
| 142 |
+
plt.tight_layout()
|
| 143 |
+
_save("03_efficiency_ranking.png")
|
| 144 |
+
print(f"\n[eda] Most efficient : {eff.idxmin()} ({eff.min():.1f})")
|
| 145 |
+
print(f"[eda] Least efficient: {eff.idxmax()} ({eff.max():.1f})")
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
# ββ 5. COVID impact βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 149 |
+
|
| 150 |
+
def _plot_covid_impact(df: pd.DataFrame) -> None:
|
| 151 |
+
pre = df[df["financial_year"] == 2019].groupby("district")["person_days_lakhs"].mean()
|
| 152 |
+
post = df[df["financial_year"] == 2020].groupby("district")["person_days_lakhs"].mean()
|
| 153 |
+
common = pre.index.intersection(post.index)
|
| 154 |
+
change = ((post[common] - pre[common]) / pre[common] * 100).sort_values(ascending=False)
|
| 155 |
+
|
| 156 |
+
# Cap at 20 districts for readability
|
| 157 |
+
show = pd.concat([change.head(10), change.tail(10)]) if len(change) > 20 else change
|
| 158 |
+
|
| 159 |
+
fig, ax = plt.subplots(figsize=(10, max(6, len(show) * 0.35)))
|
| 160 |
+
colors = ["#388E3C" if v >= 0 else "#D32F2F" for v in show.values]
|
| 161 |
+
ax.barh(show.index, show.values, color=colors)
|
| 162 |
+
ax.axvline(0, color="black", linewidth=0.8)
|
| 163 |
+
ax.set_title("COVID Impact: % Change in Person Days\n(2019-20 to 2020-21)")
|
| 164 |
+
ax.set_xlabel("% Change")
|
| 165 |
+
plt.tight_layout()
|
| 166 |
+
_save("04_covid_impact.png")
|
| 167 |
+
|
| 168 |
+
print(f"\n[eda] COVID β biggest spike : {change.idxmax()} (+{change.max():.1f}%)")
|
| 169 |
+
print(f"[eda] COVID β least impacted : {change.idxmin()} ({change.min():.1f}%)")
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
# ββ 6. Correlation heatmap ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 173 |
+
|
| 174 |
+
def _plot_correlation_heatmap(df: pd.DataFrame) -> None:
|
| 175 |
+
candidates = [
|
| 176 |
+
"person_days_lakhs", "expenditure_lakhs", "avg_wage_rate",
|
| 177 |
+
"expenditure_per_personday", "lag_person_days", "yoy_growth",
|
| 178 |
+
"demand_fulfillment_rate", "district_avg_persondays",
|
| 179 |
+
"rainfall_mm", "poverty_rate_pct", "scheme_overlap_score",
|
| 180 |
+
"budget_utilization_rate"
|
| 181 |
+
]
|
| 182 |
+
cols = [c for c in candidates if c in df.columns]
|
| 183 |
+
corr = df[cols].corr()
|
| 184 |
+
|
| 185 |
+
fig, ax = plt.subplots(figsize=(11, 9))
|
| 186 |
+
mask = np.triu(np.ones_like(corr, dtype=bool))
|
| 187 |
+
sns.heatmap(corr, mask=mask, annot=True, fmt=".2f",
|
| 188 |
+
cmap="coolwarm", center=0, ax=ax,
|
| 189 |
+
linewidths=0.5, annot_kws={"size": 8})
|
| 190 |
+
ax.set_title("Feature Correlation Heatmap")
|
| 191 |
+
plt.tight_layout()
|
| 192 |
+
_save("05_correlation_heatmap.png")
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
# ββ Helper ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 196 |
+
|
| 197 |
+
def _save(filename: str) -> None:
    """Write the current matplotlib figure into FIGURES_DIR and close it.

    Args:
        filename: File name (not a path) of the image inside FIGURES_DIR.
    """
    target = os.path.join(FIGURES_DIR, filename)
    plt.savefig(target, bbox_inches="tight")
    # Close after saving so figures do not accumulate across plot calls.
    plt.close()
    print(f"[eda] Saved: {target}")
|
src/extract.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
extract.py
|
| 3 |
+
----------
|
| 4 |
+
Loads and validates the unified MNREGA CSV.
|
| 5 |
+
Supports both the synthetic unified dataset and any real CSV
|
| 6 |
+
that matches the schema.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import pandas as pd
|
| 10 |
+
|
| 11 |
+
# Columns every input CSV must provide; checked by _validate_columns().
REQUIRED_COLUMNS = {
    "state",
    "district",
    "financial_year",
    "person_days_lakhs",
    "expenditure_lakhs",
    "avg_wage_rate",
}

# Per-stage schemas: each stage extends the previous one with extra columns.
STAGE1_COLUMNS = REQUIRED_COLUMNS
STAGE2_COLUMNS = STAGE1_COLUMNS.union({
    "rainfall_mm",
    "crop_season_index",
    "rural_population_lakhs",
    "poverty_rate_pct",
})
STAGE3_COLUMNS = STAGE2_COLUMNS.union({
    "pmkisan_beneficiaries",
    "pmay_houses_sanctioned",
    "budget_allocated_lakhs",
})
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def load_csv(filepath: str, state_filter: str = None) -> pd.DataFrame:
    """Load the unified MNREGA CSV, normalise its headers and validate it.

    Args:
        filepath: Path to the CSV file.
        state_filter: If provided, keep only rows whose ``state`` equals this
            value exactly, e.g. "Maharashtra". Pass None (or an empty string)
            to keep every state.

    Returns:
        The raw DataFrame with normalised column names, filtered to
        ``state_filter`` when one is given.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist. The original
            pandas error is attached as the cause.
        ValueError: Raised by ``_validate_columns`` when required columns
            are missing.
    """
    print(f"[extract] Loading: {filepath}")
    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError as err:
        # Chain explicitly so the underlying OS-level error stays visible.
        raise FileNotFoundError(f"[extract] File not found: {filepath}") from err

    # Normalize column names. str() guards against non-string headers
    # (e.g. a numeric column label), which have no .strip().
    df.columns = [str(c).strip().lower().replace(" ", "_") for c in df.columns]

    _validate_columns(df)

    if state_filter:
        before = len(df)
        df = df[df["state"] == state_filter].reset_index(drop=True)
        print(f"[extract] Filtered to '{state_filter}': {before} β {len(df)} rows")
        if before and df.empty:
            # An empty result is almost always a spelling or case mismatch.
            print(f"[extract] WARNING: no rows matched state '{state_filter}' (match is exact and case-sensitive)")

    print(f"[extract] Loaded {len(df)} rows | {df['state'].nunique()} state(s) | {df['district'].nunique()} districts | {df['financial_year'].nunique()} years")
    print(f"[extract] Validation passed β")
    return df
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _validate_columns(df: pd.DataFrame) -> None:
    """Ensure ``df`` carries every column listed in REQUIRED_COLUMNS.

    Args:
        df: Frame whose column names have already been normalised.

    Raises:
        ValueError: If one or more required columns are absent. The missing
            names are listed in sorted order so the message is identical
            from run to run.
    """
    missing = REQUIRED_COLUMNS.difference(df.columns)
    if missing:
        raise ValueError(f"[extract] Missing required columns: {sorted(missing)}")
|
src/features.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
features.py
|
| 3 |
+
-----------
|
| 4 |
+
V3 leak-free feature engineering for MNREGA district-level forecasting.
|
| 5 |
+
|
| 6 |
+
LEAKAGE AUDIT (what was removed vs original):
|
| 7 |
+
REMOVED — works_completed : r=1.00 with target (formula of person_days)
|
| 8 |
+
REMOVED — expenditure_lakhs : r=0.976 (person_days × wage_rate)
|
| 9 |
+
REMOVED — budget_allocated_lakhs : r=0.976 (derived from expenditure)
|
| 10 |
+
REMOVED — households_demanded/offered/availed : r=0.94 (copies of target structure)
|
| 11 |
+
REMOVED — lag_expenditure : r=0.866 (derived from target)
|
| 12 |
+
REMOVED — district_avg_persondays : replaced with blended_capacity (safer)
|
| 13 |
+
REMOVED — yoy_growth : computed from current-year target, hence leaky
|
| 14 |
+
REMOVED — demand_fulfillment_rate : uses current-year availed (target-correlated)
|
| 15 |
+
REMOVED — all synthetic columns : rainfall, poverty, pmkisan, pmay (fabricated)
|
| 16 |
+
|
| 17 |
+
V3 FEATURES (all computed from lagged/historical values only):
|
| 18 |
+
lag1_pd : person_days_lakhs shifted 1 year per district
|
| 19 |
+
lag2_pd : shifted 2 years
|
| 20 |
+
lag3_pd : shifted 3 years
|
| 21 |
+
roll2_mean : 2-year rolling mean of lag1
|
| 22 |
+
roll3_mean : 3-year rolling mean of lag1
|
| 23 |
+
roll3_std : 3-year rolling std of lag1 (volatility)
|
| 24 |
+
lag1_adj : lag1 deflated by COVID multiplier when lag year = 2020
|
| 25 |
+
lag_yoy : YoY growth of lag1 vs lag2 (historical, not current)
|
| 26 |
+
lag2_yoy : YoY growth of lag2 vs lag3
|
| 27 |
+
momentum : lag_yoy - lag2_yoy (acceleration)
|
| 28 |
+
district_capacity : expanding mean of lag1 (long-run structural level)
|
| 29 |
+
blended_capacity : district_capacity blended with state mean when history < 3yr
|
| 30 |
+
relative_to_state : lag1 / state-year lag1 mean (district's share)
|
| 31 |
+
state_lag1_norm : state total lag1 / state historical mean
|
| 32 |
+
lag1_vs_capacity : lag1 / district_capacity (how anomalous last year was)
|
| 33 |
+
lag1_zscore : z-score of lag1 vs district expanding history
|
| 34 |
+
state_lag1_zscore : z-score of state-level lag1
|
| 35 |
+
lag1_extreme : flag when |lag1_zscore| > 2.5
|
| 36 |
+
lag1_is_covid : flag when lag year = 2020
|
| 37 |
+
history_length : cumulative count of observations per district
|
| 38 |
+
avg_wage_rate : official wage schedule (genuinely exogenous)
|
| 39 |
+
wage_yoy : year-on-year % change in wage rate
|
| 40 |
+
is_covid : flag for FY 2020 (COVID demand shock year)
|
| 41 |
+
is_post_covid : flag for FY >= 2021
|
| 42 |
+
is_2022_anomaly : flag for FY 2022 (West Bengal + others reporting anomaly)
|
| 43 |
+
year_trend : years since dataset start (linear time trend)
|
| 44 |
+
state_enc : label-encoded state
|
| 45 |
+
district_enc : label-encoded district ("district|state" composite string)
|
| 46 |
+
|
| 47 |
+
Walk-forward CV results (GBR, max_depth=4, lr=0.03, n_est=200, subsample=0.7):
|
| 48 |
+
Mean R² : 0.7722 (excl. 2022: 0.8618)
|
| 49 |
+
Mean MAE : 10.68L
|
| 50 |
+
Old R² : 0.9963 — was leakage from works_completed (r=1.0)
|
| 51 |
+
"""
|
| 52 |
+
|
| 53 |
+
import pandas as pd
|
| 54 |
+
import numpy as np
|
| 55 |
+
from sklearn.preprocessing import LabelEncoder
|
| 56 |
+
|
| 57 |
+
# COVID multiplier: how much 2020 inflated vs 2019 nationally.
# Computed from real data: 55.01L / 38.04L = 1.447
# NOTE(review): 55.01 / 38.04 evaluates to ~1.446, slightly below the stored
# 1.447 - confirm which figure is intended.
COVID_MULTIPLIER = 1.447

# Name of the column the lag, anomaly and state-level features are built from.
TARGET = "person_days_lakhs"
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def build_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add every V3 feature to a cleaned frame and drop rows without history.

    The frame is first ordered by state, district and financial year, because
    the per-district shift/rolling steps rely on chronological row order.
    Rows lacking ``lag1_pd`` or ``lag2_pd`` (the first two observations of
    each district) are removed at the end.

    Args:
        df: Cleaned DataFrame with at minimum: state, district,
            financial_year, person_days_lakhs, households_availed,
            avg_wage_rate.

    Returns:
        Feature-engineered DataFrame ready for model training/inference.
    """
    print("[features] Building V3 leak-free features...")

    df = df.sort_values(["state", "district", "financial_year"]).reset_index(drop=True)

    # Order matters: later steps read columns created by earlier ones
    # (e.g. rolling/capacity features consume lag1_pd).
    steps = (
        _lag_features,
        _rolling_features,
        _covid_features,
        _trend_features,
        _capacity_features,
        _anomaly_features,
        _state_features,
        _temporal_flags,
        _wage_features,
        _encode_categoricals,
    )
    for step in steps:
        df = step(df)

    # Drop rows with no lag1/lag2: they cannot be trained on or predicted.
    rows_before = len(df)
    df = df.dropna(subset=["lag1_pd", "lag2_pd"]).reset_index(drop=True)
    print(f"[features] Dropped {rows_before - len(df)} rows (insufficient history)")
    print(f"[features] Done. Final shape: {df.shape}")
    return df
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ββ Lag features ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 102 |
+
|
| 103 |
+
def _lag_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add per-district lags of the target and of households_availed.

    Lags are positional: ``shift(k)`` takes the k-th previous *row* of the
    district, so the frame must already be sorted by financial year.
    NOTE(review): if a district is missing a year, its lag comes from an
    older year than the column name suggests - confirm the data has no gaps.

    ``households_availed`` is not among the columns the loader validates, so
    when it is absent ``lag1_hh`` is filled with NaN instead of raising.

    Args:
        df: Sorted frame with ``state``, ``district`` and the TARGET column.

    Returns:
        The same frame with ``lag1_pd``, ``lag2_pd``, ``lag3_pd`` and
        ``lag1_hh`` added in place.
    """
    grp = df.groupby(["state", "district"])
    for k in (1, 2, 3):
        df[f"lag{k}_pd"] = grp[TARGET].shift(k)

    if "households_availed" in df.columns:
        df["lag1_hh"] = grp["households_availed"].shift(1)
    else:
        df["lag1_hh"] = np.nan
    return df
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# ββ Rolling statistics (computed on lag1, so no leakage) βββββββββββββββββββββ
|
| 113 |
+
|
| 114 |
+
def _rolling_features(df: pd.DataFrame) -> pd.DataFrame:
|
| 115 |
+
lag1 = df.groupby(["state", "district"])["lag1_pd"]
|
| 116 |
+
df["roll2_mean"] = lag1.transform(lambda s: s.rolling(2, min_periods=1).mean())
|
| 117 |
+
df["roll3_mean"] = lag1.transform(lambda s: s.rolling(3, min_periods=1).mean())
|
| 118 |
+
df["roll3_std"] = lag1.transform(
|
| 119 |
+
lambda s: s.rolling(3, min_periods=1).std().fillna(0)
|
| 120 |
+
)
|
| 121 |
+
return df
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
# ββ COVID-aware lag adjustment ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 125 |
+
|
| 126 |
+
def _covid_features(df: pd.DataFrame) -> pd.DataFrame:
    """Flag rows whose lag year is 2020 and add a COVID-deflated lag.

    When the previous financial year is 2020, ``lag1_pd`` carries the COVID
    spike. ``lag1_adj`` divides that value by COVID_MULTIPLIER; for every
    other row it is simply ``lag1_pd``. The raw lag is kept alongside.

    Args:
        df: Frame with numeric ``financial_year`` and ``lag1_pd`` columns.

    Returns:
        The same frame with ``lag1_is_covid`` (0/1) and ``lag1_adj`` added
        in place.
    """
    previous_year = df["financial_year"] - 1
    df["lag1_is_covid"] = previous_year.eq(2020).astype(int)

    raw_lag = df["lag1_pd"]
    deflated_lag = raw_lag / COVID_MULTIPLIER
    df["lag1_adj"] = np.where(df["lag1_is_covid"] == 1, deflated_lag, raw_lag)
    return df
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
# ββ YoY trend / momentum (all historical β no current-year leakage) βββββββββββ
|
| 144 |
+
|
| 145 |
+
def _trend_features(df: pd.DataFrame) -> pd.DataFrame:
|
| 146 |
+
df["lag_yoy"] = (
|
| 147 |
+
(df["lag1_pd"] - df["lag2_pd"]) / df["lag2_pd"].replace(0, np.nan)
|
| 148 |
+
).clip(-1, 3)
|
| 149 |
+
df["lag2_yoy"] = (
|
| 150 |
+
(df["lag2_pd"] - df["lag3_pd"]) / df["lag3_pd"].replace(0, np.nan)
|
| 151 |
+
).clip(-1, 3)
|
| 152 |
+
df["momentum"] = df["lag_yoy"] - df["lag2_yoy"]
|
| 153 |
+
return df
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# ββ District structural capacity ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 157 |
+
|
| 158 |
+
def _capacity_features(df: pd.DataFrame) -> pd.DataFrame:
|
| 159 |
+
"""
|
| 160 |
+
district_capacity: expanding mean of lag1 β the district's long-run level.
|
| 161 |
+
blended_capacity : when history is short (<3 years), blend district mean
|
| 162 |
+
with state mean to reduce cold-start noise.
|
| 163 |
+
"""
|
| 164 |
+
df["district_capacity"] = df.groupby(["state", "district"])["lag1_pd"].transform(
|
| 165 |
+
lambda s: s.expanding().mean()
|
| 166 |
+
)
|
| 167 |
+
df["history_length"] = df.groupby(["state", "district"]).cumcount()
|
| 168 |
+
|
| 169 |
+
state_mean = df.groupby(["state", "financial_year"])["lag1_pd"].transform("mean")
|
| 170 |
+
df["blended_capacity"] = np.where(
|
| 171 |
+
df["history_length"] < 3,
|
| 172 |
+
0.5 * df["district_capacity"].fillna(state_mean) + 0.5 * state_mean,
|
| 173 |
+
df["district_capacity"]
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
# How anomalous was last year vs the district's own history?
|
| 177 |
+
df["lag1_vs_capacity"] = (
|
| 178 |
+
df["lag1_pd"] / df["blended_capacity"].replace(0, np.nan)
|
| 179 |
+
).clip(0, 5).fillna(1.0)
|
| 180 |
+
|
| 181 |
+
# Lagged household ratio (demand signal β uses only lagged values)
|
| 182 |
+
df["lag1_hh_ratio"] = (
|
| 183 |
+
df["lag1_hh"] / df["blended_capacity"].replace(0, np.nan)
|
| 184 |
+
).clip(0, 5).fillna(1.0)
|
| 185 |
+
|
| 186 |
+
return df
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
# ββ Anomaly detection βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 190 |
+
|
| 191 |
+
def _rolling_zscore(s: pd.Series) -> pd.Series:
|
| 192 |
+
"""Z-score of each value vs its own expanding historical mean/std."""
|
| 193 |
+
exp_mean = s.shift(1).expanding().mean()
|
| 194 |
+
exp_std = s.shift(1).expanding().std().fillna(1).replace(0, 1)
|
| 195 |
+
return ((s - exp_mean) / exp_std).clip(-4, 4)
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def _anomaly_features(df: pd.DataFrame) -> pd.DataFrame:
    """Flag districts whose previous-year value was unusual for them.

    ``lag1_zscore`` is the district's expanding z-score of the target,
    shifted by one row so it describes the previous observation rather than
    the current one. Missing scores become 0. ``lag1_extreme`` is 1 when the
    absolute score exceeds 2.5.

    Args:
        df: Sorted frame with ``state``, ``district`` and the TARGET column.

    Returns:
        The same frame with ``lag1_zscore`` and ``lag1_extreme`` added in
        place.
    """
    def _previous_year_zscore(history: pd.Series) -> pd.Series:
        return _rolling_zscore(history).shift(1)

    per_district = df.groupby(["state", "district"])[TARGET]
    df["lag1_zscore"] = per_district.transform(_previous_year_zscore).fillna(0)
    df["lag1_extreme"] = df["lag1_zscore"].abs().gt(2.5).astype(int)
    return df
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
# ββ State-level features ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 215 |
+
|
| 216 |
+
def _state_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add state-level lag features that capture state-wide shifts.

    state_lag1_zscore : z-score of the *previous* year's state total against
                        the state's earlier history (0 when unavailable).
    state_lag1_norm   : previous year's state total divided by the state's
                        mean lagged total, clipped to [0, 5], default 1.0.
    relative_to_state : district ``lag1_pd`` divided by the state-year mean
                        of ``lag1_pd``, clipped to [0, 10], default 1.0.

    Args:
        df: Frame with ``state``, ``financial_year``, ``lag1_pd`` and the
            TARGET column.

    Returns:
        A new frame (result of a left merge) with the three columns added.
    """
    # State total person_days per year. groupby sorts its keys, so rows are
    # in chronological order within each state, as shift() needs.
    state_yr = (
        df.groupby(["state", "financial_year"])[TARGET]
        .sum().reset_index()
        .rename(columns={TARGET: "state_total"})
    )
    by_state = state_yr.groupby("state")["state_total"]
    state_yr["state_total_lag1"] = by_state.shift(1)

    # State z-score of lag1. The extra shift(1) is essential: without it the
    # score is computed from the current year's state total, which is the sum
    # of the very values being predicted (target leakage). This mirrors the
    # district-level lag1_zscore, including the fill of missing scores with 0.
    state_yr["state_lag1_zscore"] = by_state.transform(
        lambda s: _rolling_zscore(s).shift(1)
    ).fillna(0)

    # Normalised state lag (state lag relative to its own history).
    # NOTE(review): this mean spans all years of the state, including later
    # ones, so it still carries a mild look-ahead; an expanding mean would
    # remove it but changes the feature's scale - confirm before switching.
    state_hist_mean = state_yr.groupby("state")["state_total_lag1"].transform("mean")
    state_yr["state_lag1_norm"] = (
        state_yr["state_total_lag1"] / state_hist_mean.replace(0, np.nan)
    ).clip(0, 5).fillna(1.0)

    df = df.merge(
        state_yr[["state", "financial_year",
                  "state_lag1_zscore", "state_lag1_norm"]],
        on=["state", "financial_year"],
        how="left"
    )

    # District's position relative to state mean (its structural share)
    state_yr_lag = df.groupby(["state", "financial_year"])["lag1_pd"].transform("mean")
    df["relative_to_state"] = (
        df["lag1_pd"] / state_yr_lag.replace(0, np.nan)
    ).clip(0, 10).fillna(1.0)

    return df
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
# ββ Temporal flags ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 257 |
+
|
| 258 |
+
def _temporal_flags(df: pd.DataFrame) -> pd.DataFrame:
|
| 259 |
+
fy_min = df["financial_year"].min()
|
| 260 |
+
df["year_trend"] = df["financial_year"] - fy_min
|
| 261 |
+
df["is_covid"] = (df["financial_year"] == 2020).astype(int)
|
| 262 |
+
df["is_post_covid"] = (df["financial_year"] >= 2021).astype(int)
|
| 263 |
+
df["is_2022_anomaly"] = (df["financial_year"] == 2022).astype(int)
|
| 264 |
+
return df
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
# ββ Wage features βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 268 |
+
|
| 269 |
+
def _wage_features(df: pd.DataFrame) -> pd.DataFrame:
|
| 270 |
+
"""
|
| 271 |
+
avg_wage_rate is the official state-notified wage schedule β genuinely
|
| 272 |
+
exogenous (set by government, not derived from person_days).
|
| 273 |
+
wage_yoy captures the policy signal of wage revision speed.
|
| 274 |
+
"""
|
| 275 |
+
if "avg_wage_rate" not in df.columns:
|
| 276 |
+
return df
|
| 277 |
+
df["wage_yoy"] = (
|
| 278 |
+
df.groupby(["state", "district"])["avg_wage_rate"]
|
| 279 |
+
.pct_change(fill_method=None)
|
| 280 |
+
.fillna(0)
|
| 281 |
+
.clip(-0.2, 0.5)
|
| 282 |
+
)
|
| 283 |
+
return df
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
# ββ Categorical encoding ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 287 |
+
|
| 288 |
+
def _encode_categoricals(df: pd.DataFrame) -> pd.DataFrame:
    """Label-encode state and district into integer columns.

    Districts are encoded from the composite string "district|state", so
    equally named districts in different states receive different codes.
    The encoders are fitted on the frame passed in and are not returned.

    Args:
        df: Frame with ``state`` and ``district`` columns.

    Returns:
        The same frame with ``state_enc`` and ``district_enc`` added in
        place.
    """
    state_labels = df["state"].astype(str)
    district_labels = (df["district"] + "|" + df["state"]).astype(str)

    df["state_enc"] = LabelEncoder().fit_transform(state_labels)
    df["district_enc"] = LabelEncoder().fit_transform(district_labels)
    return df
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
# ββ Feature list for model ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 299 |
+
|
| 300 |
+
# Canonical lean feature set β chosen by permutation importance analysis.
|
| 301 |
+
# All features are computed from lagged/historical values only.
|
| 302 |
+
# Column names, in order, of the lean feature set. Every name here is either
# produced by one of the feature builders in this module or (avg_wage_rate)
# read straight from the input data.
FEATURE_COLS = [
    "lag1_pd",            # target shifted 1 row per district
    "roll2_mean",         # rolling means / std of lag1_pd
    "roll3_mean",
    "lag1_adj",           # lag1_pd deflated when the lag year is 2020
    "lag2_pd",
    "lag3_pd",
    "roll3_std",
    "state_lag1_norm",    # state-level context
    "relative_to_state",
    "blended_capacity",   # district structural level
    "lag1_vs_capacity",
    "state_lag1_zscore",
    "state_enc",
    "is_covid",           # temporal flags
    "lag1_is_covid",
    "wage_yoy",           # wage signal
    "avg_wage_rate",
]
|
src/generate_synthetic.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
generate_synthetic.py
|
| 3 |
+
----------------------
|
| 4 |
+
Generates realistic synthetic MNREGA district-level data for Maharashtra.
|
| 5 |
+
|
| 6 |
+
Mimics the structure of real data available from:
|
| 7 |
+
- nregarep1.nic.in (MoRD official portal)
|
| 8 |
+
- dataful.in (district-wise persondays + expenditure)
|
| 9 |
+
|
| 10 |
+
Columns produced match what you'd get from real sources:
|
| 11 |
+
state, district, financial_year,
|
| 12 |
+
households_demanded, households_offered, households_availed,
|
| 13 |
+
person_days, expenditure_lakhs, avg_wage_rate, works_completed
|
| 14 |
+
|
| 15 |
+
Design principles for realism:
|
| 16 |
+
- Each district has a stable "base capacity" (some districts are
|
| 17 |
+
structurally larger / more active than others)
|
| 18 |
+
- Year-on-year growth follows real MNREGA trends (spike in 2020-21
|
| 19 |
+
due to COVID reverse migration, slowdown in urban-adjacent districts)
|
| 20 |
+
- Expenditure correlates with person_days but has noise (efficiency varies)
|
| 21 |
+
- Wage rate increases over years (matches real wage revision schedule)
|
| 22 |
+
- ~8% missing values injected randomly to simulate real data quality
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
import numpy as np
|
| 26 |
+
import pandas as pd
|
| 27 |
+
import os
|
| 28 |
+
|
| 29 |
+
# ── Maharashtra districts (all 36) ──────────────────────────────────────────
MAHARASHTRA_DISTRICTS = [
    "Ahmednagar", "Akola", "Amravati", "Aurangabad", "Beed",
    "Bhandara", "Buldhana", "Chandrapur", "Dhule", "Gadchiroli",
    "Gondia", "Hingoli", "Jalgaon", "Jalna", "Kolhapur",
    "Latur", "Mumbai City", "Mumbai Suburban", "Nagpur", "Nanded",
    "Nandurbar", "Nashik", "Osmanabad", "Palghar", "Parbhani",
    "Pune", "Raigad", "Ratnagiri", "Sangli", "Satara",
    "Sindhudurg", "Solapur", "Thane", "Wardha", "Washim", "Yavatmal",
]

# Financial-year labels "YYYY-YY", from 2014-15 through 2023-24.
YEARS = [f"{start}-{(start + 1) % 100:02d}" for start in range(2014, 2024)]

# Real MNREGA wage rates in Maharashtra (approx ₹/day by year)
WAGE_RATES = dict(zip(
    YEARS,
    (162, 174, 183, 194, 203, 213, 238, 256, 273, 289),
))

# Year-level demand multipliers based on real MNREGA trends
# COVID year (2020-21) saw massive spike due to reverse migration
YEAR_MULTIPLIERS = dict(zip(
    YEARS,
    (0.85, 0.90, 0.92, 0.95, 1.00, 1.05, 1.45, 1.20, 1.10, 1.08),
))
|
| 59 |
+
|
| 60 |
+
# District profile: (base_persondays_lakhs, efficiency_score, rural_weight)
# Urban/peri-urban districts have lower base; tribal/rural have higher.
# Keys must be unique: a repeated key in a dict literal silently overwrites
# the earlier entry. The 5.8-lakh entry was previously keyed "Nandurbar" a
# second time, which clobbered Nandurbar's 16.2-lakh profile and left Nagpur
# (present in MAHARASHTRA_DISTRICTS) without any profile.
DISTRICT_PROFILES = {
    "Gadchiroli":      (18.5, 0.72, 0.95),
    "Nandurbar":       (16.2, 0.68, 0.93),
    "Yavatmal":        (15.8, 0.74, 0.91),
    "Amravati":        (14.3, 0.76, 0.88),
    "Chandrapur":      (13.9, 0.71, 0.87),
    "Washim":          (12.1, 0.73, 0.89),
    "Buldhana":        (11.8, 0.75, 0.86),
    "Beed":            (11.5, 0.70, 0.90),
    "Hingoli":         (10.9, 0.72, 0.88),
    "Osmanabad":       (10.7, 0.69, 0.87),
    "Latur":           (10.4, 0.71, 0.85),
    "Nanded":          (10.2, 0.73, 0.84),
    "Jalna":           (9.8, 0.74, 0.85),
    "Parbhani":        (9.5, 0.72, 0.84),
    "Akola":           (9.3, 0.75, 0.83),
    "Dhule":           (9.1, 0.70, 0.85),
    "Gondia":          (8.9, 0.76, 0.82),
    "Bhandara":        (8.6, 0.74, 0.81),
    "Wardha":          (8.3, 0.77, 0.80),
    "Ahmednagar":      (8.1, 0.78, 0.79),
    "Solapur":         (7.9, 0.76, 0.80),
    "Aurangabad":      (7.6, 0.79, 0.75),
    "Jalgaon":         (7.4, 0.77, 0.77),
    "Nashik":          (7.1, 0.80, 0.73),
    "Satara":          (6.8, 0.81, 0.74),
    "Sangli":          (6.5, 0.80, 0.73),
    "Kolhapur":        (6.2, 0.82, 0.71),
    "Palghar":         (6.0, 0.75, 0.78),
    "Nagpur":          (5.8, 0.71, 0.82),
    "Ratnagiri":       (5.5, 0.79, 0.74),
    "Sindhudurg":      (5.1, 0.80, 0.72),
    "Raigad":          (4.8, 0.78, 0.68),
    "Pune":            (4.2, 0.83, 0.55),
    "Thane":           (3.5, 0.81, 0.45),
    "Mumbai Suburban": (1.2, 0.85, 0.15),
    "Mumbai City":     (0.4, 0.88, 0.05),
}
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def generate(seed: int = 42, missing_rate: float = 0.08) -> pd.DataFrame:
    """
    Generate a synthetic MNREGA dataset for Maharashtra.

    One row is produced for every (district, financial year) pair taken from
    MAHARASHTRA_DISTRICTS and YEARS. All randomness comes from a single
    ``numpy`` Generator seeded with ``seed``, and the draws are made in a fixed
    order, so the same seed yields the same frame.

    Args:
        seed        : Random seed for reproducibility.
        missing_rate: Per-cell probability of nulling a value in each of the
                      nullable columns (households_demanded,
                      households_offered, households_availed, works_completed).

    Returns:
        DataFrame with columns state, district, financial_year,
        households_demanded, households_offered, households_availed,
        person_days_lakhs, expenditure_lakhs, avg_wage_rate, works_completed.
        A nullable column that received at least one NaN is float64.
    """
    rng = np.random.default_rng(seed)
    records = []

    for district in MAHARASHTRA_DISTRICTS:
        # Districts without an explicit profile fall back to a mid-range default.
        # The third element (rural weight) is not used by the generator.
        base_pd, efficiency, _rural_w = DISTRICT_PROFILES.get(
            district, (7.0, 0.75, 0.70)
        )

        for year in YEARS:
            year_mult = YEAR_MULTIPLIERS[year]
            wage = WAGE_RATES[year]

            # ── Person days (in lakhs) ────────────────────────────────────
            noise = rng.normal(1.0, 0.07)
            # Floor at 0.1 so a large negative noise draw cannot produce a
            # non-positive volume.
            person_days_lakhs = max(base_pd * year_mult * noise, 0.1)

            # ── Households ────────────────────────────────────────────────
            # ~38 person-days per household, with demand inflated by 5–15%;
            # offered and availed are successive fractions of demanded.
            hh_demanded = int(person_days_lakhs * 1e5 / 38 * rng.uniform(1.05, 1.15))
            hh_offered = int(hh_demanded * rng.uniform(0.92, 0.99))
            hh_availed = int(hh_offered * rng.uniform(0.88, 0.97))

            # ── Expenditure (₹ lakhs) ─────────────────────────────────────
            # person-days (lakhs → units) × ₹/day gives ₹; /1e5 converts back
            # to ₹ lakhs. Lower efficiency inflates spend; ±7% noise on top.
            base_expenditure = person_days_lakhs * 1e5 * wage / 1e5
            expenditure_lakhs = base_expenditure / efficiency * rng.uniform(0.93, 1.07)

            # ── Works completed ───────────────────────────────────────────
            works = int(person_days_lakhs * rng.uniform(18, 35))

            records.append({
                "state": "Maharashtra",
                "district": district,
                "financial_year": year,
                "households_demanded": hh_demanded,
                "households_offered": hh_offered,
                "households_availed": hh_availed,
                "person_days_lakhs": round(person_days_lakhs, 3),
                "expenditure_lakhs": round(expenditure_lakhs, 2),
                "avg_wage_rate": wage,
                "works_completed": works,
            })

    df = pd.DataFrame(records)

    # ── Inject realistic missing values ───────────────────────────────────
    nullable_cols = [
        "households_demanded", "households_offered",
        "households_availed", "works_completed",
    ]
    for col in nullable_cols:
        # The mask is always drawn so the random stream does not depend on
        # whether anything ends up being nulled.
        mask = rng.random(len(df)) < missing_rate
        if mask.any():
            # Integer columns cannot hold NaN. Cast explicitly instead of
            # relying on pandas' deprecated silent upcast during assignment.
            df[col] = df[col].astype("float64")
            df.loc[mask, col] = np.nan

    print(f"[generate] Created {len(df)} rows × {len(df.columns)} columns")
    print(f"[generate] Districts: {df['district'].nunique()} | Years: {df['financial_year'].nunique()}")
    print(f"[generate] Missing values injected: ~{missing_rate*100:.0f}% per nullable column")

    return df
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def save(df: pd.DataFrame, path: str = "data/raw/mnrega_maharashtra_synthetic.csv") -> None:
    """
    Write ``df`` to ``path`` as CSV without the index.

    Args:
        df  : Frame to persist.
        path: Destination file. Missing parent directories are created.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    df.to_csv(path, index=False)
    print(f"[generate] Saved → {path}")
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
if __name__ == "__main__":
    # Script entry point: build the default dataset, persist it to the default
    # path, then print the first six rows as a quick sanity check.
    synthetic = generate()
    save(synthetic)
    preview = synthetic.head(6).to_string(index=False)
    print("\nSample:")
    print(preview)
|
src/model.py
ADDED
|
@@ -0,0 +1,656 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
model.py
|
| 3 |
+
--------
|
| 4 |
+
V4 Multi-Algorithm Model Selection for MNREGA district-level forecasting.
|
| 5 |
+
|
| 6 |
+
Algorithms compared via walk-forward CV:
|
| 7 |
+
- GradientBoostingRegressor (current champion)
|
| 8 |
+
- RandomForestRegressor
|
| 9 |
+
- XGBoost
|
| 10 |
+
- LightGBM
|
| 11 |
+
- Ridge (linear baseline)
|
| 12 |
+
- ElasticNet (regularised linear baseline)
|
| 13 |
+
|
| 14 |
+
Selection criterion: mean R² across walk-forward CV years (excl. 2022 anomaly).
|
| 15 |
+
Best model is saved to models/mnrega_best_model.pkl.
|
| 16 |
+
|
| 17 |
+
W&B logging:
|
| 18 |
+
- Each algorithm gets its own W&B run (group="mnrega_model_selection")
|
| 19 |
+
- Per-year CV metrics logged as time-series
|
| 20 |
+
- Feature importance logged as bar chart
|
| 21 |
+
- Model comparison summary table logged
|
| 22 |
+
- Best model flagged with tag "champion"
|
| 23 |
+
|
| 24 |
+
Usage:
|
| 25 |
+
export WANDB_API_KEY=your_key # or wandb login
|
| 26 |
+
python main.py --stage 3
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
import os
|
| 30 |
+
import pickle
|
| 31 |
+
import warnings
|
| 32 |
+
import numpy as np
|
| 33 |
+
import pandas as pd
|
| 34 |
+
import matplotlib
|
| 35 |
+
matplotlib.use("Agg")
|
| 36 |
+
import matplotlib.pyplot as plt
|
| 37 |
+
|
| 38 |
+
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
|
| 39 |
+
from sklearn.linear_model import Ridge, ElasticNet
|
| 40 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
| 41 |
+
from sklearn.preprocessing import StandardScaler
|
| 42 |
+
from sklearn.pipeline import Pipeline
|
| 43 |
+
|
| 44 |
+
warnings.filterwarnings("ignore")
|
| 45 |
+
|
| 46 |
+
# Optional imports — graceful fallback if not installed
|
| 47 |
+
try:
|
| 48 |
+
from xgboost import XGBRegressor
|
| 49 |
+
HAS_XGB = True
|
| 50 |
+
except ImportError:
|
| 51 |
+
HAS_XGB = False
|
| 52 |
+
print("[model] xgboost not installed β skipping")
|
| 53 |
+
|
| 54 |
+
try:
|
| 55 |
+
from lightgbm import LGBMRegressor
|
| 56 |
+
HAS_LGB = True
|
| 57 |
+
except ImportError:
|
| 58 |
+
HAS_LGB = False
|
| 59 |
+
print("[model] lightgbm not installed β skipping")
|
| 60 |
+
|
| 61 |
+
try:
|
| 62 |
+
import wandb
|
| 63 |
+
HAS_WANDB = True
|
| 64 |
+
except ImportError:
|
| 65 |
+
HAS_WANDB = False
|
| 66 |
+
print("[model] wandb not installed β metrics will be logged locally only")
|
| 67 |
+
|
| 68 |
+
from src.features import FEATURE_COLS
|
| 69 |
+
|
| 70 |
+
# Column the models are trained to predict.
TARGET = "person_days_lakhs"

# Output locations (relative to the working directory).
FIGURES_DIR = os.path.join("reports", "figures")
OUTPUT_DIR = os.path.join("data", "processed")
MODELS_DIR = "models"
MODEL_PATH = os.path.join(MODELS_DIR, "mnrega_best_model.pkl")

# Weights & Biases project / run-group names.
WANDB_PROJECT = "SchemeImpactNet"
WANDB_GROUP = "mnrega_model_selection"

# Create the output directories at import time so later writes cannot fail
# on a missing folder.
for _required_dir in (FIGURES_DIR, OUTPUT_DIR, MODELS_DIR):
    os.makedirs(_required_dir, exist_ok=True)
del _required_dir

# Walk-forward CV test years
WF_TEST_YEARS = list(range(2018, 2025))
|
| 84 |
+
|
| 85 |
+
# ββ Algorithm registry ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 86 |
+
def _build_candidates() -> dict:
    """
    Assemble the algorithm registry as a {name: estimator} mapping.

    Tree ensembles are registered as plain estimators; the linear baselines
    are wrapped in a Pipeline that standardises features first. The XGBoost
    and LightGBM entries are added only when HAS_XGB / HAS_LGB are true.
    """
    def _scaled(linear_model):
        # Linear models are fitted on standardised inputs.
        return Pipeline([
            ("scaler", StandardScaler()),
            ("model", linear_model),
        ])

    # Settings shared verbatim by the two optional boosting libraries.
    boost_kwargs = dict(
        n_estimators=200, max_depth=4, learning_rate=0.03,
        subsample=0.7, colsample_bytree=0.8,
        reg_alpha=0.1, reg_lambda=1.0,
        random_state=42,
    )

    registry = {}
    registry["GradientBoosting"] = GradientBoostingRegressor(
        n_estimators=200, max_depth=4, learning_rate=0.03,
        subsample=0.7, min_samples_leaf=10, random_state=42,
    )
    registry["RandomForest"] = RandomForestRegressor(
        n_estimators=300, max_depth=8, min_samples_leaf=10,
        n_jobs=-1, random_state=42,
    )
    registry["Ridge"] = _scaled(Ridge(alpha=10.0))
    registry["ElasticNet"] = _scaled(
        ElasticNet(alpha=0.1, l1_ratio=0.5, max_iter=2000)
    )

    if HAS_XGB:
        registry["XGBoost"] = XGBRegressor(**boost_kwargs, verbosity=0)
    if HAS_LGB:
        registry["LightGBM"] = LGBMRegressor(**boost_kwargs, verbosity=-1)

    return registry
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
# ββ Main entry point ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 128 |
+
|
| 129 |
+
def run_model(df: pd.DataFrame) -> pd.DataFrame:
    """
    Full model selection pipeline:
      1. Walk-forward CV for each algorithm candidate
      2. Select best by mean R² (excl. 2022)
      3. Train winner on all data
      4. Save model + metadata pkl
      5. Generate figures + W&B logs
      6. Return predictions DataFrame

    Args:
        df: Feature frame; must contain the feature columns returned by
            ``_get_features`` and the TARGET column.

    Returns:
        The predictions DataFrame produced by ``_predict_all``.
    """
    print("\n[model] ── V4 Multi-Algorithm Model Selection ───────────────")

    features = _get_features(df)
    # Build the registry once and reuse it for the log line, the CV loop and
    # the final fit (it was previously built a second time just to print names).
    candidates = _build_candidates()
    print(f"[model] Features ({len(features)}): {features}")
    print(f"[model] Algorithms: {list(candidates.keys())}")

    # ── Walk-forward CV for all candidates ────────────────────────────────
    all_cv_results = {}
    for name, estimator in candidates.items():
        print(f"\n[model] ── {name} ──")
        all_cv_results[name] = _walk_forward_cv(df, features, estimator, name)

    # ── Select best model ─────────────────────────────────────────────────
    best_name, best_cv = _select_best(all_cv_results)
    print(f"\n[model] ✓ Best model: {best_name}")

    # ── Print full comparison table ───────────────────────────────────────
    _print_comparison_table(all_cv_results)

    # ── Train winner on all data ──────────────────────────────────────────
    print(f"\n[model] Training {best_name} on all {len(df):,} district-years...")
    # CV fitted deep copies, so the registry instance is still unfitted here.
    best_estimator = candidates[best_name]
    X_all = df[features].fillna(0)
    y_all = df[TARGET]
    best_estimator.fit(X_all, y_all)

    # ── Log to W&B ────────────────────────────────────────────────────────
    if HAS_WANDB:
        _wandb_log_all(all_cv_results, best_name, best_estimator, features, df)

    # ── Save best model ───────────────────────────────────────────────────
    _save_model(best_name, best_estimator, features, best_cv, all_cv_results, df)

    # ── Figures ───────────────────────────────────────────────────────────
    _plot_model_comparison(all_cv_results, best_name)
    _plot_cv_per_year(all_cv_results, best_name)
    _plot_feature_importance(best_name, best_estimator, features)

    # ── Predictions + report ──────────────────────────────────────────────
    predictions_df = _predict_all(best_estimator, df, features)
    _save_predictions(predictions_df)
    _save_model_report(best_name, best_cv, all_cv_results, features, best_estimator)

    print("\n[model] ── V4 Pipeline Complete ─────────────────────────────\n")
    return predictions_df
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
# ββ Walk-forward CV βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 190 |
+
|
| 191 |
+
def _walk_forward_cv(
    df: pd.DataFrame,
    features: list,
    estimator,
    name: str,
    min_train: int = 200,
    min_test: int = 50,
) -> pd.DataFrame:
    """Walk-forward CV: train on years < T, evaluate on T.

    For each T in WF_TEST_YEARS a fresh deep copy of ``estimator`` is fitted on
    the rows with ``financial_year < T`` and scored on the rows with
    ``financial_year == T``. Each fold is also compared with a naive baseline
    that predicts the ``lag1_pd`` column (missing lags are filled with the
    test-year mean of the target).

    Args:
        df       : Frame holding ``financial_year``, ``lag1_pd``, the TARGET
                   column and every column named in ``features``.
        features : Feature column names; NaNs in them are filled with 0.
        estimator: Unfitted estimator exposing ``fit`` / ``predict``. It is
                   never fitted itself, only copies of it.
        name     : Algorithm name, used in log output.
        min_train: Minimum number of training rows for a fold to be evaluated.
        min_test : Minimum number of test rows for a fold to be evaluated.

    Returns:
        One row per evaluated year with columns year, n, r2, mae, rmse, mape,
        naive_r2, naive_mae, r2_gain, mae_gain. When no year qualifies the
        frame is empty but still carries these columns.
    """
    import copy  # function-scope import, hoisted out of the per-year loop

    result_cols = [
        "year", "n", "r2", "mae", "rmse", "mape",
        "naive_r2", "naive_mae", "r2_gain", "mae_gain",
    ]

    print(f" {'Year':<6} {'n':>5} {'R²':>8} {'MAE':>8} {'RMSE':>8} {'Naive R²':>10} {'R² gain':>8}")
    print(f" {'-'*68}")

    rows = []
    for test_yr in WF_TEST_YEARS:
        tr = df[df["financial_year"] < test_yr]
        te = df[df["financial_year"] == test_yr]
        if len(tr) < min_train or len(te) < min_test:
            continue

        # Fresh copy per fold so no fitted state leaks between years.
        m = copy.deepcopy(estimator)
        m.fit(tr[features].fillna(0), tr[TARGET])
        pred = m.predict(te[features].fillna(0))
        naive = te["lag1_pd"].fillna(te[TARGET].mean()).values

        actual = te[TARGET]
        r2 = r2_score(actual, pred)
        mae = mean_absolute_error(actual, pred)
        rmse = np.sqrt(mean_squared_error(actual, pred))
        naive_r2 = r2_score(actual, naive)
        naive_mae = mean_absolute_error(actual, naive)
        # The small epsilon guards against division by a zero target.
        mape = np.mean(np.abs((actual.values - pred) / (actual.values + 1e-9))) * 100

        print(f" {test_yr:<6} {len(te):>5} {r2:>8.4f} {mae:>8.3f} {rmse:>8.3f} "
              f"{naive_r2:>10.4f} {r2-naive_r2:>+8.4f}")

        rows.append({
            "year": test_yr, "n": len(te),
            "r2": round(r2, 4),
            "mae": round(mae, 3),
            "rmse": round(rmse, 3),
            "mape": round(mape, 3),
            "naive_r2": round(naive_r2, 4),
            "naive_mae": round(naive_mae, 3),
            "r2_gain": round(r2 - naive_r2, 4),
            "mae_gain": round(naive_mae - mae, 3),
        })

    # Explicit columns keep the schema stable even when every fold was skipped;
    # a column-less frame would raise KeyError on cv["year"] below and in callers.
    cv = pd.DataFrame(rows, columns=result_cols)
    if cv.empty:
        print(f" no year met the fold-size minimums (train >= {min_train}, "
              f"test >= {min_test}); {name} was not evaluated")
        return cv

    ex22 = cv[cv["year"] != 2022]
    print(f" → Mean R²={cv['r2'].mean():.4f} excl.2022 R²={ex22['r2'].mean():.4f} "
          f"MAE={cv['mae'].mean():.3f}L")
    return cv
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# ββ Model selection βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 244 |
+
|
| 245 |
+
def _select_best(all_cv: dict) -> tuple:
|
| 246 |
+
"""Select best model by mean RΒ² excluding 2022 anomaly year."""
|
| 247 |
+
scores = {}
|
| 248 |
+
for name, cv in all_cv.items():
|
| 249 |
+
ex22 = cv[cv["year"] != 2022]
|
| 250 |
+
scores[name] = ex22["r2"].mean()
|
| 251 |
+
|
| 252 |
+
best_name = max(scores, key=scores.get)
|
| 253 |
+
print(f"\n[model] Model selection (mean RΒ² excl. 2022):")
|
| 254 |
+
for name, score in sorted(scores.items(), key=lambda x: -x[1]):
|
| 255 |
+
marker = " β BEST" if name == best_name else ""
|
| 256 |
+
print(f" {name:<20}: {score:.4f}{marker}")
|
| 257 |
+
|
| 258 |
+
return best_name, all_cv[best_name]
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def _print_comparison_table(all_cv: dict) -> None:
|
| 262 |
+
print(f"\n[model] Full comparison (all years):")
|
| 263 |
+
print(f" {'Model':<20} {'RΒ²':>8} {'excl22 RΒ²':>10} {'MAE':>8} {'RMSE':>8} {'RΒ²gain':>8}")
|
| 264 |
+
print(f" {'-'*72}")
|
| 265 |
+
for name, cv in all_cv.items():
|
| 266 |
+
ex22 = cv[cv["year"] != 2022]
|
| 267 |
+
print(f" {name:<20} {cv['r2'].mean():>8.4f} {ex22['r2'].mean():>10.4f} "
|
| 268 |
+
f"{cv['mae'].mean():>8.3f} {cv['rmse'].mean():>8.3f} "
|
| 269 |
+
f"{cv['r2_gain'].mean():>+8.4f}")
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
# ββ W&B logging βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 273 |
+
|
| 274 |
+
def _wandb_log_all(
    all_cv: dict,
    best_name: str,
    best_estimator,
    features: list,
    df: pd.DataFrame,
) -> None:
    """Log all model results to W&B — one run per algorithm + one summary run.

    Args:
        all_cv        : Mapping of model name to its CV results frame.
        best_name     : Name of the selected model; its run is tagged "champion".
        best_estimator: Fitted estimator of the selected model; used only for
                        the champion's feature-importance chart.
        features      : Feature column names.
        df            : Training frame; used for dataset-size summary fields.

    Every run is opened as a context manager so it is finished even when a
    logging call raises.
    """

    def _ex22_mean_r2(cv_frame):
        # Mean R² over all CV years except the 2022 anomaly year.
        return cv_frame[cv_frame["year"] != 2022]["r2"].mean()

    # ── Per-algorithm runs ────────────────────────────────────────────────
    for name, cv in all_cv.items():
        is_best = name == best_name

        with wandb.init(
            project=WANDB_PROJECT,
            group=WANDB_GROUP,
            name=name,
            tags=["champion"] if is_best else [],
            config={
                "algorithm": name,
                "n_features": len(features),
                "features": features,
                "wf_test_years": WF_TEST_YEARS,
                "target": TARGET,
                "is_best": is_best,
            },
            reinit=True,
        ) as run:
            # Per-year CV metrics as time series
            for _, row in cv.iterrows():
                run.log({
                    "year": int(row["year"]),
                    "r2": row["r2"],
                    "mae": row["mae"],
                    "rmse": row["rmse"],
                    "mape": row["mape"],
                    "naive_r2": row["naive_r2"],
                    "r2_gain": row["r2_gain"],
                    "mae_gain": row["mae_gain"],
                    "is_anomaly_year": int(row["year"]) == 2022,
                })

            # Summary metrics
            run.summary.update({
                "cv_mean_r2": round(cv["r2"].mean(), 4),
                "cv_ex22_r2": round(_ex22_mean_r2(cv), 4),
                "cv_mean_mae": round(cv["mae"].mean(), 3),
                "cv_mean_rmse": round(cv["rmse"].mean(), 3),
                "cv_mean_mape": round(cv["mape"].mean(), 3),
                "cv_r2_gain": round(cv["r2_gain"].mean(), 4),
                "n_districts": df["district"].nunique(),
                "n_states": df["state"].nunique(),
                "train_years": len(df["financial_year"].unique()),
            })

            # Feature importance: only the champion has a fitted estimator
            # available here, so the other runs skip the chart.
            fi = _get_feature_importance(name, best_estimator, features) if is_best else None
            if fi is not None:
                fi_table = wandb.Table(
                    columns=["feature", "importance"],
                    data=[[f, v] for f, v in sorted(fi.items(), key=lambda x: -x[1])],
                )
                run.log({"feature_importance": wandb.plot.bar(
                    fi_table, "feature", "importance",
                    title=f"Feature Importance — {name}",
                )})

            # CV R² chart per year
            cv_table = wandb.Table(
                dataframe=cv[["year", "r2", "naive_r2", "mae", "rmse", "r2_gain"]]
            )
            run.log({
                "cv_results_table": cv_table,
                "cv_r2_chart": wandb.plot.line_series(
                    xs=cv["year"].tolist(),
                    ys=[cv["r2"].tolist(), cv["naive_r2"].tolist()],
                    keys=["Model R²", "Naive R²"],
                    title=f"Walk-Forward CV R² — {name}",
                    xname="Financial Year",
                ),
            })

    # ── Summary comparison run ────────────────────────────────────────────
    with wandb.init(
        project=WANDB_PROJECT,
        group=WANDB_GROUP,
        name="model_selection_summary",
        tags=["summary"],
        reinit=True,
    ) as run:
        summary_rows = [
            [
                name,
                round(cv["r2"].mean(), 4),
                round(_ex22_mean_r2(cv), 4),
                round(cv["mae"].mean(), 3),
                round(cv["rmse"].mean(), 3),
                round(cv["mape"].mean(), 3),
                round(cv["r2_gain"].mean(), 4),
                name == best_name,
            ]
            for name, cv in all_cv.items()
        ]

        summary_table = wandb.Table(
            columns=["model", "mean_r2", "ex22_r2", "mean_mae",
                     "mean_rmse", "mean_mape", "r2_gain", "is_best"],
            data=summary_rows,
        )
        run.log({
            "model_comparison": summary_table,
            "best_model": best_name,
            "best_ex22_r2": round(_ex22_mean_r2(all_cv[best_name]), 4),
        })

        # Comparison bar chart (row[0] is the model name, row[2] its excl.-2022 R²)
        run.log({
            "r2_comparison": wandb.plot.bar(
                wandb.Table(
                    columns=["model", "ex22_r2"],
                    data=[[row[0], row[2]] for row in summary_rows],
                ),
                "model", "ex22_r2",
                title="Model Comparison — R² excl. 2022",
            )
        })

    print(f"[model] W&B logs complete → project: {WANDB_PROJECT} / group: {WANDB_GROUP}")
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
# ββ Figures βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 409 |
+
|
| 410 |
+
def _plot_model_comparison(all_cv: dict, best_name: str) -> None:
    """Bar chart comparing all models on mean R² (all years and excl. 2022).

    The left panel shows mean R² per model (all years vs. excluding 2022); the
    right panel shows mean MAE per model with the best model highlighted. The
    figure is written to ``06_model_comparison.png`` under FIGURES_DIR.

    Args:
        all_cv   : Mapping of model name to its CV results frame.
        best_name: Name of the selected model; must be a key of ``all_cv``.
    """
    names = list(all_cv.keys())
    mean_r2 = [all_cv[n]["r2"].mean() for n in names]
    ex22_r2 = [all_cv[n][all_cv[n]["year"] != 2022]["r2"].mean() for n in names]
    mean_mae = [all_cv[n]["mae"].mean() for n in names]

    x = np.arange(len(names))
    w = 0.35  # bar width; the two R² series sit at x - w/2 and x + w/2

    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    ax1.bar(x - w / 2, mean_r2, w, label="All years", alpha=0.8, color="#42A5F5")
    ax1.bar(x + w / 2, ex22_r2, w, label="excl. 2022", alpha=0.8, color="#26A69A")
    ax1.set_xticks(x)
    ax1.set_xticklabels(names, rotation=20, ha="right")
    ax1.set_ylabel("Mean R² (Walk-Forward CV)")
    ax1.set_title("Model Comparison — R² Score")
    ax1.set_ylim(0, 1)
    ax1.legend()
    # Annotate best: place the marker just above the winner's excl.-2022 bar.
    best_idx = names.index(best_name)
    ax1.annotate("★ BEST", xy=(best_idx + w / 2, ex22_r2[best_idx] + 0.01),
                 ha="center", color="#E53935", fontsize=9, fontweight="bold")

    mae_bars = ax2.bar(x, mean_mae, alpha=0.8,
                       color=["#E53935" if n == best_name else "#78909C" for n in names])
    ax2.set_xticks(x)
    ax2.set_xticklabels(names, rotation=20, ha="right")
    ax2.set_ylabel("Mean MAE (lakh person-days)")
    ax2.set_title("Model Comparison — MAE")
    for bar in mae_bars:
        # Value label just above each MAE bar.
        ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.1,
                 f"{bar.get_height():.2f}", ha="center", va="bottom", fontsize=8)

    plt.suptitle("SchemeImpactNet V4 — Algorithm Selection Results", fontsize=12, fontweight="bold")
    plt.tight_layout()
    path = os.path.join(FIGURES_DIR, "06_model_comparison.png")
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"[model] Saved: {path}")
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
def _plot_cv_per_year(all_cv: dict, best_name: str) -> None:
    """Line chart: R² per year for every algorithm.

    The left panel plots R² by test year and the right panel plots MAE by test
    year, one line per model; the best model is drawn solid and thicker. The
    figure is written to ``07_cv_per_year.png`` under FIGURES_DIR.

    Args:
        all_cv   : Mapping of model name to its CV results frame (columns
                   year, r2 and mae are read).
        best_name: Name of the selected model.
    """
    _, axes = plt.subplots(1, 2, figsize=(14, 5))
    r2_ax, mae_ax = axes

    colors = plt.cm.tab10(np.linspace(0, 1, len(all_cv)))
    for (name, cv), color in zip(all_cv.items(), colors):
        is_best = name == best_name
        line_style = dict(
            marker="o", label=name,
            linewidth=2.5 if is_best else 1.2,
            linestyle="-" if is_best else "--",
            alpha=1.0 if is_best else 0.65,
            color=color,
        )
        r2_ax.plot(cv["year"], cv["r2"], **line_style)
        mae_ax.plot(cv["year"], cv["mae"], **line_style)

    for ax in axes:
        # Shade the two years the file treats as unusual.
        ax.axvspan(2021.5, 2022.5, alpha=0.08, color="red", label="2022 anomaly")
        ax.axvspan(2019.5, 2020.5, alpha=0.05, color="orange", label="COVID-2020")
        ax.set_xticks(WF_TEST_YEARS)
        ax.set_xlabel("Financial Year")
        ax.legend(fontsize=8)

    r2_ax.set_ylabel("R²")
    r2_ax.set_title("Walk-Forward CV R² by Year")
    mae_ax.set_ylabel("MAE (lakh PD)")
    mae_ax.set_title("Walk-Forward CV MAE by Year")

    plt.suptitle("All Models — Walk-Forward CV Results", fontsize=12, fontweight="bold")
    plt.tight_layout()
    path = os.path.join(FIGURES_DIR, "07_cv_per_year.png")
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"[model] Saved: {path}")
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
def _plot_feature_importance(name: str, estimator, features: list) -> None:
    """Save a horizontal bar chart of the estimator's feature importances.

    Args:
        name: Display name of the model (used in the title and console output).
        estimator: Fitted estimator passed on to ``_get_feature_importance``.
        features: Feature names, in the order the estimator was trained on.

    Nothing is plotted when no importances can be extracted (``None``) or
    when the mapping is empty. The figure is written to
    ``08_feature_importance.png`` inside FIGURES_DIR and the five largest
    values are printed.
    """
    fi = _get_feature_importance(name, estimator, features)
    if not fi:
        # None (unsupported estimator) or {} (no features): nothing to draw.
        return

    imp = pd.Series(fi).sort_values()
    # Compute the highlight threshold once instead of once per bar.
    threshold = imp.quantile(0.75)
    colors = ["#E53935" if value > threshold else "#42A5F5" for value in imp.to_numpy()]

    _, ax = plt.subplots(figsize=(8, max(5, len(imp) * 0.35)))
    imp.plot(kind="barh", ax=ax, color=colors)
    ax.set_title(f"Feature Importances β {name} (Best Model)")
    ax.set_xlabel("Importance Score")
    plt.tight_layout()
    path = os.path.join(FIGURES_DIR, "08_feature_importance.png")
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"[model] Saved: {path}")
    print(f"\n[model] Top 5 features ({name}):")
    for feat, val in imp.sort_values(ascending=False).head(5).items():
        print(f" {feat:<35}: {val:.4f}")
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
def _get_feature_importance(name: str, estimator, features: list):
|
| 504 |
+
"""Extract feature importance β works for tree models and linear models."""
|
| 505 |
+
if estimator is None:
|
| 506 |
+
return None
|
| 507 |
+
try:
|
| 508 |
+
# Tree-based: direct feature_importances_
|
| 509 |
+
if hasattr(estimator, "feature_importances_"):
|
| 510 |
+
return dict(zip(features, estimator.feature_importances_))
|
| 511 |
+
# Pipeline with tree inside
|
| 512 |
+
if hasattr(estimator, "named_steps"):
|
| 513 |
+
inner = list(estimator.named_steps.values())[-1]
|
| 514 |
+
if hasattr(inner, "feature_importances_"):
|
| 515 |
+
return dict(zip(features, inner.feature_importances_))
|
| 516 |
+
if hasattr(inner, "coef_"):
|
| 517 |
+
return dict(zip(features, np.abs(inner.coef_)))
|
| 518 |
+
# XGBoost / LightGBM
|
| 519 |
+
if hasattr(estimator, "feature_importances_"):
|
| 520 |
+
return dict(zip(features, estimator.feature_importances_))
|
| 521 |
+
except Exception:
|
| 522 |
+
pass
|
| 523 |
+
return None
|
| 524 |
+
|
| 525 |
+
|
| 526 |
+
# ββ Model persistence βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 527 |
+
|
| 528 |
+
def _save_model(
    best_name: str,
    best_estimator,
    features: list,
    best_cv: pd.DataFrame,
    all_cv: dict,
    df: pd.DataFrame,
) -> None:
    """Pickle the selected estimator together with its metadata to MODEL_PATH.

    Args:
        best_name: Name of the selected model.
        best_estimator: The fitted estimator object to persist.
        features: Feature names the estimator was trained on.
        best_cv: Per-year CV table of the selected model; must provide the
            columns "year", "r2" and "mae".
        all_cv: Mapping of model name to its per-year CV table ("year",
            "r2", "mae", "rmse"), summarised into ``all_model_comparison``.
        df: Training frame; "financial_year", "district" and "state" are
            read for the metadata fields.

    The directory of MODEL_PATH is created if it does not exist, so saving
    works on a fresh checkout.
    """
    # Metrics excluding 2022 are reported next to the plain means.
    ex22 = best_cv[best_cv["year"] != 2022]

    # Build comparison summary for the bundle
    comparison = {}
    for name, cv in all_cv.items():
        e22 = cv[cv["year"] != 2022]
        comparison[name] = {
            "mean_r2": round(cv["r2"].mean(), 4),
            "ex22_r2": round(e22["r2"].mean(), 4),
            "mean_mae": round(cv["mae"].mean(), 3),
            "mean_rmse": round(cv["rmse"].mean(), 3),
        }

    bundle = {
        "model": best_estimator,
        "model_name": best_name,
        "features": features,
        "target": TARGET,
        # Hard-coded constant; its derivation is not visible in this function.
        "covid_multiplier": 1.447,
        "train_years": sorted(df["financial_year"].unique().tolist()),
        "n_districts": df["district"].nunique(),
        "n_states": df["state"].nunique(),
        "feature_importance": _get_feature_importance(best_name, best_estimator, features),
        "cv_results": best_cv.to_dict(),
        "cv_mean_r2": round(best_cv["r2"].mean(), 4),
        "cv_ex22_r2": round(ex22["r2"].mean(), 4),
        "cv_mean_mae": round(best_cv["mae"].mean(), 3),
        "all_model_comparison": comparison,
    }

    # open(..., "wb") does not create parent directories.
    model_dir = os.path.dirname(MODEL_PATH)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    with open(MODEL_PATH, "wb") as f:
        pickle.dump(bundle, f)
    print(f"\n[model] Model saved β {MODEL_PATH}")
    print(f"[model] Best: {best_name} | ex22 RΒ²={ex22['r2'].mean():.4f} | MAE={best_cv['mae'].mean():.3f}L")
|
| 569 |
+
|
| 570 |
+
|
| 571 |
+
def load_model(path: str = MODEL_PATH) -> dict:
    """Read the pickled model bundle from *path* and return it.

    The bundle must contain the keys "model_name", "cv_ex22_r2" and
    "cv_mean_mae", which are echoed to the console.

    NOTE: ``pickle.load`` can execute arbitrary code; only load bundles
    produced by this project.
    """
    with open(path, "rb") as handle:
        bundle = pickle.load(handle)
    summary_lines = (
        f"[model] Loaded: {bundle['model_name']} from {path}",
        f"[model] ex22 RΒ²={bundle['cv_ex22_r2']} | MAE={bundle['cv_mean_mae']}L",
    )
    for line in summary_lines:
        print(line)
    return bundle
|
| 578 |
+
|
| 579 |
+
|
| 580 |
+
# ββ Prediction helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 581 |
+
|
| 582 |
+
def _predict_all(estimator, df: pd.DataFrame, features: list) -> pd.DataFrame:
    """Score every row of *df* and return identifiers, target and error columns.

    Missing feature values are replaced by 0 before prediction. The result
    holds "state", "district", "financial_year", the TARGET column, plus
    "predicted_persondays", "prediction_error" (actual minus predicted) and
    "abs_error"; predictions and errors are rounded to 3 decimals.
    """
    model_inputs = df[features].fillna(0)
    raw_predictions = estimator.predict(model_inputs)

    id_and_target = ["state", "district", "financial_year", TARGET]
    scored = df[id_and_target].copy()
    scored["predicted_persondays"] = raw_predictions.round(3)
    residual = scored[TARGET] - scored["predicted_persondays"]
    scored["prediction_error"] = residual.round(3)
    scored["abs_error"] = scored["prediction_error"].abs()
    return scored
|
| 589 |
+
|
| 590 |
+
|
| 591 |
+
def _save_predictions(df: pd.DataFrame) -> None:
    """Write *df* to ``mnrega_predictions.csv`` in OUTPUT_DIR, without the index."""
    destination = os.path.join(OUTPUT_DIR, "mnrega_predictions.csv")
    df.to_csv(destination, index=False)
    print(f"[model] Predictions saved β {destination}")
|
| 595 |
+
|
| 596 |
+
|
| 597 |
+
# ββ Report ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 598 |
+
|
| 599 |
+
def _save_model_report(
    best_name: str,
    best_cv: pd.DataFrame,
    all_cv: dict,
    features: list,
    best_estimator,
) -> None:
    """Write the plain-text model selection report to ``reports/model_report.txt``.

    Args:
        best_name: Name of the selected model.
        best_cv: Per-year CV table of the selected model; the columns
            "year", "r2", "mae", "rmse" and "r2_gain" are read.
        all_cv: Mapping of model name to its per-year CV table ("year",
            "r2", "mae", "rmse"), one comparison row each.
        features: Feature names used by the model.
        best_estimator: Fitted estimator, used for the importance listing.

    The file is opened as UTF-8 explicitly: the report contains non-ASCII
    characters, and the platform default encoding may be unable to
    represent them.
    """
    ex22 = best_cv[best_cv["year"] != 2022]
    path = os.path.join("reports", "model_report.txt")
    os.makedirs("reports", exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write("SchemeImpactNet β V4 Model Selection Report\n")
        f.write("=" * 60 + "\n\n")
        f.write(f"Best Model : {best_name}\n")
        f.write(f"Selection : max mean RΒ² excl. 2022 (walk-forward CV)\n")
        f.write(f"Features : {len(features)}\n")
        f.write(f"Evaluation : Walk-forward CV (2018β2024)\n\n")

        # One row per candidate algorithm; the winner is tagged.
        f.write("Algorithm Comparison:\n")
        f.write(f" {'Model':<20} {'RΒ²':>8} {'ex22 RΒ²':>10} {'MAE':>8} {'RMSE':>8}\n")
        f.write(f" {'-'*60}\n")
        for name, cv in all_cv.items():
            e22 = cv[cv["year"] != 2022]
            marker = " β BEST" if name == best_name else ""
            f.write(f" {name:<20} {cv['r2'].mean():>8.4f} "
                    f"{e22['r2'].mean():>10.4f} {cv['mae'].mean():>8.3f} "
                    f"{cv['rmse'].mean():>8.3f}{marker}\n")

        f.write(f"\nBest Model ({best_name}) Walk-Forward CV:\n")
        f.write(f" Mean RΒ² : {best_cv['r2'].mean():.4f}\n")
        f.write(f" excl.2022 RΒ²: {ex22['r2'].mean():.4f}\n")
        f.write(f" Mean MAE : {best_cv['mae'].mean():.3f} lakh\n")
        f.write(f" Mean RMSE : {best_cv['rmse'].mean():.3f} lakh\n")
        f.write(f" RΒ² gain : {best_cv['r2_gain'].mean():+.4f} vs naive lag-1\n\n")

        f.write(f"Previous (leaked) RΒ²: 0.9963\n")
        f.write(f"Leakage source: works_completed (r=1.0 with target)\n\n")
        f.write(f"2022 anomaly: West Bengal -93 to -98% reporting drop. Excl. RΒ²={ex22['r2'].mean():.4f}\n\n")

        # Importances are optional: skipped when none can be extracted.
        fi = _get_feature_importance(best_name, best_estimator, features)
        if fi:
            f.write("Feature Importances:\n")
            for feat, val in sorted(fi.items(), key=lambda x: -x[1]):
                f.write(f" {feat:<35} {val:.4f}\n")

        f.write(f"\nYear-by-year CV ({best_name}):\n")
        f.write(best_cv.to_string(index=False))
    print(f"[model] Report saved β {path}")
|
| 647 |
+
|
| 648 |
+
|
| 649 |
+
# ββ Feature list helper βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 650 |
+
|
| 651 |
+
def _get_features(df: pd.DataFrame) -> list:
    """Return the FEATURE_COLS entries present in *df*, in FEATURE_COLS order.

    Entries absent from the frame are reported in a single warning line.
    """
    present, absent = [], []
    for column in FEATURE_COLS:
        bucket = present if column in df.columns else absent
        bucket.append(column)
    if absent:
        print(f"[model] Warning: {len(absent)} features not in df: {absent}")
    return present
|
src/optimize.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
optimize.py (v2 — proportional rank-based LP)
|
| 3 |
+
-----------------------------------------------
|
| 4 |
+
Fixes the LP bang-bang problem caused by low efficiency variance (~7.7% CV).
|
| 5 |
+
|
| 6 |
+
Root cause: With efficiency ranging only 0.0026–0.0039, pure LP pushes
|
| 7 |
+
every district to either MIN_FRACTION floor or MAX_FRACTION ceiling.
|
| 8 |
+
462 districts hit -60%, 262 hit +150%, only 1 in-between.
|
| 9 |
+
|
| 10 |
+
Fix: Two-stage allocation
|
| 11 |
+
Stage 1 β Proportional rank allocation
|
| 12 |
+
Compute efficiency percentile rank (0β1) per district.
|
| 13 |
+
Assign multiplier: rank 0 → 0.60×, rank 1 → 1.80×
|
| 14 |
+
Rescale to preserve total budget.
|
| 15 |
+
β Produces a continuous, meaningful spread of -40% to +80%
|
| 16 |
+
|
| 17 |
+
Stage 2 — LP refinement within ±15% of stage1
|
| 18 |
+
Tighter LP bounds around the proportional solution.
|
| 19 |
+
LP fills in genuine optimality within the constrained band.
|
| 20 |
+
β Adds economic rigour without collapsing to bang-bang.
|
| 21 |
+
|
| 22 |
+
Result: 725 unique budget_change_pct values, realistic distribution,
|
| 23 |
+
same total budget, higher total employment.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import os
|
| 27 |
+
import numpy as np
|
| 28 |
+
import pandas as pd
|
| 29 |
+
import matplotlib.pyplot as plt
|
| 30 |
+
import matplotlib.patches as mpatches
|
| 31 |
+
from scipy.optimize import linprog
|
| 32 |
+
|
| 33 |
+
# Output locations for figures and CSV results; both directories are
# created at import time if they do not exist yet.
FIGURES_DIR = os.path.join("reports", "figures")
OUTPUT_DIR = os.path.join("data", "processed")
os.makedirs(FIGURES_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Stage 1 bounds: multipliers applied to the current budget before the
# allocation is rescaled to the total budget.
RANK_FLOOR = 0.60  # worst district keeps 60% of budget -> -40%
RANK_CEIL = 1.80  # best district gets 180% of budget -> +80%

# Stage 2 LP refinement band around stage1
LP_REFINE_BAND = 0.15  # +/-15% around stage1 solution

# Hard absolute limits, as fractions of each district's current budget
ABS_MIN_FRACTION = 0.40
ABS_MAX_FRACTION = 2.00
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def run_optimizer(
    predictions_path: str = "data/processed/mnrega_predictions.csv",
    raw_path: str = "data/raw/mnrega_real_data_final_clean.csv",
    scope_state: str = None,
    total_budget_override: float = None,
    target_year: int = 2024,
) -> pd.DataFrame:
    """Run the full budget-allocation workflow and return the result table.

    Steps: load and join the inputs, optimize, print a summary, save the
    two figures and write the result CSV.

    Args:
        predictions_path: CSV holding the model predictions.
        raw_path: CSV holding the per-district budget figures.
        scope_state: Restrict the run to one state; falsy means all states.
        total_budget_override: Total budget to distribute instead of the
            current total.
        target_year: Financial year to optimize.

    Returns:
        The frame produced by ``_optimize``.
    """
    print("\n[optimizer-v2] ββ Budget Allocation Optimizer (Proportional-LP) ββ")

    # Label used in figure titles when no single state is selected.
    scope_label = scope_state or "All-India"

    prepared = _prepare_data(predictions_path, raw_path, scope_state, target_year)
    result = _optimize(prepared, total_budget_override)

    _print_summary(result)
    for draw in (_plot_allocation_comparison, _plot_efficiency_gain):
        draw(result, scope_label)
    _save_results(result)

    print("[optimizer-v2] ββ Optimization Complete ββββββββββββββββββββββββββββ\n")
    return result
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _prepare_data(predictions_path, raw_path, scope_state, target_year):
|
| 72 |
+
preds = pd.read_csv(predictions_path)
|
| 73 |
+
preds = preds[preds["financial_year"] == target_year].copy()
|
| 74 |
+
|
| 75 |
+
raw = pd.read_csv(raw_path)
|
| 76 |
+
raw["financial_year"] = raw["financial_year"].apply(
|
| 77 |
+
lambda v: int(str(v).split("-")[0])
|
| 78 |
+
)
|
| 79 |
+
budget = raw[raw["financial_year"] == target_year][
|
| 80 |
+
["state", "district", "budget_allocated_lakhs", "expenditure_lakhs"]
|
| 81 |
+
].copy()
|
| 82 |
+
|
| 83 |
+
df = preds.merge(budget, on=["state", "district"], how="inner")
|
| 84 |
+
df = df.dropna(subset=["budget_allocated_lakhs", "predicted_persondays"])
|
| 85 |
+
df = df[df["budget_allocated_lakhs"] > 0].reset_index(drop=True)
|
| 86 |
+
|
| 87 |
+
if scope_state:
|
| 88 |
+
df = df[df["state"] == scope_state].reset_index(drop=True)
|
| 89 |
+
|
| 90 |
+
print(f"[optimizer-v2] Scope: {scope_state or 'All-India'} | Districts: {len(df)} | Year: {target_year}")
|
| 91 |
+
df["persondays_per_lakh"] = df["predicted_persondays"] / df["budget_allocated_lakhs"]
|
| 92 |
+
print(f"[optimizer-v2] Efficiency CV: {df['persondays_per_lakh'].std()/df['persondays_per_lakh'].mean()*100:.1f}%")
|
| 93 |
+
print(f"[optimizer-v2] Total budget: βΉ{df['budget_allocated_lakhs'].sum():,.0f} lakh")
|
| 94 |
+
return df
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _optimize(df: pd.DataFrame, total_budget_override: float = None) -> pd.DataFrame:
    """Reallocate district budgets in two stages and return the result table.

    Stage 1 ranks districts by "persondays_per_lakh" and scales each
    current budget by a multiplier interpolated between RANK_FLOOR and
    RANK_CEIL, then rescales so the allocations sum to the total budget.
    Stage 2 solves a linear program that maximizes
    ``sum(efficiency * budget)`` subject to ``sum(budget) <= total`` with
    per-district bounds of +/-LP_REFINE_BAND around the stage-1 value,
    clipped to [ABS_MIN_FRACTION, ABS_MAX_FRACTION] of the current budget.
    If the LP does not succeed, the stage-1 allocation is used.

    Args:
        df: Frame with "budget_allocated_lakhs", "persondays_per_lakh" and
            "predicted_persondays".
        total_budget_override: Total budget to distribute. A falsy value
            (``None`` or 0) falls back to the sum of the current budgets.

    Returns:
        A copy of *df* with "optimized_budget", "budget_change",
        "budget_change_pct", "sq_persondays", "opt_persondays",
        "persondays_gain" and "persondays_gain_pct" added.

    Raises:
        ValueError: If *df* has no rows.
    """
    if len(df) == 0:
        # Fail with an explicit message instead of a zero-size reduction
        # error from numpy further down.
        raise ValueError("[optimizer-v2] No districts to optimize: input frame is empty")

    current_budgets = df["budget_allocated_lakhs"].values
    efficiency = df["persondays_per_lakh"].values
    total_budget = total_budget_override or current_budgets.sum()

    # Stage 1: proportional rank allocation.
    # rank(pct=True) yields values in (0, 1]; the top district gets 1.0.
    eff_rank = pd.Series(efficiency).rank(pct=True).values

    # Linear interpolation between the floor and ceiling multipliers
    multipliers = RANK_FLOOR + eff_rank * (RANK_CEIL - RANK_FLOOR)
    stage1_raw = current_budgets * multipliers

    # Rescale to preserve total budget
    scale = total_budget / stage1_raw.sum()
    stage1 = stage1_raw * scale

    # Computed once and reused for both ends of the reported range.
    stage1_change_pct = (stage1 - current_budgets) / current_budgets * 100
    print(f"[optimizer-v2] Stage 1 (proportional rank) range: "
          f"{stage1_change_pct.min():.1f}% to "
          f"{stage1_change_pct.max():.1f}%")

    # Stage 2: LP refinement within the band around stage 1, never leaving
    # the hard limits relative to the current budget.
    lb = np.maximum(stage1 * (1 - LP_REFINE_BAND),
                    current_budgets * ABS_MIN_FRACTION)
    ub = np.minimum(stage1 * (1 + LP_REFINE_BAND),
                    current_budgets * ABS_MAX_FRACTION)

    res = linprog(
        -efficiency,  # linprog minimizes, so negate to maximize persondays
        A_ub=[np.ones(len(df))],
        b_ub=[total_budget],
        bounds=list(zip(lb, ub)),
        method="highs",
    )

    if res.success:
        optimized = res.x
        print(f"[optimizer-v2] Stage 2 LP converged β | Unique values: {pd.Series(optimized.round(2)).nunique()}")
    else:
        print(f"[optimizer-v2] LP failed, using stage1 allocation")
        optimized = stage1

    df = df.copy()
    df["optimized_budget"] = optimized.round(2)
    df["budget_change"] = df["optimized_budget"] - df["budget_allocated_lakhs"]
    df["budget_change_pct"] = (df["budget_change"] / df["budget_allocated_lakhs"] * 100).round(2)
    # Status quo output is the prediction; optimized output assumes the
    # same persondays-per-lakh efficiency at the new budget.
    df["sq_persondays"] = df["predicted_persondays"]
    df["opt_persondays"] = (df["persondays_per_lakh"] * df["optimized_budget"]).round(3)
    df["persondays_gain"] = (df["opt_persondays"] - df["sq_persondays"]).round(3)
    df["persondays_gain_pct"] = (df["persondays_gain"] / df["sq_persondays"] * 100).round(2)
    return df
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def _print_summary(df):
|
| 150 |
+
sq = df["sq_persondays"].sum()
|
| 151 |
+
opt = df["opt_persondays"].sum()
|
| 152 |
+
gain = opt - sq
|
| 153 |
+
|
| 154 |
+
print(f"\n[optimizer-v2] ββ Results βββββββββββββββββββββββββββββββββββββββ")
|
| 155 |
+
print(f" budget_change_pct β min: {df['budget_change_pct'].min():.1f}% "
|
| 156 |
+
f"max: {df['budget_change_pct'].max():.1f}% "
|
| 157 |
+
f"std: {df['budget_change_pct'].std():.1f}% "
|
| 158 |
+
f"unique: {df['budget_change_pct'].nunique()}")
|
| 159 |
+
print(f" Status quo : {sq:>10,.2f} lakh PD")
|
| 160 |
+
print(f" Optimized : {opt:>10,.2f} lakh PD")
|
| 161 |
+
print(f" Net gain : {gain:>+10,.2f} lakh PD ({gain/sq*100:+.2f}%)")
|
| 162 |
+
print(f" Budget : βΉ{df['budget_allocated_lakhs'].sum():,.0f} lakh (unchanged)")
|
| 163 |
+
print(f"[optimizer-v2] ββββββββββββββββββββββββββββββββββββββββββββββββββββ")
|
| 164 |
+
|
| 165 |
+
print("\n[optimizer-v2] Top 5 districts to INCREASE:")
|
| 166 |
+
print(df.nlargest(5, "persondays_gain")[
|
| 167 |
+
["state","district","budget_allocated_lakhs","optimized_budget","budget_change_pct","persondays_gain"]
|
| 168 |
+
].to_string(index=False))
|
| 169 |
+
print("\n[optimizer-v2] Top 5 districts to REDUCE:")
|
| 170 |
+
print(df.nsmallest(5, "budget_change")[
|
| 171 |
+
["state","district","budget_allocated_lakhs","optimized_budget","budget_change_pct","persondays_gain"]
|
| 172 |
+
].to_string(index=False))
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def _plot_allocation_comparison(df, scope):
    """Plot current vs optimized budgets for the largest movers.

    The ten largest increases and ten largest decreases in "budget_change"
    are shown as paired horizontal bars, ordered by "budget_change". The
    figure is stored through ``_save_fig`` as
    ``08_budget_allocation_comparison.png``.
    """
    top_up = df.nlargest(10, "budget_change")
    top_down = df.nsmallest(10, "budget_change")
    # With 20 rows or fewer the two selections overlap; drop repeated rows.
    show = pd.concat([top_up, top_down]).drop_duplicates()
    show = show.sort_values("budget_change")

    fig, ax = plt.subplots(figsize=(12, max(7, len(show) * 0.4)))
    x = np.arange(len(show))
    w = 0.38
    bar_specs = (
        (x - w / 2, "budget_allocated_lakhs", "#90CAF9", "Status Quo"),
        (x + w / 2, "optimized_budget", "#1565C0", "Optimized"),
    )
    for positions, column, shade, caption in bar_specs:
        ax.barh(positions, show[column].values, height=w, color=shade, label=caption)

    ax.set_yticks(x)
    ax.set_yticklabels(show["district"], fontsize=8)
    ax.set_xlabel("Budget (Rs. lakh)")
    ax.set_title(f"Budget Reallocation β {scope}")
    ax.legend()
    plt.tight_layout()
    _save_fig("08_budget_allocation_comparison.png")
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def _plot_efficiency_gain(df, scope):
    """Scatter efficiency against percentage budget change per district.

    Points with a positive "budget_change" are green, all others red. The
    figure is stored through ``_save_fig`` as
    ``09_efficiency_gain_by_district.png``.
    """
    increase_color = "#2E7D32"
    decrease_color = "#C62828"

    fig, ax = plt.subplots(figsize=(10, 7))
    colors = df["budget_change"].apply(
        lambda delta: increase_color if delta > 0 else decrease_color
    )
    ax.scatter(df["persondays_per_lakh"], df["budget_change_pct"],
               c=colors, alpha=0.55, s=40)
    # Dashed zero line separates increases from decreases.
    ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
    ax.set_xlabel("Efficiency (PD per βΉ lakh)")
    ax.set_ylabel("Budget Change (%)")
    ax.set_title(f"Efficiency vs Budget Change β {scope}")

    legend_entries = [
        mpatches.Patch(color=increase_color, label="Increase"),
        mpatches.Patch(color=decrease_color, label="Decrease"),
    ]
    ax.legend(handles=legend_entries)
    plt.tight_layout()
    _save_fig("09_efficiency_gain_by_district.png")
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def _save_results(df):
    """Write the allocation table to ``optimized_budget_allocation.csv``.

    Only the result columns are kept, sorted by "persondays_gain" in
    descending order; the index is not written.
    """
    identity_cols = ["state", "district"]
    budget_cols = ["budget_allocated_lakhs", "optimized_budget",
                   "budget_change", "budget_change_pct"]
    output_cols = ["sq_persondays", "opt_persondays",
                   "persondays_gain", "persondays_gain_pct", "persondays_per_lakh"]
    export_cols = identity_cols + budget_cols + output_cols

    path = os.path.join(OUTPUT_DIR, "optimized_budget_allocation.csv")
    ranked = df[export_cols].sort_values("persondays_gain", ascending=False)
    ranked.to_csv(path, index=False)
    print(f"[optimizer-v2] Saved β {path}")
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def _save_fig(filename):
    """Save the current matplotlib figure under FIGURES_DIR and close it."""
    target = os.path.join(FIGURES_DIR, filename)
    plt.savefig(target, bbox_inches="tight")
    plt.close()
    print(f"[optimizer-v2] Saved: {target}")
|
src/pipeline.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
pipeline.py
|
| 3 |
+
-----------
|
| 4 |
+
V3 pipeline orchestrator for SchemeImpactNet.
|
| 5 |
+
|
| 6 |
+
Changes from original:
|
| 7 |
+
- RAW_PATH now points to the real Dataful government CSV
|
| 8 |
+
(confirmed 99% match with mnrega_india_unified.csv, <0.005L diff)
|
| 9 |
+
- Feature engineering uses V3 leak-free features (src/features.py)
|
| 10 |
+
- Model uses GBR V3 with walk-forward CV (src/model.py)
|
| 11 |
+
- Model saved to models/mnrega_gbr_v3.pkl
|
| 12 |
+
- Removed generate_synthetic dependency from Stage 1
|
| 13 |
+
- Stage 3 model comparison retained but flags honest metrics
|
| 14 |
+
|
| 15 |
+
Data sources:
|
| 16 |
+
Real: data/raw/20063- Dataful/mnrega-...-persondays-...csv
|
| 17 |
+
β person_days_lakhs, households_availed (real gov data)
|
| 18 |
+
β avg_wage_rate (official wage schedule, exogenous)
|
| 19 |
+
Synthetic: all other columns (rainfall, poverty, pmkisan, pmay)
|
| 20 |
+
β EXCLUDED from V3 model features
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
import os
|
| 24 |
+
import pandas as pd
|
| 25 |
+
import numpy as np
|
| 26 |
+
|
| 27 |
+
from src.clean import clean
|
| 28 |
+
from src.features import build_features
|
| 29 |
+
from src.eda import run_eda
|
| 30 |
+
from src.model import run_model
|
| 31 |
+
|
| 32 |
+
# ββ Data paths ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 33 |
+
# Monthly, long-format government CSV; preferred input of _load_real_data.
DATAFUL_PATH = os.path.join(
    "data", "raw", "20063- Dataful",
    "mnrega-year-month-state-and-district-wise-total-persondays-"
    "and-households-engaged-in-work.csv"
)
# Unified CSV: source of avg_wage_rate and fallback when the Dataful file is absent.
UNIFIED_PATH = os.path.join("data", "raw", "mnrega_india_unified.csv")
# Destination of the feature-engineered frame written by run_pipeline.
PROCESSED_PATH = os.path.join("data", "processed", "mnrega_cleaned.csv")
# Only printed in the completion banner by this module.
MODEL_PATH = os.path.join("models", "mnrega_best_model.pkl")

# Human-readable scope per pipeline stage; stage 1 restricts to Maharashtra.
SCOPE_LABEL = {
    1: "Maharashtra",
    2: "All-India",
    3: "All-India (V3 leak-free)",
}
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def run_pipeline(stage: int = 3) -> pd.DataFrame:
    """Run extract, clean, feature engineering, EDA and modelling for one stage.

    Args:
        stage: 1, 2 or 3. Stage 1 restricts the data to Maharashtra; the
            other stages use every state.

    Returns:
        Whatever ``run_model`` returns for the engineered frame.

    The engineered frame is also written to PROCESSED_PATH.
    """
    assert stage in (1, 2, 3), "Stage must be 1, 2, or 3"

    rule = "=" * 60
    scope = SCOPE_LABEL[stage]

    print("\n" + rule)
    print(f" SchemeImpactNet V3 β Stage {stage} Pipeline")
    print(f" Scope : {scope}")
    print(rule)

    # Step 1: extract
    print(f"\n[pipeline] Step 1: Extract (real government data)")
    only_state = "Maharashtra" if stage == 1 else None
    df = _load_real_data(state_filter=only_state)

    # Step 2: clean
    print(f"\n[pipeline] Step 2: Clean")
    df = _clean_real(df)

    # Step 3: features
    print(f"\n[pipeline] Step 3: V3 Feature Engineering (leak-free)")
    df = build_features(df)

    # Persist the engineered frame before the later steps read it.
    os.makedirs(os.path.dirname(PROCESSED_PATH), exist_ok=True)
    df.to_csv(PROCESSED_PATH, index=False)
    print(f"\n[pipeline] Processed data saved β {PROCESSED_PATH}")

    # Step 4: exploratory analysis
    print(f"\n[pipeline] Step 4: EDA")
    run_eda(df, scope=scope)

    # Step 5: model
    print(f"\n[pipeline] Step 5: V3 Model (walk-forward CV + pkl save)")
    predictions = run_model(df)

    print("\n" + rule)
    print(f" Stage {stage} Complete!")
    print(f" Processed : {PROCESSED_PATH}")
    print(f" Model : {MODEL_PATH}")
    print(f" Figures : reports/figures/")
    print(f" Predictions : data/processed/mnrega_predictions.csv")
    print(f" Report : reports/model_report.txt")
    print(rule + "\n")

    return predictions
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# ββ Real data loader ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 95 |
+
|
| 96 |
+
def _fy_start(value) -> int:
    """Return the start year of a fiscal-year label ('2014-15' -> 2014)."""
    text = str(value)
    return int(text.split("-")[0]) if "-" in text else int(value)


def _snake_case_columns(frame: pd.DataFrame) -> pd.DataFrame:
    """Strip, lower-case and underscore the column names of *frame* in place."""
    frame.columns = [c.strip().lower().replace(" ", "_") for c in frame.columns]
    return frame


def _load_real_data(state_filter: str = None) -> pd.DataFrame:
    """
    Load and pivot the Dataful government CSV from long format
    (one row per district-month-category) to annual wide format
    (one row per district-year with person_days_lakhs and households_availed).

    Falls back to the unified CSV if the Dataful file is not found.

    Args:
        state_filter: Optional state name; when given, only that state's
            rows are returned.

    Returns:
        Frame with an integer "financial_year" (fiscal-year start). On the
        Dataful path the columns are limited to state, district,
        financial_year, person_days_lakhs, households_availed and
        avg_wage_rate (those that exist); on the fallback path all columns
        of the unified CSV are kept.
    """
    if os.path.exists(DATAFUL_PATH):
        print(f"[pipeline] Loading Dataful government CSV: {DATAFUL_PATH}")
        df_raw = _snake_case_columns(pd.read_csv(DATAFUL_PATH))

        # Parse fiscal year start integer from '2014-15' -> 2014
        df_raw["fy"] = df_raw["fiscal_year"].apply(_fy_start)
        # Exclude incomplete current fiscal year
        df_raw = df_raw[df_raw["fy"] <= 2024]

        # Pivot: sum monthly values to annual per district
        pivot = df_raw.pivot_table(
            index=["fiscal_year", "fy", "state", "district"],
            columns="category",
            values="value",
            aggfunc="sum"
        ).reset_index()
        pivot.columns.name = None

        # Rename to match model schema
        pivot = pivot.rename(columns={
            "Persondays": "person_days",
            "Household": "households_availed",
            "fy": "financial_year",
        })
        pivot["person_days_lakhs"] = (pivot["person_days"] / 1e5).round(3)

        # Bring in avg_wage_rate from unified CSV (official schedule, exogenous)
        if os.path.exists(UNIFIED_PATH):
            df_uni = _snake_case_columns(pd.read_csv(UNIFIED_PATH))
            df_uni["financial_year"] = df_uni["financial_year"].apply(_fy_start)
            # Keep exactly one (first non-null) rate per state-year. A plain
            # drop_duplicates() leaves several rows whenever the rate differs
            # between rows, and the left merge would then multiply district rows.
            wage_map = (
                df_uni[["state", "financial_year", "avg_wage_rate"]]
                .dropna(subset=["avg_wage_rate"])
                .drop_duplicates(subset=["state", "financial_year"])
            )
            pivot = pivot.merge(wage_map, on=["state", "financial_year"], how="left")

        # Keep only needed columns
        keep = ["state", "district", "financial_year",
                "person_days_lakhs", "households_availed", "avg_wage_rate"]
        df = pivot[[c for c in keep if c in pivot.columns]].copy()

    else:
        print(f"[pipeline] Dataful CSV not found, falling back to unified CSV")
        print(f"[pipeline] NOTE: unified CSV contains synthetic columns β "
              f"V3 features ignore them")
        df = _snake_case_columns(pd.read_csv(UNIFIED_PATH))
        df["financial_year"] = df["financial_year"].apply(_fy_start)

    if state_filter:
        before = len(df)
        df = df[df["state"] == state_filter].reset_index(drop=True)
        print(f"[pipeline] Filtered to {state_filter}: {before} β {len(df)} rows")

    print(f"[pipeline] Loaded {len(df):,} rows | "
          f"{df['state'].nunique()} states | "
          f"{df['district'].nunique()} districts | "
          f"{df['financial_year'].nunique()} years "
          f"({df['financial_year'].min()}β{df['financial_year'].max()})")
    return df
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def _clean_real(df: pd.DataFrame) -> pd.DataFrame:
|
| 172 |
+
"""
|
| 173 |
+
Lightweight clean for the real Dataful data.
|
| 174 |
+
The full clean() from src/clean.py expects synthetic columns β
|
| 175 |
+
we do a minimal version here.
|
| 176 |
+
"""
|
| 177 |
+
df = df.sort_values(["state", "district", "financial_year"]).reset_index(drop=True)
|
| 178 |
+
|
| 179 |
+
# Strip strings
|
| 180 |
+
for col in df.select_dtypes(include="object").columns:
|
| 181 |
+
df[col] = df[col].str.strip()
|
| 182 |
+
|
| 183 |
+
# Numeric cast
|
| 184 |
+
for col in ["person_days_lakhs", "households_availed", "avg_wage_rate"]:
|
| 185 |
+
if col in df.columns:
|
| 186 |
+
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 187 |
+
|
| 188 |
+
# Forward-fill wage within state (official schedule rarely changes mid-year)
|
| 189 |
+
if "avg_wage_rate" in df.columns:
|
| 190 |
+
df["avg_wage_rate"] = df.groupby("state")["avg_wage_rate"].transform(
|
| 191 |
+
lambda s: s.ffill().bfill()
|
| 192 |
+
)
|
| 193 |
+
|
| 194 |
+
# Drop rows with no person_days_lakhs
|
| 195 |
+
before = len(df)
|
| 196 |
+
df = df.dropna(subset=["person_days_lakhs"]).reset_index(drop=True)
|
| 197 |
+
if len(df) < before:
|
| 198 |
+
print(f"[pipeline] Dropped {before - len(df)} rows with null person_days_lakhs")
|
| 199 |
+
|
| 200 |
+
print(f"[pipeline] Cleaned. Shape: {df.shape}")
|
| 201 |
+
return df
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def run_optimizer_step(scope_state: str = None, target_year: int = 2024) -> None:
    """Run the budget optimizer after predictions are generated.

    Args:
        scope_state: Optional state to restrict the optimization to.
        target_year: Financial year (start year) to optimize. Defaults to
            2024, the value that was previously hard-coded.

    Predictions are read from ``data/processed/mnrega_predictions.csv`` and
    budgets from UNIFIED_PATH.
    """
    # Imported lazily so the pipeline module loads without the optimizer's
    # own imports.
    from src.optimize import run_optimizer
    run_optimizer(
        predictions_path=os.path.join("data", "processed", "mnrega_predictions.csv"),
        raw_path=UNIFIED_PATH,
        scope_state=scope_state,
        target_year=target_year,
    )
|
src/streamlit_app.py
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
import altair as alt
|
| 2 |
-
import numpy as np
|
| 3 |
-
import pandas as pd
|
| 4 |
-
import streamlit as st
|
| 5 |
-
|
| 6 |
-
"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

# Two sliders control the demo: how many points to draw and how tightly the
# spiral winds.
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)

# `fraction` runs from 0 to 1 along the spiral and doubles as the radius.
fraction = np.linspace(0, 1, num_points)
angle = 2 * np.pi * num_turns * fraction

spiral = pd.DataFrame(
    {
        "x": fraction * np.cos(angle),
        "y": fraction * np.sin(angle),
        "idx": fraction,
        "rand": np.random.randn(num_points),
    }
)

# Build the chart step by step, then hand it to Streamlit.
scatter = alt.Chart(spiral, height=700, width=700).mark_point(filled=True)
scatter = scatter.encode(
    x=alt.X("x", axis=None),
    y=alt.Y("y", axis=None),
    color=alt.Color("idx", legend=None, scale=alt.Scale()),
    size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
)
st.altair_chart(scatter)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
start.sh
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# ============================================================
|
| 3 |
+
# SchemeImpactNet — Start Script
|
| 4 |
+
# Usage: ./start.sh [options]
|
| 5 |
+
#
|
| 6 |
+
# Options:
|
| 7 |
+
# --skip-pipeline Skip data generation even if files missing
|
| 8 |
+
# --backend-only Start only the FastAPI backend
|
| 9 |
+
# --frontend-only Start only the Streamlit frontend
|
| 10 |
+
# --port-backend N Backend port (default: 8000)
|
| 11 |
+
# --port-frontend N Frontend port (default: 8501)
|
| 12 |
+
# --stage N Pipeline stage to run if needed (1|2|3, default: 3)
|
| 13 |
+
# ============================================================
|
| 14 |
+
|
| 15 |
+
set -euo pipefail
|
| 16 |
+
|
| 17 |
+
# ββ Defaults ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 18 |
+
BACKEND_PORT=8000
|
| 19 |
+
FRONTEND_PORT=8501
|
| 20 |
+
PIPELINE_STAGE=3
|
| 21 |
+
SKIP_PIPELINE=false
|
| 22 |
+
BACKEND_ONLY=false
|
| 23 |
+
FRONTEND_ONLY=false
|
| 24 |
+
BACKEND_PID=""
|
| 25 |
+
FRONTEND_PID=""
|
| 26 |
+
|
| 27 |
+
# ββ Always resolve project root (where this script lives) βββββββββββββββββββββ
|
| 28 |
+
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
| 29 |
+
|
| 30 |
+
# ββ Colours βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 31 |
+
RED='\033[0;31m'
|
| 32 |
+
GREEN='\033[0;32m'
|
| 33 |
+
AMBER='\033[0;33m'
|
| 34 |
+
BLUE='\033[0;34m'
|
| 35 |
+
BOLD='\033[1m'
|
| 36 |
+
RESET='\033[0m'
|
| 37 |
+
ok() { echo -e "${GREEN} β${RESET} $*"; }
|
| 38 |
+
info() { echo -e "${BLUE} β${RESET} $*"; }
|
| 39 |
+
warn() { echo -e "${AMBER} β ${RESET} $*"; }
|
| 40 |
+
err() { echo -e "${RED} β${RESET} $*"; }
|
| 41 |
+
hr() { echo -e "${BOLD}ββββββββββββββββββββββββββββββββββββββββββββββββββ${RESET}"; }
|
| 42 |
+
|
| 43 |
+
# ββ Arg parsing βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 44 |
+
# Parse command-line options. Flags that take a value are checked for that
# value first: with `set -u` active, reading a missing "$2" would abort the
# script with a bare "unbound variable" error instead of a usable message.
while [[ $# -gt 0 ]]; do
  case "$1" in
  --skip-pipeline) SKIP_PIPELINE=true ;;
  --backend-only) BACKEND_ONLY=true ;;
  --frontend-only) FRONTEND_ONLY=true ;;
  --port-backend | --port-frontend | --stage)
    if [[ $# -lt 2 ]]; then
      err "Option $1 requires a value"
      exit 1
    fi
    case "$1" in
    --port-backend) BACKEND_PORT="$2" ;;
    --port-frontend) FRONTEND_PORT="$2" ;;
    --stage) PIPELINE_STAGE="$2" ;;
    esac
    # Consume the value; the shared shift below consumes the flag itself.
    shift
    ;;
  *) warn "Unknown option: $1" ;;
  esac
  shift
done
|
| 65 |
+
|
| 66 |
+
# ββ Cleanup handler βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 67 |
+
# Stop one started service.
#   $1 = PID recorded at launch (may be empty when the service never started)
#   $2 = label used in the confirmation message
# The recorded PID can be a wrapper subshell rather than the server itself
# (the backend is launched as `( ... | sed ... ) &`), so its direct children
# are collected *before* the wrapper is signalled and are signalled as well;
# otherwise they would be orphaned and keep the port busy.
_stop_service() {
  local pid="$1" label="$2" kids
  [[ -n "$pid" ]] || return 0
  kids="$(pgrep -P "$pid" 2>/dev/null || true)"
  if kill "$pid" 2>/dev/null; then
    ok "$label stopped"
  fi
  # Intentionally unquoted: $kids is a whitespace-separated PID list.
  # shellcheck disable=SC2086
  [[ -n "$kids" ]] && kill $kids 2>/dev/null
  return 0
}

# Shut down everything this script started. Registered on EXIT only, so it
# runs exactly once no matter how the script ends.
cleanup() {
  echo ""
  hr
  info "Shutting down servicesβ¦"
  _stop_service "$BACKEND_PID" "Backend"
  _stop_service "$FRONTEND_PID" "Frontend"
  hr
}
trap cleanup EXIT
# A trapped signal does not end the script by itself: execution would resume
# after the interrupted command (e.g. inside the health-check loop) and
# cleanup would fire a second time on exit. Convert the signals into a real
# exit so the EXIT trap does the work once.
trap 'exit 130' INT
trap 'exit 143' TERM
|
| 76 |
+
|
| 77 |
+
# ββ Banner ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 78 |
+
echo ""
|
| 79 |
+
echo -e "${BOLD} β SchemeImpactNet β Service Manager${RESET}"
|
| 80 |
+
hr
|
| 81 |
+
echo ""
|
| 82 |
+
|
| 83 |
+
# ββ Prerequisite checks βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 84 |
+
info "Checking prerequisitesβ¦"
|
| 85 |
+
|
| 86 |
+
if ! command -v python &>/dev/null && ! command -v python3 &>/dev/null; then
|
| 87 |
+
err "Python not found. Install Python 3.9+."
|
| 88 |
+
exit 1
|
| 89 |
+
fi
|
| 90 |
+
PYTHON=$(command -v python3 2>/dev/null || command -v python)
|
| 91 |
+
ok "Python β $($PYTHON --version 2>&1)"
|
| 92 |
+
|
| 93 |
+
if ! $PYTHON -m uvicorn --version &>/dev/null; then
|
| 94 |
+
warn "uvicorn not found β attempting installβ¦"
|
| 95 |
+
$PYTHON -m pip install "uvicorn[standard]" --quiet || {
|
| 96 |
+
err "uvicorn install failed."
|
| 97 |
+
exit 1
|
| 98 |
+
}
|
| 99 |
+
fi
|
| 100 |
+
ok "uvicorn ready"
|
| 101 |
+
|
| 102 |
+
if ! $PYTHON -m streamlit --version &>/dev/null; then
|
| 103 |
+
warn "streamlit not found β attempting installβ¦"
|
| 104 |
+
$PYTHON -m pip install streamlit --quiet || {
|
| 105 |
+
err "streamlit install failed."
|
| 106 |
+
exit 1
|
| 107 |
+
}
|
| 108 |
+
fi
|
| 109 |
+
STREAMLIT_VER=$($PYTHON -m streamlit --version 2>&1 | awk '{print $3}')
|
| 110 |
+
ok "streamlit $STREAMLIT_VER ready"
|
| 111 |
+
|
| 112 |
+
STREAMLIT_MAJOR=$(echo "$STREAMLIT_VER" | cut -d. -f1)
|
| 113 |
+
STREAMLIT_MINOR=$(echo "$STREAMLIT_VER" | cut -d. -f2)
|
| 114 |
+
if [[ "$STREAMLIT_MAJOR" -lt 1 ]] || { [[ "$STREAMLIT_MAJOR" -eq 1 ]] && [[ "$STREAMLIT_MINOR" -lt 36 ]]; }; then
|
| 115 |
+
warn "Streamlit $STREAMLIT_VER β upgrade to 1.36+ for st.navigation():"
|
| 116 |
+
warn " pip install --upgrade streamlit"
|
| 117 |
+
fi
|
| 118 |
+
|
| 119 |
+
if [[ ! -f "$PROJECT_ROOT/frontend/app.py" ]]; then
|
| 120 |
+
err "frontend/app.py not found at $PROJECT_ROOT/frontend/app.py"
|
| 121 |
+
exit 1
|
| 122 |
+
fi
|
| 123 |
+
ok "frontend/app.py found"
|
| 124 |
+
|
| 125 |
+
if [[ ! -f "$PROJECT_ROOT/backend/main.py" ]]; then
|
| 126 |
+
err "backend/main.py not found at $PROJECT_ROOT/backend/main.py"
|
| 127 |
+
exit 1
|
| 128 |
+
fi
|
| 129 |
+
ok "backend/main.py found"
|
| 130 |
+
|
| 131 |
+
echo ""
|
| 132 |
+
|
| 133 |
+
# ── Data pipeline ─────────────────────────────────────────────────────────────
|
| 134 |
+
# Generate the processed CSVs when any of them is missing, unless the caller
# asked for the frontend only or passed --skip-pipeline.
if [[ "$FRONTEND_ONLY" == false && "$SKIP_PIPELINE" == false ]]; then
  PROCESSED_FILES=(
    "$PROJECT_ROOT/data/processed/mnrega_cleaned.csv"
    "$PROJECT_ROOT/data/processed/mnrega_predictions.csv"
    "$PROJECT_ROOT/data/processed/optimized_budget_allocation.csv"
  )

  MISSING=false
  for f in "${PROCESSED_FILES[@]}"; do
    if [[ ! -f "$f" ]]; then
      warn "Missing: $f"
      MISSING=true
    fi
  done

  if [[ "$MISSING" == true ]]; then
    hr
    info "Processed data not found β running Stage $PIPELINE_STAGE pipelineβ¦"
    info "This may take several minutes on first run."
    hr
    echo ""
    # "$PYTHON" is quoted so an interpreter path containing spaces still works.
    cd "$PROJECT_ROOT" && "$PYTHON" main.py --stage "$PIPELINE_STAGE" || {
      err "Pipeline failed. Check errors above."
      exit 1
    }
    echo ""
    ok "Pipeline complete"
    hr
    echo ""
  else
    ok "Processed data found β skipping pipeline"
    for f in "${PROCESSED_FILES[@]}"; do
      # "$f" must be quoted: with a space in the project path, an unquoted
      # `basename $f` receives two arguments and prints the wrong name.
      info " $(basename -- "$f") ($(wc -l <"$f") rows)"
    done
    echo ""
  fi
fi
|
| 171 |
+
|
| 172 |
+
# ββ Start backend βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 173 |
+
# Launch the FastAPI backend (skipped with --frontend-only) and wait briefly
# for its /health endpoint to answer.
if [[ "$FRONTEND_ONLY" == false ]]; then
  if lsof -i ":$BACKEND_PORT" &>/dev/null; then
    warn "Port $BACKEND_PORT already in use β stopping existing processβ¦"
    lsof -ti ":$BACKEND_PORT" | xargs kill -9 2>/dev/null || true
    sleep 1
  fi

  info "Starting FastAPI backend on port $BACKEND_PORTβ¦"
  # Backend must run from project root so 'backend.main' import resolves.
  # `exec` replaces the wrapper subshell with uvicorn, and the log prefixing
  # is attached through process substitution instead of a pipeline, so $!
  # below is uvicorn's own PID. Previously $! pointed at a subshell wrapping
  # `uvicorn | sed`, and killing it left uvicorn running.
  (
    cd "$PROJECT_ROOT"
    exec "$PYTHON" -m uvicorn backend.main:app \
      --host 0.0.0.0 \
      --port "$BACKEND_PORT" \
      --reload \
      --log-level warning
  ) > >(sed "s/^/ [backend] /") 2>&1 &
  BACKEND_PID=$!

  info "Waiting for backend health checkβ¦"
  MAX_WAIT=15
  WAITED=0
  BACKEND_UP=false
  # Probe once per second; remember the outcome so the result message does
  # not need a second, separate request after the loop.
  while [[ $WAITED -lt $MAX_WAIT ]]; do
    if curl -sf "http://localhost:$BACKEND_PORT/health" &>/dev/null; then
      BACKEND_UP=true
      break
    fi
    sleep 1
    WAITED=$((WAITED + 1))
  done

  if [[ "$BACKEND_UP" == true ]]; then
    ok "Backend live β http://localhost:$BACKEND_PORT"
  else
    # Deliberately non-fatal, as before: the frontend is still started.
    warn "Backend health check timed out after ${MAX_WAIT}s β continuing anyway"
  fi
  echo ""
fi
|
| 204 |
+
|
| 205 |
+
# ββ Start frontend ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 206 |
+
# Launch the Streamlit frontend (skipped with --backend-only).
if [[ "$BACKEND_ONLY" == false ]]; then
  if lsof -i ":$FRONTEND_PORT" &>/dev/null; then
    warn "Port $FRONTEND_PORT already in use β stopping existing processβ¦"
    lsof -ti ":$FRONTEND_PORT" | xargs kill -9 2>/dev/null || true
    sleep 1
  fi

  info "Starting Streamlit frontend on port $FRONTEND_PORTβ¦"
  # Streamlit is started from inside frontend/ so app.py is found by its
  # relative name; the working directory is restored right after the launch.
  cd "$PROJECT_ROOT/frontend"
  "$PYTHON" -m streamlit run app.py --server.port "$FRONTEND_PORT" --server.headless true --browser.gatherUsageStats false &
  FRONTEND_PID=$!
  cd "$PROJECT_ROOT"

  sleep 2
  # Only announce the dashboard if the process survived start-up; reporting
  # "live" for a Streamlit that already crashed sends users to a dead URL.
  if kill -0 "$FRONTEND_PID" 2>/dev/null; then
    ok "Frontend live β http://localhost:$FRONTEND_PORT"
  else
    err "Frontend exited during startup. Check the Streamlit output above."
    exit 1
  fi
  echo ""
fi
|
| 223 |
+
|
| 224 |
+
# ββ Ready banner ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 225 |
+
hr
|
| 226 |
+
echo ""
|
| 227 |
+
echo -e "${BOLD} β SchemeImpactNet is running${RESET}"
|
| 228 |
+
echo ""
|
| 229 |
+
[[ "$FRONTEND_ONLY" == false ]] && echo -e " ${GREEN}Backend${RESET} http://localhost:$BACKEND_PORT"
|
| 230 |
+
[[ "$FRONTEND_ONLY" == false ]] && echo -e " ${GREEN}API docs${RESET} http://localhost:$BACKEND_PORT/docs"
|
| 231 |
+
[[ "$BACKEND_ONLY" == false ]] && echo -e " ${GREEN}Dashboard${RESET} http://localhost:$FRONTEND_PORT"
|
| 232 |
+
echo ""
|
| 233 |
+
echo -e " ${BOLD}Press Ctrl+C to stop all services${RESET}"
|
| 234 |
+
echo ""
|
| 235 |
+
hr
|
| 236 |
+
echo ""
|
| 237 |
+
|
| 238 |
+
# ββ Keep alive ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 239 |
+
wait
|