sammeeer committed on
Commit
f87e795
·
1 Parent(s): c218206

Initial schemeimpactnet deployment

Browse files
.dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ .venv/
3
+ wandb/
4
+
5
+ data/schemeimpactnet.db
6
+ data/raw/
7
+
8
+ reports/figures/
9
+
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ .venv/
3
+ wandb/
4
+
5
+ data/schemeimpactnet.db
6
+ data/raw/
7
+
8
+ reports/figures/
9
+
.streamlit/config.toml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ headless = true
3
+ fileWatcherType = "auto"
4
+
5
+ [browser]
6
+ gatherUsageStats = false
7
+
8
+ [theme]
9
+ base = "light"
10
+ backgroundColor = "#FAF9F7"
11
+ secondaryBackgroundColor = "#F5F5F4"
12
+ textColor = "#1C1917"
13
+ font = "serif"
14
+
15
+ [client]
16
+ showSidebarNavigation = true
Dockerfile CHANGED
@@ -1,20 +1,31 @@
1
- FROM python:3.13.5-slim
 
 
 
2
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
 
6
  build-essential \
7
  curl \
8
- git \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
 
 
 
 
13
 
14
- RUN pip3 install -r requirements.txt
 
 
15
 
16
- EXPOSE 8501
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # HF Spaces runs as non-root user 1000
4
+ RUN useradd -m -u 1000 appuser
5
 
6
  WORKDIR /app
7
 
8
+ # Install system deps
9
+ RUN apt-get update && apt-get install -y --no-install-recommends \
10
  build-essential \
11
  curl \
 
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
+ # Copy requirements first for layer caching
15
+ COPY requirements.txt .
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ # Copy entire project
19
+ COPY --chown=appuser:appuser . .
20
 
21
+ # Create necessary directories
22
+ RUN mkdir -p data/raw data/processed data/db models reports/figures \
23
+ && chown -R appuser:appuser /app
24
 
25
+ USER appuser
26
 
27
+ # HF Spaces exposes port 7860
28
+ EXPOSE 7860
29
 
30
+ # Entrypoint: generate synthetic data, seed DB, start both services
31
+ CMD ["bash", "hf_start.sh"]
README.md DELETED
@@ -1,19 +0,0 @@
1
- ---
2
- title: SchemeImpactNet
3
- emoji: πŸš€
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Predictive analysis of government schemes...
12
- ---
13
-
14
- # Welcome to Streamlit!
15
-
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
-
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/crud.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ crud.py
3
+ -------
4
+ Database query functions. All queries return plain dicts/lists
5
+ so FastAPI routers stay thin.
6
+
7
+ V3 update: expenditure_lakhs, expenditure_per_personday, demand_fulfillment_rate
8
+ removed — these were synthetic columns dropped in the leak-free pipeline.
9
+ """
10
+
11
+ import pandas as pd
12
+ from sqlalchemy.orm import Session
13
+ from sqlalchemy import text
14
+ from typing import Optional, List
15
+
16
+
17
+ # ── Districts ─────────────────────────────────────────────────────────────────
18
+
19
def get_states(db: Session) -> List[str]:
    """Return the distinct ``state`` values of ``district_data``, ordered by name."""
    query = text("SELECT DISTINCT state FROM district_data ORDER BY state")
    # Each result row holds a single column; unwrap it into a flat list.
    return [record[0] for record in db.execute(query).fetchall()]
22
+
23
+
24
def get_districts(db: Session, state: str) -> List[str]:
    """Return the distinct district names recorded for ``state``, ordered by name.

    The state is passed as a bound parameter, never interpolated into the SQL.
    """
    query = text(
        "SELECT DISTINCT district FROM district_data WHERE state=:s ORDER BY district"
    )
    result = db.execute(query, {"s": state})
    return [record[0] for record in result.fetchall()]
30
+
31
+
32
def get_district_history(db: Session, state: str, district: str) -> List[dict]:
    """Return the per-year records of one district, ordered by financial year.

    Each dict carries ``state``, ``district``, ``financial_year``,
    ``person_days_lakhs`` and ``avg_wage_rate``.
    """
    query = text("""
        SELECT state, district, financial_year,
               person_days_lakhs, avg_wage_rate
        FROM district_data
        WHERE state=:s AND district=:d
        ORDER BY financial_year
    """)
    bindings = {"s": state, "d": district}
    result = db.execute(query, bindings)
    return [dict(record._mapping) for record in result.fetchall()]
41
+
42
+
43
def get_top_districts(db: Session, state: Optional[str], metric: str, n: int) -> List[dict]:
    """Return the ``n`` districts with the highest average of ``metric``.

    ``metric`` is interpolated into the SQL text, so it is checked against a
    whitelist first; anything not on the list falls back to
    ``person_days_lakhs``. When ``state`` is truthy the ranking is restricted
    to that state. Each dict carries ``state``, ``district`` and
    ``avg_persondays``.
    """
    # Only allow metrics that actually exist in V3 data.
    allowed_metrics = {"person_days_lakhs"}
    order_metric = metric if metric in allowed_metrics else "person_days_lakhs"

    if state:
        state_filter, bindings = "WHERE state=:s", {"s": state, "n": n}
    else:
        state_filter, bindings = "", {"n": n}

    query = text(f"""
        SELECT state, district,
               AVG(person_days_lakhs) as avg_persondays
        FROM district_data
        {state_filter}
        GROUP BY state, district
        ORDER BY AVG({order_metric}) DESC
        LIMIT :n
    """)
    return [dict(record._mapping) for record in db.execute(query, bindings).fetchall()]
60
+
61
+
62
def get_yearly_trend(db: Session, state: Optional[str]) -> List[dict]:
    """Return one aggregate row per financial year, in year order.

    Each dict carries ``financial_year``, ``total_persondays`` (sum of
    ``person_days_lakhs``) and ``avg_wage`` (mean of ``avg_wage_rate``).
    A truthy ``state`` restricts the aggregation to that state.
    """
    if state:
        state_filter, bindings = "WHERE state=:s", {"s": state}
    else:
        state_filter, bindings = "", {}

    query = text(f"""
        SELECT financial_year,
               SUM(person_days_lakhs) as total_persondays,
               AVG(avg_wage_rate) as avg_wage
        FROM district_data
        {state_filter}
        GROUP BY financial_year
        ORDER BY financial_year
    """)
    return [dict(record._mapping) for record in db.execute(query, bindings).fetchall()]
75
+
76
+
77
def get_stats(db: Session) -> dict:
    """Return headline statistics computed over ``district_data``.

    Keys of the returned dict:
      * ``total_districts`` / ``total_states`` - distinct counts
      * ``year_range`` - "<min year> – <max year>"
      * ``total_persondays_lakhs`` - sum of ``person_days_lakhs``
      * ``total_expenditure_lakhs`` - always 0.0 (column removed in V3)
      * ``covid_spike_pct`` - percentage change of the mean
        ``person_days_lakhs`` from financial year 2019 to 2020; 0.0 when
        either year has no data or the 2019 mean is zero
    """
    summary_row = db.execute(text("""
        SELECT
            COUNT(DISTINCT district) as total_districts,
            COUNT(DISTINCT state) as total_states,
            MIN(financial_year)||' – '||MAX(financial_year) as year_range,
            SUM(person_days_lakhs) as total_persondays_lakhs
        FROM district_data
    """)).fetchone()
    base = dict(summary_row._mapping)
    base["total_expenditure_lakhs"] = 0.0  # removed in V3 (synthetic column)

    # COVID spike: compare the average person-days of FY 2019 and FY 2020.
    pre = db.execute(
        text("SELECT AVG(person_days_lakhs) FROM district_data WHERE financial_year=2019")
    ).scalar()
    post = db.execute(
        text("SELECT AVG(person_days_lakhs) FROM district_data WHERE financial_year=2020")
    ).scalar()

    # AVG() over zero rows yields NULL/None. Guard both operands: the
    # subtraction must not run on None and the division must not hit zero.
    if pre and post is not None:
        base["covid_spike_pct"] = round((post - pre) / pre * 100, 2)
    else:
        base["covid_spike_pct"] = 0.0
    return base
94
+
95
+
96
+ # ── Predictions ───────────────────────────────────────────────────────────────
97
+
98
def get_predictions(
    db: Session,
    state: Optional[str],
    district: Optional[str],
    year: Optional[int]
) -> List[dict]:
    """Return prediction rows, optionally filtered by state, district and year.

    Every truthy argument adds one ``AND``-combined equality filter; falsy
    arguments are ignored. Rows are ordered by state, district and financial
    year. Values reach the query as bound parameters only.
    """
    # (bind-parameter name, column name, requested value)
    candidates = (
        ("state", "state", state),
        ("district", "district", district),
        ("year", "financial_year", year),
    )
    active = [entry for entry in candidates if entry[2]]

    params = {name: value for name, _column, value in active}
    conditions = [f"{column}=:{name}" for name, column, _value in active]
    where = ("WHERE " + " AND ".join(conditions)) if conditions else ""

    query = text(f"""
        SELECT state, district, financial_year,
               person_days_lakhs, predicted_persondays, prediction_error
        FROM predictions {where}
        ORDER BY state, district, financial_year
    """)
    return [dict(record._mapping) for record in db.execute(query, params).fetchall()]
119
+
120
+
121
+ # ── Optimizer ─────────────────────────────────────────────────────────────────
122
+
123
def get_optimizer_results(db: Session, state: Optional[str]) -> List[dict]:
    """Return the rows of the ``optimizer`` table, largest ``persondays_gain`` first.

    A truthy ``state`` restricts the result to that state.
    """
    if state:
        state_filter, bindings = "WHERE state=:s", {"s": state}
    else:
        state_filter, bindings = "", {}

    query = text(f"""
        SELECT * FROM optimizer {state_filter}
        ORDER BY persondays_gain DESC
    """)
    return [dict(record._mapping) for record in db.execute(query, bindings).fetchall()]
backend/database.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ database.py
3
+ -----------
4
+ SQLite database setup using SQLAlchemy.
5
+ Seeds from processed CSVs on first run.
6
+ """
7
+
8
+ import os
9
+ import pandas as pd
10
+ from sqlalchemy import create_engine, text
11
+ from sqlalchemy.orm import declarative_base, sessionmaker
12
+
13
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
14
+ DB_PATH = os.path.join(BASE_DIR, "data", "schemeimpactnet.db")
15
+ DB_URL = f"sqlite:///{DB_PATH}"
16
+
17
+ engine = create_engine(DB_URL, connect_args={"check_same_thread": False})
18
+ SessionLocal = sessionmaker(bind=engine, autocommit=False, autoflush=False)
19
+ Base = declarative_base()
20
+
21
+
22
def get_db():
    """Yield a session created by ``SessionLocal`` and close it afterwards.

    The ``finally`` clause guarantees the session is closed even when the
    consumer of the generator raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
28
+
29
+
30
def seed_database() -> None:
    """Load processed CSVs into SQLite tables on startup.

    Each CSV under ``data/processed`` is read with pandas and written to its
    table with ``if_exists="replace"``, so an existing table is overwritten.
    A missing CSV is reported and skipped rather than treated as an error.
    """
    processed = os.path.join(BASE_DIR, "data", "processed")

    # Target table name -> source CSV path.
    files = {
        "district_data": os.path.join(processed, "mnrega_cleaned.csv"),
        "predictions": os.path.join(processed, "mnrega_predictions.csv"),
        "optimizer": os.path.join(processed, "optimized_budget_allocation.csv"),
    }

    with engine.connect() as conn:
        for table, path in files.items():
            if not os.path.exists(path):
                print(f"[db] WARNING: {path} not found, skipping")
                continue
            df = pd.read_csv(path)
            df.to_sql(table, conn, if_exists="replace", index=False)
            print(f"[db] Seeded '{table}': {len(df)} rows")
        # NOTE(review): indentation was lost in the rendered diff; the commit
        # is assumed to run once, after every table has been written.
        conn.commit()

    print("[db] Database ready ✓")
backend/main.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ backend/main.py
3
+ ---------------
4
+ FastAPI application entry point.
5
+
6
+ Run with:
7
+ uvicorn backend.main:app --reload --port 8000
8
+ """
9
+
10
+ from fastapi import FastAPI
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+ from backend.database import seed_database
13
+ from backend.routers.districts import router as districts_router
14
+ from backend.routers.predictions import router as predictions_router
15
+ from backend.routers.optimizer import router as optimizer_router
16
+
17
# Application object together with its API metadata.
app = FastAPI(
    title="SchemeImpactNet API",
    description="MNREGA district-level forecasting and budget optimization",
    version="1.0.0",
)

# CORS is left fully open: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
29
+
30
@app.on_event("startup")
def startup():
    """Seed the database when the application starts."""
    seed_database()
33
+
34
# Register the feature routers, in the same order as before.
for feature_router in (districts_router, predictions_router, optimizer_router):
    app.include_router(feature_router)
37
+
38
@app.get("/")
def root():
    """Return the project name, API version and the path of the docs page."""
    info = {
        "project": "SchemeImpactNet",
        "version": "1.0.0",
        "docs": "/docs",
    }
    return info
41
+
42
@app.get("/health")
def health():
    """Liveness probe: always answers with a static ``ok`` status."""
    payload = {"status": "ok"}
    return payload
backend/routers/districts.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """routers/districts.py — District data endpoints."""
2
+
3
+ from fastapi import APIRouter, Depends, Query
4
+ from sqlalchemy.orm import Session
5
+ from typing import Optional, List
6
+ from backend.database import get_db
7
+ from backend import crud
8
+
9
+ router = APIRouter(prefix="/districts", tags=["Districts"])
10
+
11
+
12
@router.get("/states")
def list_states(db: Session = Depends(get_db)):
    """Return the list of states produced by ``crud.get_states``."""
    states = crud.get_states(db)
    return states
15
+
16
+
17
@router.get("/list")
def list_districts(state: str = Query(...), db: Session = Depends(get_db)):
    """Return the districts of the required ``state`` query parameter."""
    districts = crud.get_districts(db, state)
    return districts
20
+
21
+
22
@router.get("/history")
def district_history(
    state: str = Query(...),
    district: str = Query(...),
    db: Session = Depends(get_db),
):
    """Return the yearly records of one district; both query parameters are required."""
    history = crud.get_district_history(db, state, district)
    return history
29
+
30
+
31
@router.get("/top")
def top_districts(
    state: Optional[str] = Query(None),
    metric: str = Query("person_days_lakhs"),
    n: int = Query(10),
    db: Session = Depends(get_db),
):
    """Return the top ``n`` districts ranked by ``metric``, optionally within one state.

    All arguments are forwarded unchanged to ``crud.get_top_districts``.
    """
    ranking = crud.get_top_districts(db, state, metric, n)
    return ranking
39
+
40
+
41
@router.get("/trend")
def yearly_trend(
    state: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Return the per-year aggregates, optionally restricted to one state."""
    trend = crud.get_yearly_trend(db, state)
    return trend
47
+
48
+
49
@router.get("/stats")
def stats(db: Session = Depends(get_db)):
    """Return the headline statistics produced by ``crud.get_stats``."""
    summary = crud.get_stats(db)
    return summary
backend/routers/optimizer.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """routers/optimizer.py — Budget optimizer endpoints."""
2
+
3
+ from fastapi import APIRouter, Depends, Query
4
+ from sqlalchemy.orm import Session
5
+ from typing import Optional
6
+ from backend.database import get_db
7
+ from backend import crud
8
+ from backend.schemas import OptimizerRequest, OptimizerResponse
9
+
10
+ router = APIRouter(prefix="/optimizer", tags=["Optimizer"])
11
+
12
+
13
@router.get("/results")
def get_optimizer_results(
    state: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Return the stored optimizer rows, optionally restricted to one state."""
    results = crud.get_optimizer_results(db, state)
    return results
19
+
20
+
21
@router.post("/run", response_model=OptimizerResponse)
def run_optimizer_live(req: OptimizerRequest, db: Session = Depends(get_db)):
    """
    Run LP optimizer live with custom parameters.
    Reads predictions from DB, runs scipy LP, returns results.

    The LP maximises ``sum(persondays_per_lakh * budget)`` subject to
      * the total budget not exceeding the (scaled) status-quo total, and
      * every district budget staying within
        ``[min_fraction, max_fraction]`` times its scaled status-quo budget.

    If the solver does not report success, the status-quo budgets are
    returned unchanged. If there is nothing to optimise (no joined rows, or
    every row has a missing value), an all-zero response with an empty
    district list is returned.
    """
    import numpy as np
    import pandas as pd
    from scipy.optimize import linprog
    from sqlalchemy import text

    scope = req.state or "All-India"

    def _empty_response() -> OptimizerResponse:
        # Shared "nothing to optimise" answer.
        return OptimizerResponse(
            scope=scope,
            total_budget_lakhs=0, sq_persondays_total=0,
            opt_persondays_total=0, gain_lakhs=0, gain_pct=0, districts=[]
        )

    # Get latest year predictions + budget
    state_clause = "AND p.state=:s" if req.state else ""
    params = {"s": req.state} if req.state else {}

    rows = db.execute(text(f"""
        SELECT p.state, p.district,
               p.predicted_persondays,
               o.budget_allocated_lakhs,
               o.persondays_per_lakh
        FROM predictions p
        JOIN optimizer o ON p.district = o.district AND p.state = o.state
        WHERE p.financial_year = (SELECT MAX(financial_year) FROM predictions)
        {state_clause}
    """), params).fetchall()

    if not rows:
        return _empty_response()

    df = pd.DataFrame([dict(r._mapping) for r in rows]).dropna()
    if df.empty:
        # Every row had a missing value; do not hand an empty problem to the solver.
        return _empty_response()

    budgets = df["budget_allocated_lakhs"].values * req.budget_scale
    efficiency = df["persondays_per_lakh"].values
    total_bud = float(budgets.sum())

    lb = budgets * req.min_fraction
    ub = budgets * req.max_fraction

    # linprog minimises, so negate the efficiencies to maximise person-days.
    res = linprog(-efficiency, A_ub=[np.ones(len(df))],
                  b_ub=[total_bud], bounds=list(zip(lb, ub)), method="highs")

    opt_budgets = res.x if res.success else budgets
    sq_total = float((efficiency * budgets).sum())
    opt_total = float((efficiency * opt_budgets).sum())

    districts_out = []
    # The arrays are positionally aligned with the frame's rows, so a plain
    # zip replaces the per-row index lookups.
    for state, district, orig, opt, eff in zip(
        df["state"], df["district"], budgets, opt_budgets, efficiency
    ):
        orig, opt, eff = float(orig), float(opt), float(eff)
        sq_pd = eff * orig
        opt_pd = eff * opt
        districts_out.append({
            "state": state,
            "district": district,
            "budget_allocated_lakhs": round(orig, 2),
            "optimized_budget": round(opt, 2),
            "budget_change": round(opt - orig, 2),
            # A zero status-quo budget has no meaningful relative change;
            # report 0 instead of inf/nan, which JSON cannot represent.
            "budget_change_pct": round((opt - orig) / orig * 100, 2) if orig else 0,
            "sq_persondays": round(sq_pd, 3),
            "opt_persondays": round(opt_pd, 3),
            "persondays_gain": round(opt_pd - sq_pd, 3),
            "persondays_gain_pct": round((opt_pd - sq_pd) / sq_pd * 100, 2) if sq_pd else 0,
            "persondays_per_lakh": round(eff, 4),
        })

    gain = opt_total - sq_total
    return OptimizerResponse(
        scope=scope,
        total_budget_lakhs=round(total_bud, 2),
        sq_persondays_total=round(sq_total, 2),
        opt_persondays_total=round(opt_total, 2),
        gain_lakhs=round(gain, 2),
        gain_pct=round(gain / sq_total * 100, 2) if sq_total else 0,
        districts=districts_out
    )
backend/routers/predictions.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """routers/predictions.py — Model prediction endpoints."""
2
+
3
+ from fastapi import APIRouter, Depends, Query
4
+ from sqlalchemy.orm import Session
5
+ from typing import Optional
6
+ from backend.database import get_db
7
+ from backend import crud
8
+
9
+ router = APIRouter(prefix="/predictions", tags=["Predictions"])
10
+
11
+
12
@router.get("/")
def get_predictions(
    state: Optional[str] = Query(None),
    district: Optional[str] = Query(None),
    year: Optional[int] = Query(None),
    db: Session = Depends(get_db),
):
    """Return prediction rows, optionally filtered by state, district and year.

    All filters are forwarded unchanged to ``crud.get_predictions``.
    """
    predictions = crud.get_predictions(db, state, district, year)
    return predictions
backend/schemas.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ schemas.py
3
+ ----------
4
+ Pydantic schemas for API request/response validation.
5
+
6
+ V3 update: expenditure_lakhs, expenditure_per_personday, demand_fulfillment_rate
7
+ removed from DistrictSummary — synthetic columns dropped in leak-free pipeline.
8
+ """
9
+
10
+ from pydantic import BaseModel
11
+ from typing import Optional, List
12
+
13
+
14
class DistrictSummary(BaseModel):
    """One district's record for a single financial year."""

    # ``from_attributes`` is a Pydantic v2 setting; declare it through
    # ``model_config`` rather than the deprecated class-based ``Config``.
    model_config = {"from_attributes": True}

    state: str
    district: str
    financial_year: int
    person_days_lakhs: float
    avg_wage_rate: float
23
+
24
+
25
class PredictionOut(BaseModel):
    """Actual vs. predicted person-days for one district and financial year."""

    # ``from_attributes`` is a Pydantic v2 setting; declare it through
    # ``model_config`` rather than the deprecated class-based ``Config``.
    model_config = {"from_attributes": True}

    state: str
    district: str
    financial_year: int
    person_days_lakhs: float
    predicted_persondays: float
    prediction_error: float
35
+
36
+
37
class OptimizerOut(BaseModel):
    """Per-district optimizer result: status-quo ("sq") vs. optimized ("opt") figures."""

    # ``from_attributes`` is a Pydantic v2 setting; declare it through
    # ``model_config`` rather than the deprecated class-based ``Config``.
    model_config = {"from_attributes": True}

    state: str
    district: str
    budget_allocated_lakhs: float
    optimized_budget: float
    budget_change: float
    budget_change_pct: float
    sq_persondays: float
    opt_persondays: float
    persondays_gain: float
    persondays_gain_pct: float
    persondays_per_lakh: float
52
+
53
+
54
class OptimizerRequest(BaseModel):
    """Parameters accepted by the live optimizer endpoint."""

    # Restrict the run to one state; None means no state filter.
    state: Optional[str] = None
    # Multiplier applied to every district's allocated budget.
    budget_scale: float = 1.0
    # Lower bound of a district's budget, as a fraction of its scaled budget.
    min_fraction: float = 0.40
    # Upper bound of a district's budget, as a fraction of its scaled budget.
    max_fraction: float = 2.50
59
+
60
+
61
class OptimizerResponse(BaseModel):
    """Aggregate result of an optimizer run plus the per-district breakdown."""

    # State name, or "All-India" when the run was not restricted to a state.
    scope: str
    total_budget_lakhs: float
    # Person-days under the status-quo allocation.
    sq_persondays_total: float
    # Person-days under the optimized allocation.
    opt_persondays_total: float
    # Optimized total minus status-quo total, absolute and as a percentage.
    gain_lakhs: float
    gain_pct: float
    districts: List[OptimizerOut]
69
+
70
+
71
class StatsOut(BaseModel):
    """Headline statistics over the whole dataset."""

    total_districts: int
    total_states: int
    year_range: str
    total_persondays_lakhs: float
    # Kept for API compat, always 0.0 in V3.
    total_expenditure_lakhs: float
    covid_spike_pct: float
data/processed/mnrega_cleaned.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/mnrega_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/optimized_budget_allocation.csv ADDED
@@ -0,0 +1,707 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ state,district,budget_allocated_lakhs,optimized_budget,budget_change,budget_change_pct,sq_persondays,opt_persondays,persondays_gain,persondays_gain_pct,persondays_per_lakh
2
+ Rajasthan,Jhalawar,34513.79,61420.08,26906.29,77.96,142.07,252.825,110.755,77.96,0.004116325677359687
3
+ Rajasthan,Bhilwara,63021.65,96309.49,33287.840000000004,52.82,207.529,317.145,109.616,52.82,0.0032929794761006733
4
+ Rajasthan,Jodhpur,43157.28,72509.08,29351.800000000003,68.01,154.84,260.149,105.309,68.01,0.00358780720193673
5
+ Odisha,Ganjam,22347.28,40981.25,18633.97,83.38,115.335,211.505,96.17,83.38,0.005161030783164663
6
+ Tamil Nadu,Chengalpattu,22102.08,40171.85,18069.769999999997,81.76,103.364,187.87,84.506,81.76,0.00467666391579435
7
+ Andhra Pradesh,Prakasam,44170.36,66462.53,22292.17,50.47,143.216,215.495,72.279,50.47,0.0032423552807810487
8
+ Gujarat,Dohad,28223.4,47928.91,19705.510000000002,69.82,103.06,175.016,71.956,69.82,0.003651579894697308
9
+ Uttar Pradesh,Sitapur,34734.96,55845.88,21110.92,60.78,117.944,189.627,71.683,60.78,0.0033955415523725953
10
+ Tamil Nadu,Ramanathapuram,24985.21,43423.92,18438.71,73.8,96.415,167.568,71.153,73.8,0.0038588829151325926
11
+ Uttar Pradesh,Siddharth Nagar,33779.28,54065.01,20285.730000000003,60.05,114.38,183.069,68.689,60.05,0.003386099407684237
12
+ Tamil Nadu,Dindigul,33099.97,53277.05,20177.08,60.96,112.531,181.128,68.597,60.96,0.003399731178004089
13
+ Rajasthan,Ajmer,64856.7,86446.01,21589.309999999998,33.29,198.554,264.648,66.094,33.29,0.0030614261903550446
14
+ Odisha,Koraput,24902.06,42423.72,17521.66,70.36,92.406,157.425,65.019,70.36,0.0037107773413123254
15
+ Rajasthan,Nagaur,85238.46,106521.25,21282.789999999994,24.97,255.112,318.81,63.698,24.97,0.00299292127051568
16
+ Tamil Nadu,Kanchipuram,20938.58,36656.01,15717.43,75.06,82.417,144.283,61.866,75.06,0.003936131294481287
17
+ Andhra Pradesh,Kurnool,29973.18,47160.09,17186.909999999996,57.34,100.049,157.418,57.369,57.34,0.003337950794677108
18
+ Rajasthan,Bikaner,34206.5,51717.5,17511.0,51.19,111.376,168.392,57.016,51.19,0.0032559893587476066
19
+ Rajasthan,Banswara,60504.32,78675.2,18170.879999999997,30.03,183.15,238.154,55.004,30.03,0.003027056580422687
20
+ Maharashtra,Palghar,21062.69,35501.96,14439.27,68.55,76.026,128.145,52.119,68.55,0.0036095104661370415
21
+ Odisha,Kendujhar,22443.48,37179.9,14736.420000000002,65.66,78.262,129.649,51.387,65.66,0.0034870706325400517
22
+ Andhra Pradesh,Nellore,33897.52,49717.74,15820.220000000001,46.67,108.913,159.743,50.83,46.67,0.0032130079132632714
23
+ Tamil Nadu,Sivagangai,28850.77,44141.83,15291.060000000001,53.0,95.105,145.511,50.406,53.0,0.0032964458140978562
24
+ Tamil Nadu,Pudukkottai,42900.64,59043.35,16142.71,37.63,132.89,182.894,50.004,37.63,0.0030976227860470143
25
+ Tamil Nadu,Tirupathur,14446.82,25866.05,11419.23,79.04,61.585,110.264,48.679,79.04,0.0042628758439573556
26
+ Madhya Pradesh,Balaghat,29396.28,44285.34,14889.059999999998,50.65,95.607,144.031,48.424,50.65,0.0032523502973845673
27
+ Tamil Nadu,Karur,15300.44,27117.68,11817.24,77.23,62.237,110.306,48.069,77.24,0.004067660799297275
28
+ Uttar Pradesh,Kheri,26011.01,40361.48,14350.470000000005,55.17,86.324,133.95,47.626,55.17,0.0033187484838151232
29
+ Odisha,Bolangir,28454.31,43072.11,14617.8,51.37,92.675,140.285,47.61,51.37,0.0032569758324837252
30
+ Tamil Nadu,Vellore,21076.41,34419.69,13343.280000000002,63.31,72.252,117.994,45.742,63.31,0.003428098048956155
31
+ Tamil Nadu,Nagapattinam,11359.29,20728.36,9369.07,82.48,55.317,100.942,45.625,82.48,0.004869758585263691
32
+ Andhra Pradesh,Palnadu,25215.12,38852.88,13637.759999999998,54.09,83.554,128.745,45.191,54.09,0.0033136467325953637
33
+ Andhra Pradesh,Anantapur,37192.75,51524.01,14331.260000000002,38.53,115.475,159.97,44.495,38.53,0.0031047717633140865
34
+ Tamil Nadu,Thoothukkudi,21805.35,34860.85,13055.5,59.87,73.829,118.033,44.204,59.87,0.0033858204523201873
35
+ Chhattisgarh,Kanker,18867.61,31119.62,12252.009999999998,64.94,65.337,107.765,42.428,64.94,0.003462918726855177
36
+ Maharashtra,Parbhani,21829.87,34110.45,12280.579999999998,56.26,72.635,113.496,40.861,56.26,0.0033273216927082027
37
+ Uttar Pradesh,Prayagraj,20807.27,32926.51,12119.240000000002,58.25,69.884,110.588,40.704,58.25,0.0033586337852106497
38
+ Assam,Dhubri,9215.78,16916.9,7701.120000000001,83.56,48.286,88.636,40.35,83.56,0.0052394913941088
39
+ Andhra Pradesh,Tirupati,33787.19,46622.89,12835.699999999997,37.99,104.842,144.671,39.829,37.99,0.003103010342085269
40
+ Rajasthan,Karauli,8015.52,14858.61,6843.09,85.37,46.365,85.948,39.583,85.37,0.005784403257680101
41
+ Madhya Pradesh,Dindori,31667.27,44213.05,12545.780000000002,39.62,99.078,138.33,39.252,39.62,0.0031287193370315787
42
+ Tamil Nadu,Tenkasi,13610.81,23532.3,9921.49,72.89,52.101,90.08,37.979,72.89,0.003827913254244237
43
+ Odisha,Nabarangapur,17937.75,28807.36,10869.61,60.6,60.902,97.806,36.904,60.6,0.0033951861298100374
44
+ Chhattisgarh,Rajnandagon,17478.81,28291.6,10812.789999999997,61.86,59.602,96.473,36.871,61.86,0.0034099575428762022
45
+ Uttar Pradesh,Fatehpur,14174.0,24121.52,9947.52,70.18,52.156,88.76,36.604,70.18,0.003679695216593763
46
+ Odisha,Dhenkanal,10094.64,18219.82,8125.18,80.49,45.315,81.789,36.474,80.49,0.004489015953020613
47
+ Tamil Nadu,Tiruvarur,18963.31,29562.69,10599.379999999997,55.89,63.079,98.336,35.257,55.89,0.003326370765441265
48
+ Uttar Pradesh,Sant Kabeer Nagar,21731.81,32581.65,10849.84,49.93,70.258,105.335,35.077,49.93,0.0032329566658276503
49
+ Tamil Nadu,Tirunelveli,12376.02,21419.81,9043.79,73.08,47.512,82.231,34.719,73.07,0.003839037105628465
50
+ Tamil Nadu,Erode,18763.56,29149.49,10385.93,55.35,62.324,96.821,34.497,55.35,0.003321544525665705
51
+ Himachal Pradesh,Mandi,36291.24,47584.16,11292.920000000006,31.12,110.375,144.721,34.346,31.12,0.0030413675586725615
52
+ Odisha,Mayurbhanj,35253.13,46478.04,11224.910000000003,31.84,107.416,141.618,34.202,31.84,0.0030469918557586234
53
+ Karnataka,Vijayanagara,22187.48,32662.92,10475.439999999999,47.21,71.342,105.025,33.683,47.21,0.0032154169829110833
54
+ Rajasthan,Alwar,10564.01,18780.4,8216.390000000001,77.78,43.177,76.759,33.582,77.78,0.0040871790163015745
55
+ Uttar Pradesh,Banda,10978.28,19377.9,8399.62,76.51,43.867,77.43,33.563,76.51,0.003995798977617622
56
+ Karnataka,Belagavi,35936.08,46923.51,10987.43,30.57,109.043,142.383,33.34,30.58,0.003034359896794531
57
+ Maharashtra,Hingoli,11616.64,20231.56,8614.920000000002,74.16,44.881,78.165,33.284,74.16,0.0038635095862486917
58
+ Odisha,Jajpur,4483.12,8415.9,3932.7799999999997,87.72,36.597,68.701,32.104,87.72,0.008163288067238888
59
+ Rajasthan,Tonk,12727.92,21545.47,8817.550000000001,69.28,46.331,78.428,32.097,69.28,0.0036401077316639326
60
+ Andhra Pradesh,Bapatla,22944.75,33030.79,10086.04,43.96,72.956,105.026,32.07,43.96,0.003179638043561163
61
+ Jharkhand,Giridih,31209.94,41542.57,10332.630000000001,33.11,95.496,127.112,31.616,33.11,0.0030597944116521852
62
+ Uttar Pradesh,Amethi,13338.55,22217.28,8878.73,66.56,47.308,78.798,31.49,66.56,0.003546712348793535
63
+ Uttar Pradesh,Barabanki,27689.45,37858.09,10168.639999999996,36.72,85.624,117.068,31.444,36.72,0.003092296885636948
64
+ Assam,Barpeta,5231.46,9754.48,4523.0199999999995,86.46,35.876,66.894,31.018,86.46,0.006857741433557744
65
+ Tamil Nadu,Krishnagiri,34206.25,44293.61,10087.36,29.49,103.378,133.864,30.486,29.49,0.00302219623606797
66
+ Odisha,Angul,8309.35,15027.6,6718.25,80.85,37.567,67.941,30.374,80.85,0.004521051586465849
67
+ Uttar Pradesh,Rae Bareli,12541.8,20980.9,8439.100000000002,67.29,44.797,74.94,30.143,67.29,0.0035718158478049403
68
+ Tamil Nadu,Tiruppur,19337.98,28677.91,9339.93,48.3,62.249,92.314,30.065,48.3,0.00321900219154224
69
+ Uttar Pradesh,Balrampur,16982.83,26075.96,9093.129999999997,53.54,56.06,86.076,30.016,53.54,0.0033009810496837095
70
+ Uttar Pradesh,Jhansi,13783.26,22434.53,8651.269999999999,62.77,47.131,76.713,29.582,62.77,0.0034194377817729623
71
+ Uttar Pradesh,Ballia,13814.85,22460.97,8646.12,62.59,47.224,76.779,29.555,62.58,0.003418350543075024
72
+ Andhra Pradesh,Y.S.R,25967.38,35456.65,9489.27,36.54,80.296,109.639,29.343,36.54,0.0030921871979383364
73
+ Rajasthan,Jalore,22667.38,31934.59,9267.21,40.88,71.417,100.615,29.198,40.88,0.0031506508471645157
74
+ Tamil Nadu,Mayiladuthurai,22279.07,31306.94,9027.869999999999,40.52,69.986,98.346,28.36,40.52,0.003141333996437015
75
+ Madhya Pradesh,Bhind,1558.31,2973.23,1414.92,90.8,30.47,58.136,27.666,90.8,0.019553233952166127
76
+ Rajasthan,Sikar,11102.46,18613.22,7510.760000000002,67.65,39.731,66.609,26.878,67.65,0.0035785762794912123
77
+ Rajasthan,Dholpur,4089.41,7662.02,3572.6100000000006,87.36,30.37,56.902,26.532,87.36,0.007426499177142914
78
+ Rajasthan,Jaipur,15021.74,23037.68,8015.9400000000005,53.36,49.572,76.025,26.453,53.36,0.0033000171751075445
79
+ Odisha,Bargarh,10240.79,17353.84,7113.049999999999,69.46,37.278,63.171,25.893,69.46,0.0036401488557035147
80
+ Jharkhand,Sahebganj,8898.15,15529.22,6631.07,74.52,34.723,60.599,25.876,74.52,0.0039022718205469677
81
+ Madhya Pradesh,Rajgarh,19739.9,27881.66,8141.759999999998,41.25,62.223,87.887,25.664,41.25,0.0031521436278805865
82
+ Kerala,Palakkad,26115.51,34289.23,8173.720000000005,31.3,79.43,104.29,24.86,31.3,0.003041487606407074
83
+ Rajasthan,Sawai Madhopur,5839.71,10698.51,4858.8,83.2,29.433,53.922,24.489,83.2,0.005040147541573126
84
+ Odisha,Rayagada,14561.44,22068.4,7506.960000000001,51.55,47.501,71.99,24.489,51.55,0.0032621086925468906
85
+ Odisha,Bhadrak,5067.22,9356.6,4289.38,84.65,28.68,52.957,24.277,84.65,0.005659908194236681
86
+ Andhra Pradesh,Guntur,8888.24,15270.81,6382.57,71.81,33.552,57.645,24.093,71.81,0.003774875565916312
87
+ Telangana,Mahabubabad,15590.57,22895.0,7304.43,46.85,50.113,73.592,23.479,46.85,0.0032143148069634403
88
+ Assam,Nalbari,6273.44,11379.66,5106.22,81.39,28.501,51.699,23.198,81.39,0.004543121477211865
89
+ Uttar Pradesh,Chitrakoot,6662.45,11916.61,5254.160000000001,78.86,28.349,50.706,22.357,78.86,0.0042550413136308715
90
+ Kerala,Thrissur,15471.21,22467.9,6996.690000000002,45.22,49.421,71.771,22.35,45.22,0.003194384925290265
91
+ Chhattisgarh,Gariyaband,18272.41,25379.3,7106.889999999999,38.89,56.97,79.128,22.158,38.89,0.0031178153292313383
92
+ Maharashtra,Chandrapur,22202.39,29432.44,7230.049999999999,32.56,67.848,89.942,22.094,32.56,0.0030558872265553393
93
+ Kerala,Kottayam,11619.93,18240.88,6620.950000000001,56.98,38.75,60.829,22.079,56.98,0.003334787731079275
94
+ Andhra Pradesh,West Godavari,12840.14,19552.6,6712.459999999999,52.28,42.123,64.144,22.021,52.28,0.0032805717071620714
95
+ Uttar Pradesh,Ambedkar Nagar,14554.18,21399.36,6845.18,47.03,46.794,68.802,22.008,47.03,0.003215158806610884
96
+ Karnataka,Bagalkote,14315.73,21152.33,6836.600000000002,47.76,46.059,68.055,21.996,47.76,0.0032173699839267715
97
+ Odisha,Nayagarh,6035.34,10904.1,4868.76,80.67,27.266,49.262,21.996,80.67,0.004517723939330675
98
+ Assam,Darrang,9428.32,15653.07,6224.75,66.02,33.135,55.011,21.876,66.02,0.0035144118994688343
99
+ Telangana,Sangareddy,17704.1,24621.97,6917.870000000003,39.07,55.268,76.864,21.596,39.08,0.003121762755519908
100
+ Madhya Pradesh,Chhatarpur,13676.71,20356.55,6679.84,48.84,44.083,65.614,21.531,48.84,0.0032232166946582915
101
+ Uttar Pradesh,Aligarh,10684.99,17043.76,6358.769999999999,59.51,36.134,57.638,21.504,59.51,0.003381753281940367
102
+ Tamil Nadu,Perambalur,12799.26,19328.31,6529.050000000001,51.01,41.663,62.916,21.253,51.01,0.003255110061050404
103
+ Madhya Pradesh,Rewa,8667.3,14546.36,5879.060000000001,67.83,31.095,52.187,21.092,67.83,0.0035876224429753213
104
+ Kerala,Ernakulam,14145.08,20695.53,6550.449999999999,46.31,45.419,66.452,21.033,46.31,0.00321093977552619
105
+ Karnataka,Davanagere,9489.86,15532.13,6042.269999999999,63.67,32.601,53.358,20.757,63.67,0.0034353509956943514
106
+ Uttar Pradesh,Mau,15310.59,21846.99,6536.4000000000015,42.69,48.487,69.187,20.7,42.69,0.003166892980610153
107
+ Uttar Pradesh,Pilibhit,11020.85,17200.79,6179.9400000000005,56.07,36.668,57.23,20.562,56.08,0.0033271480874887144
108
+ Madhya Pradesh,Shivpuri,25035.6,31739.39,6703.790000000001,26.78,75.217,95.358,20.141,26.78,0.0030044017319337266
109
+ Uttar Pradesh,Kashganj,7366.96,12683.76,5316.8,72.17,27.865,47.975,20.11,72.17,0.003782428572979899
110
+ Uttar Pradesh,Gonda,16865.32,23302.93,6437.610000000001,38.17,52.354,72.338,19.984,38.17,0.003104239943268198
111
+ Madhya Pradesh,Satna,10580.94,16571.61,5990.67,56.62,35.268,55.236,19.968,56.62,0.0033331632161225752
112
+ Chhattisgarh,Durg,13500.48,19703.59,6203.110000000001,45.95,43.314,63.216,19.902,45.95,0.0032083303704757166
113
+ Bihar,Sitamarhi,24346.93,30954.38,6607.450000000001,27.14,73.172,93.03,19.858,27.14,0.0030053891804839457
114
+ Uttar Pradesh,Varanasi,6283.68,11102.77,4819.09,76.69,25.208,44.541,19.333,76.69,0.004011661956051231
115
+ Andhra Pradesh,Krishna,25808.32,32158.93,6350.610000000001,24.61,77.234,96.239,19.005,24.61,0.002992600835699495
116
+ Uttar Pradesh,Amroha,12087.7,17882.14,5794.439999999999,47.94,38.897,57.543,18.646,47.94,0.0032178991867766404
117
+ Jharkhand,Jamtara,16680.67,22715.96,6035.290000000001,36.18,51.466,70.087,18.621,36.18,0.003085367674080238
118
+ Chhattisgarh,Dhamtari,12806.5,18621.26,5814.759999999998,45.4,40.929,59.513,18.584,45.41,0.0031959551790106586
119
+ Jammu and Kashmir,Kishtwar,10612.18,16198.31,5586.129999999999,52.64,34.874,53.231,18.357,52.64,0.003286223942677188
120
+ Nagaland,Dimapur,3211.34,5976.19,2764.8499999999995,86.1,20.958,39.002,18.044,86.1,0.006526247610031948
121
+ Kerala,Pathanamthitta,10115.32,15567.96,5452.639999999999,53.9,33.417,51.43,18.013,53.9,0.003303602851911754
122
+ Uttarakhand,Tehri Garhwal,8177.31,13369.08,5191.7699999999995,63.49,28.057,45.87,17.813,63.49,0.0034310794136458074
123
+ Assam,Bongaigaon,2252.34,4236.33,1983.9899999999998,88.09,19.666,36.989,17.323,88.09,0.008731363826065337
124
+ Uttar Pradesh,Unnao,13438.3,18908.04,5469.740000000002,40.7,42.253,59.451,17.198,40.7,0.003144222111427785
125
+ Uttar Pradesh,Mirzapur,14179.63,19669.03,5489.4,38.71,44.175,61.277,17.102,38.71,0.0031153845340111132
126
+ Odisha,Cuttack,2650.32,4960.91,2310.5899999999997,87.18,19.577,36.645,17.068,87.18,0.007386655196353648
127
+ Andhra Pradesh,Ntr,25273.44,30989.65,5716.210000000003,22.62,75.382,92.431,17.049,22.62,0.0029826568919782987
128
+ Madhya Pradesh,Sidhi,13475.68,18887.52,5411.84,40.16,42.249,59.216,16.967,40.16,0.0031352035667216796
129
+ Jammu and Kashmir,Shopian,5659.65,9938.74,4279.09,75.61,22.312,39.181,16.869,75.61,0.003942293251349466
130
+ Bihar,Gopalganj,12758.18,18066.46,5308.279999999999,41.61,40.227,56.964,16.737,41.61,0.003153035934592551
131
+ Madhya Pradesh,Tikamgarh,7415.88,12204.69,4788.81,64.58,25.663,42.235,16.572,64.58,0.0034605468265398037
132
+ Chhattisgarh,Korea,6020.12,10386.66,4366.54,72.53,22.84,39.406,16.566,72.53,0.003793944306758005
133
+ Maharashtra,Latur,22446.52,27929.3,5482.779999999999,24.43,67.103,83.494,16.391,24.43,0.002989461172600474
134
+ Himachal Pradesh,Kullu,11981.7,17140.26,5158.559999999998,43.05,37.981,54.333,16.352,43.05,0.0031699174574559537
135
+ Madhya Pradesh,Jhabua,11658.77,16741.56,5082.790000000001,43.6,37.018,53.156,16.138,43.6,0.0031751205315826625
136
+ Uttar Pradesh,Bareilly,13486.6,18658.92,5172.319999999998,38.35,41.867,57.924,16.057,38.35,0.003104340604748417
137
+ Uttar Pradesh,Kanpur Dehat,7449.55,12098.43,4648.88,62.4,25.46,41.348,15.888,62.4,0.003417656100032888
138
+ Maharashtra,Nandurbar,11131.85,16065.45,4933.6,44.32,35.428,51.13,15.702,44.32,0.0031825797149620232
139
+ Madhya Pradesh,Dewas,7487.25,12091.95,4604.700000000001,61.5,25.519,41.213,15.694,61.5,0.0034083274900664462
140
+ Uttar Pradesh,Farrukhabad,7500.79,12100.25,4599.46,61.32,25.523,41.174,15.651,61.32,0.003402708248064537
141
+ Uttar Pradesh,Etawah,7791.32,12413.94,4622.620000000001,59.33,26.343,41.972,15.629,59.33,0.0033810702166000113
142
+ Uttar Pradesh,Lalitpur,15202.69,20290.81,5088.120000000001,33.47,46.549,62.128,15.579,33.47,0.0030618923361589298
143
+ Andhra Pradesh,Visakhapatanam,5614.6,9697.17,4082.5699999999997,72.71,21.379,36.924,15.545,72.71,0.0038077512200334843
144
+ Nagaland,Kohima,1107.67,2109.41,1001.7399999999998,90.44,17.134,32.629,15.495,90.43,0.015468505962967309
145
+ Karnataka,Gadag,10844.21,15689.55,4845.34,44.68,34.581,50.032,15.451,44.68,0.0031888906614681942
146
+ Uttar Pradesh,Lucknow,3488.61,6416.47,2927.86,83.93,18.406,33.853,15.447,83.92,0.005276026841636067
147
+ Karnataka,Yadgir,14626.58,19654.15,5027.5700000000015,34.37,44.896,60.328,15.432,34.37,0.0030694803569939113
148
+ Bihar,Arwal,6966.35,11439.68,4473.33,64.21,23.964,39.352,15.388,64.21,0.003439964974484486
149
+ Assam,Lakhimpur,8387.74,12984.98,4597.24,54.81,27.821,43.069,15.248,54.81,0.0033168648527493703
150
+ Kerala,Idukki,18292.61,23356.24,5063.630000000001,27.68,55.061,70.303,15.242,27.68,0.0030100133332531553
151
+ Himachal Pradesh,Shimla,15169.78,20082.28,4912.499999999998,32.38,46.348,61.357,15.009,32.38,0.003055284915140496
152
+ Assam,Nagaon,14691.04,19501.64,4810.5999999999985,32.75,44.895,59.596,14.701,32.75,0.0030559443034666026
153
+ Rajasthan,Dausa,5954.55,10025.85,4071.3,68.37,21.453,36.121,14.668,68.37,0.003602791142907524
154
+ Telangana,Nizamabad,17319.99,22177.04,4857.049999999999,28.04,52.188,66.823,14.635,28.04,0.003013165711989441
155
+ Assam,South Salmara-Mankachar,2479.84,4619.38,2139.54,86.28,16.95,31.574,14.624,86.28,0.006835118394735143
156
+ Jharkhand,Gumla,10877.43,15481.88,4604.449999999999,42.33,34.387,48.943,14.556,42.33,0.003161316597762523
157
+ Tripura,South Tripura,14826.45,19600.95,4774.5,32.2,45.189,59.741,14.552,32.2,0.003047863783980656
158
+ Uttar Pradesh,Sonbhadra,15638.71,20363.67,4724.959999999999,30.21,47.388,61.705,14.317,30.21,0.00303017320482316
159
+ Madhya Pradesh,Singrauli,12966.41,17587.48,4621.07,35.64,39.947,54.184,14.237,35.64,0.003080806483830143
160
+ Assam,Goalpara,5604.63,9467.09,3862.46,68.92,20.334,34.347,14.013,68.91,0.0036280717906445204
161
+ Chhattisgarh,Balod,16100.06,20702.35,4602.289999999999,28.59,48.606,62.5,13.894,28.58,0.003018994960267229
162
+ Jharkhand,Godda,12771.04,17276.29,4505.25,35.28,39.341,53.219,13.878,35.28,0.0030804852228166224
163
+ Jammu and Kashmir,Doda,14984.69,19539.15,4554.460000000001,30.39,45.412,59.215,13.803,30.4,0.003030559858095162
164
+ Uttar Pradesh,Kaushambi,6734.04,10802.44,4068.4000000000005,60.42,22.824,36.613,13.789,60.41,0.0033893472566245524
165
+ Telangana,Wanaparthy,8460.08,12699.16,4239.08,50.11,27.362,41.072,13.71,50.11,0.0032342483759018824
166
+ Maharashtra,Nagpur,10300.81,14605.28,4304.470000000001,41.79,32.5,46.081,13.581,41.79,0.0031550916869644233
167
+ Nagaland,Tuensang,3000.07,5523.34,2523.27,84.11,16.012,29.479,13.467,84.11,0.0053372087984613695
168
+ Odisha,Kendrapara,3344.96,6097.81,2752.8500000000004,82.3,16.251,29.625,13.374,82.3,0.00485835406103511
169
+ Madhya Pradesh,Ratlam,9117.03,13273.08,4156.049999999999,45.59,29.206,42.52,13.314,45.59,0.0032034555112794407
170
+ Telangana,Medak,15931.06,20283.36,4352.300000000001,27.32,47.9,60.986,13.086,27.32,0.0030067051407753156
171
+ Madhya Pradesh,Sagar,11895.11,16112.87,4217.76,35.46,36.643,49.636,12.993,35.46,0.003080509553925941
172
+ Madhya Pradesh,Barwani,8970.35,12994.65,4024.2999999999993,44.86,28.641,41.49,12.849,44.86,0.003192852006889363
173
+ Gujarat,Tapi,3899.74,6954.01,3054.2700000000004,78.32,16.375,29.2,12.825,78.32,0.0041989978819100765
174
+ Meghalaya,West Khasi Hills,3881.82,6915.03,3033.2099999999996,78.14,16.229,28.91,12.681,78.14,0.004180770875517154
175
+ Punjab,Ludhiana,6475.7,10224.06,3748.3599999999997,57.88,21.668,34.21,12.542,57.88,0.0033460475315409918
176
+ Rajasthan,Jhunjhunu,6332.88,10044.39,3711.5099999999993,58.61,21.317,33.81,12.493,58.61,0.0033660830459443414
177
+ Madhya Pradesh,Shahdol,11662.84,15713.87,4051.0300000000007,34.73,35.823,48.266,12.443,34.73,0.00307155032564967
178
+ Uttar Pradesh,Deoria,14311.2,18376.25,4065.0499999999993,28.4,43.132,55.384,12.252,28.41,0.0030138632679300127
179
+ Odisha,Jagatsinghapur,3460.13,6232.67,2772.54,80.13,15.256,27.48,12.224,80.13,0.004409082895729351
180
+ Bihar,Sheohar,6402.54,10062.24,3659.7,57.16,21.352,33.557,12.205,57.16,0.003334926451064734
181
+ Odisha,Baleshwar,8882.63,12722.99,3840.3600000000006,43.23,28.177,40.359,12.182,43.23,0.0031721460873637653
182
+ Uttar Pradesh,Kanpur Nagar,3931.49,6953.74,3022.25,76.87,15.83,27.999,12.169,76.87,0.004026463249302428
183
+ Tripura,Khowai,14176.43,18177.57,4001.1399999999994,28.22,42.721,54.779,12.058,28.22,0.0030135231507509293
184
+ Nagaland,Phek,1343.53,2536.71,1193.18,88.81,13.513,25.514,12.001,88.81,0.010057832724241364
185
+ Tripura,Sepahijala,13196.1,17135.34,3939.24,29.85,39.933,51.854,11.921,29.85,0.0030261213540364196
186
+ Odisha,Khordha,3048.73,5552.28,2503.5499999999997,82.12,14.453,26.321,11.868,82.11,0.004740662505371088
187
+ Uttar Pradesh,Etah,10481.7,14255.16,3773.459999999999,36.0,32.324,43.961,11.637,36.0,0.0030838509020483314
188
+ Odisha,Puri,5557.95,8956.02,3398.0700000000006,61.14,18.896,30.449,11.553,61.14,0.003399814679872975
189
+ Gujarat,Mahisagar,5123.16,8440.71,3317.5499999999993,64.76,17.733,29.216,11.483,64.75,0.0034613402665542364
190
+ Assam,Sribhumi,8827.24,12452.09,3624.8500000000004,41.06,27.822,39.247,11.425,41.06,0.003151834548511199
191
+ Puducherry,Pondicherry,2212.97,4106.25,1893.2800000000002,85.55,13.259,24.603,11.344,85.56,0.0059914955918968634
192
+ Himachal Pradesh,Sirmaur,7647.51,11175.17,3527.66,46.13,24.536,35.854,11.318,46.13,0.0032083645526452403
193
+ Gujarat,Narmada,10815.13,14454.36,3639.2300000000014,33.65,33.129,44.277,11.148,33.65,0.003063208671555497
194
+ Karnataka,Haveri,14379.75,18022.17,3642.4199999999983,25.33,43.054,53.96,10.906,25.33,0.0029940715241920063
195
+ Nagaland,Peren,3174.85,5695.84,2520.9900000000002,79.41,13.718,24.611,10.893,79.41,0.004320834055152212
196
+ Punjab,Moga,3582.72,6310.95,2728.23,76.15,14.243,25.089,10.846,76.15,0.003975471150410861
197
+ Karnataka,Dharwar,7112.71,10483.72,3371.0099999999993,47.39,22.879,33.722,10.843,47.39,0.003216636134469141
198
+ Meghalaya,West Jaintia Hills,4522.22,7540.59,3018.37,66.75,16.099,26.844,10.745,66.74,0.003559977179349965
199
+ Karnataka,Mandya,9428.79,12857.3,3428.5099999999984,36.36,29.144,39.741,10.597,36.36,0.0030909586489888943
200
+ Nagaland,Zunheboto,2292.32,4224.47,1932.15,84.29,12.53,23.091,10.561,84.29,0.005466078034480351
201
+ Arunachal Pradesh,Kra-Daadi,4983.38,8075.22,3091.84,62.04,17.02,27.58,10.56,62.04,0.003415352632149264
202
+ Madhya Pradesh,Niwari,3872.87,6660.95,2788.08,71.99,14.623,25.15,10.527,71.99,0.0037757528654460385
203
+ Odisha,Jharsuguda,3890.94,6663.88,2772.94,71.27,14.582,24.974,10.392,71.27,0.0037476805090800682
204
+ Bihar,Sheikhpura,6078.73,9234.53,3155.800000000001,51.92,19.874,30.192,10.318,51.92,0.003269432924311493
205
+ Chhattisgarh,Sukma,4038.09,6850.17,2812.08,69.64,14.718,24.967,10.249,69.64,0.003644792463763809
206
+ Tripura,North Tripura,12865.36,16263.79,3398.4300000000003,26.42,38.608,48.806,10.198,26.41,0.003000926518962547
207
+ Nagaland,Mokokchung,3842.4,6573.8,2731.4,71.09,14.309,24.481,10.172,71.09,0.0037239745992088276
208
+ Odisha,Malkangiri,12477.78,15841.53,3363.75,26.96,37.491,47.598,10.107,26.96,0.0030046210143150464
209
+ Madhya Pradesh,Gwalior,5057.16,8039.3,2982.1400000000003,58.97,17.024,27.063,10.039,58.97,0.003366316272374218
210
+ Telangana,Mahabubnagar,11128.87,14430.88,3302.0099999999984,29.67,33.673,43.664,9.991,29.67,0.003025733969396713
211
+ Jharkhand,Pakur,9536.88,12780.48,3243.6000000000004,34.01,29.252,39.201,9.949,34.01,0.0030672505054063805
212
+ Mizoram,Lunglei,6347.94,9436.85,3088.9100000000008,48.66,20.444,30.392,9.948,48.66,0.0032205723431538423
213
+ Assam,Jorhat,4964.01,7900.2,2936.1899999999996,59.15,16.777,26.701,9.924,59.15,0.00337972727693941
214
+ Jharkhand,Ramgarh,6137.5,9190.61,3053.1100000000006,49.75,19.84,29.709,9.869,49.74,0.0032325865580448065
215
+ Meghalaya,South Garo Hills,9418.37,12604.63,3186.2599999999984,33.83,28.861,38.625,9.764,33.83,0.0030643306644355657
216
+ Madhya Pradesh,Vidisha,8923.06,12054.72,3131.66,35.1,27.434,37.062,9.628,35.1,0.0030745058309593347
217
+ Bihar,Khagaria,6666.11,9668.73,3002.62,45.04,21.293,30.884,9.591,45.04,0.0031942167170958776
218
+ Punjab,Hoshiarpur,4430.25,7226.99,2796.74,63.13,15.184,24.769,9.585,63.13,0.0034273460865639634
219
+ Chhattisgarh,Dantewada,3361.18,5835.6,2474.4200000000005,73.62,12.969,22.516,9.547,73.61,0.003858466371928906
220
+ Jharkhand,Lohardaga,4142.99,6855.79,2712.8,65.48,14.429,23.877,9.448,65.48,0.0034827503807636517
221
+ Telangana,Rangareddy,8245.53,11288.52,3042.99,36.9,25.502,34.913,9.411,36.9,0.0030928272651970217
222
+ Uttar Pradesh,Kannauj,9807.18,12894.4,3087.2199999999993,31.48,29.848,39.244,9.396,31.48,0.0030434844675023805
223
+ Uttar Pradesh,Mahoba,5033.44,7837.74,2804.3,55.71,16.732,26.054,9.322,55.71,0.0033241679646524047
224
+ Assam,Bajali,1271.6,2389.4,1117.8000000000002,87.91,10.582,19.884,9.302,87.9,0.00832179930795848
225
+ Jammu and Kashmir,Kathua,3457.39,5927.61,2470.22,71.45,13.005,22.297,9.292,71.45,0.003761507958315377
226
+ Himachal Pradesh,Solan,4048.04,6691.35,2643.3100000000004,65.3,14.078,23.271,9.193,65.3,0.0034777324334739775
227
+ Assam,Sivasagar,5089.67,7860.86,2771.1899999999996,54.45,16.88,26.071,9.191,54.45,0.0033165215033587638
228
+ Manipur,Kangpokpi,2730.61,4879.09,2148.48,78.68,11.553,20.643,9.09,78.68,0.004230922760848309
229
+ Uttarakhand,Pauri Garhwal,4913.98,7642.83,2728.8500000000004,55.53,16.327,25.394,9.067,55.53,0.003322561345386022
230
+ Jharkhand,Simdega,9968.62,12872.29,2903.67,29.13,30.106,38.875,8.769,29.13,0.0030200770016311183
231
+ Maharashtra,Sangli,3172.43,5467.74,2295.31,72.35,12.031,20.736,8.705,72.35,0.0037923610607641465
232
+ Madhya Pradesh,Burhanpur,4455.28,7042.21,2586.9300000000003,58.06,14.921,23.585,8.664,58.07,0.0033490599917401376
233
+ Himachal Pradesh,Hamirpur,6272.55,8995.8,2723.249999999999,43.42,19.902,28.543,8.641,43.42,0.003172872276825215
234
+ Jammu and Kashmir,Reasi,2091.67,3828.21,1736.54,83.02,10.382,19.001,8.619,83.02,0.004963498066138539
235
+ Karnataka,Dakshina Kannada,4426.96,6981.43,2554.4700000000003,57.7,14.794,23.331,8.537,57.71,0.0033417966279342937
236
+ Maharashtra,Ratnagiri,2905.4,5075.81,2170.4100000000003,74.7,11.366,19.857,8.491,74.71,0.003912025882838852
237
+ Gujarat,Junagadh,1065.34,2005.68,940.3400000000001,88.27,9.602,18.077,8.475,88.26,0.009013085024499223
238
+ Telangana,Narayanpet,7173.69,9885.99,2712.3,37.81,22.237,30.645,8.408,37.81,0.0030997994058845585
239
+ Jharkhand,Saraikela Kharsawan,9831.93,12535.75,2703.8199999999997,27.5,29.592,37.73,8.138,27.5,0.003009785464298464
240
+ Nagaland,Kiphire,816.18,1543.97,727.7900000000001,89.17,9.032,17.086,8.054,89.17,0.011066186380455293
241
+ Punjab,Faridkot,3407.44,5669.42,2261.98,66.38,12.032,20.019,7.987,66.38,0.003531096659075435
242
+ Punjab,Gurdaspur,3426.32,5682.24,2255.9199999999996,65.84,12.034,19.957,7.923,65.84,0.0035122230264540383
243
+ Punjab,Ferozepur,5265.56,7713.51,2447.95,46.49,16.909,24.77,7.861,46.49,0.003211244388061288
244
+ Madhya Pradesh,Shajapur,4845.8,7238.83,2393.0299999999997,49.38,15.653,23.383,7.73,49.38,0.003230219984316315
245
+ Tamil Nadu,Theni,8174.12,10702.92,2528.8,30.94,24.85,32.538,7.688,30.94,0.003040082602163903
246
+ Madhya Pradesh,Indore,2547.95,4478.99,1931.04,75.79,10.069,17.7,7.631,75.79,0.00395180439176593
247
+ Punjab,Fatehgarh Sahib,6003.1,8424.81,2421.709999999999,40.34,18.839,26.439,7.6,40.34,0.0031382119238393493
248
+ Assam,Hojai,6011.53,8414.89,2403.3599999999997,39.98,18.844,26.378,7.534,39.98,0.0031346429278403336
249
+ Gujarat,Navsari,4433.67,6727.41,2293.74,51.73,14.493,21.991,7.498,51.74,0.0032688495084207893
250
+ Haryana,Karnal,2122.95,3816.35,1693.4,79.77,9.291,16.702,7.411,79.77,0.004376457288207448
251
+ Uttar Pradesh,Sant Ravidas Nagar,6400.93,8786.33,2385.3999999999996,37.27,19.802,27.182,7.38,37.27,0.003093612959366842
252
+ Madhya Pradesh,Neemuch,4648.57,6860.13,2211.5600000000004,47.58,14.956,22.071,7.115,47.57,0.0032173335025610025
253
+ Assam,Tamulpur,3439.66,5511.53,2071.87,60.23,11.649,18.666,7.017,60.24,0.0033866719385055497
254
+ Manipur,Kamjong,2350.65,4119.4,1768.7499999999995,75.25,9.258,16.224,6.966,75.24,0.003938485099865994
255
+ Punjab,Mansa,4454.01,6613.28,2159.2699999999995,48.48,14.34,21.292,6.952,48.48,0.003219570679006109
256
+ Gujarat,Chhotaudepur,6059.22,8306.32,2247.0999999999995,37.09,18.743,25.694,6.951,37.09,0.0030933024382676314
257
+ Karnataka,Uttara Kannada,5944.05,8169.94,2225.8899999999994,37.45,18.412,25.307,6.895,37.45,0.00309755133284545
258
+ Maharashtra,Pune,3191.43,5200.35,2008.9200000000005,62.95,10.937,17.822,6.885,62.95,0.003426990408688268
259
+ Himachal Pradesh,Una,4882.35,7037.36,2155.0099999999993,44.14,15.532,22.388,6.856,44.14,0.003181254928466824
260
+ Gujarat,Patan,2741.33,4625.57,1884.2399999999998,68.73,9.94,16.772,6.832,68.73,0.003625977171664849
261
+ Gujarat,Surat,5414.61,7549.95,2135.34,39.44,16.93,23.607,6.677,39.44,0.003126725655218012
262
+ Arunachal Pradesh,Papum Pare,5919.32,8039.6,2120.2800000000007,35.82,18.238,24.771,6.533,35.82,0.003081097153051364
263
+ Gujarat,Surendranagar,2257.0,3934.88,1677.88,74.34,8.783,15.312,6.529,74.34,0.003891448825875055
264
+ Maharashtra,Sindhudurg,1861.05,3348.91,1487.86,79.95,8.155,14.675,6.52,79.95,0.0043819349292066306
265
+ Arunachal Pradesh,Kurung Kumey,4870.3,6923.1,2052.8,42.15,15.395,21.884,6.489,42.15,0.003160996242531261
266
+ Uttar Pradesh,Hathras,4062.16,6053.5,1991.3400000000001,49.02,13.103,19.526,6.423,49.02,0.003225623806053922
267
+ Maharashtra,Raigad,1328.72,2455.88,1127.16,84.83,7.548,13.951,6.403,84.83,0.005680655066530194
268
+ Jammu and Kashmir,Ramban,2849.95,4685.15,1835.1999999999998,64.39,9.829,16.158,6.329,64.39,0.0034488324356567665
269
+ Uttar Pradesh,Meerut,2873.55,4708.36,1834.8099999999995,63.85,9.883,16.193,6.31,63.85,0.003439299820779175
270
+ Punjab,Barnala,2131.6,3727.82,1596.2200000000003,74.88,8.384,14.662,6.278,74.88,0.003933195721523739
271
+ Maharashtra,Satara,4626.42,6593.16,1966.7399999999998,42.51,14.635,20.856,6.221,42.51,0.0031633530894298397
272
+ Jammu and Kashmir,Badgam,6410.72,8440.36,2029.6400000000003,31.66,19.518,25.697,6.179,31.66,0.0030445878154091895
273
+ Haryana,Kaithal,1498.38,2736.94,1238.56,82.66,7.316,13.363,6.047,82.65,0.004882606548405611
274
+ Assam,Golaghat,7105.04,9084.66,1979.62,27.86,21.405,27.369,5.964,27.86,0.003012650175086981
275
+ Uttar Pradesh,Saharanpur,6696.44,8634.88,1938.4399999999996,28.95,20.223,26.077,5.854,28.95,0.003019962845930076
276
+ Assam,Morigaon,8064.82,10020.15,1955.33,24.25,24.108,29.953,5.845,24.25,0.0029892793639535666
277
+ Madhya Pradesh,Umaria,8236.58,10159.07,1922.4899999999998,23.34,24.59,30.33,5.74,23.34,0.0029854624127975448
278
+ Mizoram,Khawzawl,2931.71,4596.88,1665.17,56.8,9.773,15.324,5.551,56.8,0.0033335493619764574
279
+ Himachal Pradesh,Kinnaur,2235.16,3759.36,1524.2000000000003,68.19,8.04,13.523,5.483,68.2,0.003597057928738882
280
+ Maharashtra,Thane,2251.89,3771.21,1519.3200000000002,67.47,8.055,13.49,5.435,67.47,0.0035769953239279005
281
+ Jammu and Kashmir,Udhampur,2678.67,4277.62,1598.9499999999998,59.69,9.068,14.481,5.413,59.69,0.0033852620890217904
282
+ Gujarat,Kheda,2455.68,4028.11,1572.4300000000003,64.03,8.447,13.856,5.409,64.03,0.003439780427417253
283
+ Nagaland,Longleng,2238.52,3736.67,1498.15,66.93,7.979,13.319,5.34,66.93,0.0035644086271286388
284
+ Uttarakhand,Nainital,2349.6,3879.6,1530.0,65.12,8.144,13.447,5.303,65.12,0.0034661218930881854
285
+ Assam,Chirang,5967.72,7716.8,1749.08,29.31,18.028,23.312,5.284,29.31,0.0030209192120273735
286
+ Assam,Cachar,7488.69,9250.16,1761.4700000000003,23.52,22.366,27.627,5.261,23.52,0.002986637182204097
287
+ Chhattisgarh,Narayanpur,1998.18,3407.76,1409.5800000000002,70.54,7.422,12.658,5.236,70.55,0.003714380085878149
288
+ Uttarakhand,Rudra Prayag,3212.41,4827.86,1615.4499999999998,50.29,10.392,15.618,5.226,50.29,0.003234954442303442
289
+ Gujarat,Anand,1480.39,2658.57,1178.18,79.59,6.403,11.499,5.096,79.59,0.004325211599646038
290
+ Ladakh,Leh (Ladakh),2869.19,4410.63,1541.44,53.72,9.474,14.564,5.09,53.73,0.003301977213081044
291
+ Dadra and Nagar Haveli,Dadra And Nagar Haveli,1016.46,1882.4,865.94,85.19,5.874,10.878,5.004,85.19,0.005778879641107372
292
+ Himachal Pradesh,Bilaspur,5114.77,6752.61,1637.8399999999992,32.02,15.589,20.581,4.992,32.02,0.0030478398833183114
293
+ Manipur,Tamenglong,2916.99,4436.63,1519.6400000000003,52.1,9.566,14.55,4.984,52.1,0.0032794078827832803
294
+ Mizoram,Saitual,3981.55,5566.14,1584.5900000000001,39.8,12.464,17.424,4.96,39.79,0.00313043915058206
295
+ Uttarakhand,Almora,3196.79,4735.01,1538.2200000000003,48.12,10.287,15.237,4.95,48.12,0.003217915471457306
296
+ Jharkhand,Khunti,5276.33,6899.11,1622.7799999999997,30.76,16.019,20.946,4.927,30.76,0.0030360117733348744
297
+ Karnataka,Udupi,2698.69,4182.7,1484.0099999999998,54.99,8.953,13.876,4.923,54.99,0.0033175355450236962
298
+ Uttarakhand,Chamoli,6109.62,7734.55,1624.9300000000003,26.6,18.354,23.235,4.881,26.59,0.0030041148221984343
299
+ Punjab,Pathankot,2516.6,3936.89,1420.29,56.44,8.384,13.116,4.732,56.44,0.0033314789795756182
300
+ Madhya Pradesh,Agar-Malwa,6097.96,7675.67,1577.71,25.87,18.282,23.012,4.73,25.87,0.0029980518074897177
301
+ Arunachal Pradesh,Lohit,1281.83,2322.85,1041.02,81.21,5.802,10.514,4.712,81.21,0.0045263412464991454
302
+ Gujarat,Mahesana,2617.51,4037.94,1420.4299999999998,54.27,8.676,13.384,4.708,54.26,0.0033146005172855117
303
+ Arunachal Pradesh,Tawang,1294.84,2334.71,1039.8700000000001,80.31,5.761,10.388,4.627,80.32,0.0044491983565537055
304
+ Odisha,Sonepur,6089.58,7621.07,1531.4899999999998,25.15,18.226,22.81,4.584,25.15,0.002992981453564942
305
+ Madhya Pradesh,Narsinghpur,5832.08,7330.45,1498.37,25.69,17.483,21.975,4.492,25.69,0.0029977297979451585
306
+ Assam,Charaideo,2716.13,4062.36,1346.23,49.56,8.778,13.129,4.351,49.57,0.003231804074179071
307
+ Uttar Pradesh,Muzaffarnagar,3513.82,4893.2,1379.3799999999997,39.26,10.978,15.288,4.31,39.26,0.003124235162871177
308
+ Arunachal Pradesh,Longding,1406.09,2479.36,1073.2700000000002,76.33,5.615,9.901,4.286,76.33,0.003993343242608937
309
+ Karnataka,Kodagu,1619.25,2764.45,1145.1999999999998,70.72,6.02,10.278,4.258,70.73,0.003717770572796047
310
+ Jammu and Kashmir,Jammu,4006.04,5390.27,1384.2300000000005,34.55,12.299,16.549,4.25,34.56,0.0030701141276672224
311
+ Gujarat,Bharuch,1404.45,2463.77,1059.32,75.43,5.536,9.712,4.176,75.43,0.00394175655950728
312
+ Arunachal Pradesh,Anjaw,1744.47,2915.13,1170.66,67.11,6.222,10.397,4.175,67.1,0.0035666993413472287
313
+ Karnataka,Bengaluru Rural,5547.12,6922.12,1375.0,24.79,16.601,20.716,4.115,24.79,0.0029927241523529328
314
+ Gujarat,Gir Somnath,934.42,1716.95,782.5300000000001,83.74,4.905,9.013,4.108,83.75,0.0052492455212859316
315
+ Punjab,Bhatinda,5752.63,7116.15,1363.5199999999995,23.7,17.188,21.262,4.074,23.7,0.002987850774341475
316
+ Uttar Pradesh,Bulandshahr,2760.56,4023.97,1263.4099999999999,45.77,8.853,12.905,4.052,45.77,0.0032069580085200104
317
+ Manipur,Jiribam,1339.39,2356.91,1017.5199999999998,75.97,5.295,9.318,4.023,75.98,0.003953292170316338
318
+ Gujarat,Botad,1175.7,2107.14,931.4399999999998,79.22,5.037,9.028,3.991,79.23,0.004284256187803011
319
+ Mizoram,Hnahthial,2314.37,3528.44,1214.0700000000002,52.46,7.598,11.584,3.986,52.46,0.0032829668549108396
320
+ Haryana,Jind,2757.91,3985.19,1227.2800000000002,44.5,8.785,12.694,3.909,44.5,0.003185383134329982
321
+ Gujarat,Bhavnagar,2359.39,3558.68,1199.29,50.83,7.676,11.578,3.902,50.83,0.0032533832897486217
322
+ Gujarat,Ahmadabad,1441.0,2473.16,1032.1599999999999,71.63,5.426,9.313,3.887,71.64,0.003765440666204025
323
+ Arunachal Pradesh,Siang,1002.84,1824.54,821.6999999999999,81.94,4.728,8.602,3.874,81.94,0.004714610506162498
324
+ Sikkim,Gyalshing District,2195.12,3362.52,1167.4,53.18,7.242,11.093,3.851,53.18,0.003299136265898903
325
+ Gujarat,Jamnagar,1201.34,2133.54,932.2,77.6,4.905,8.711,3.806,77.59,0.004082940716200243
326
+ Uttar Pradesh,Hapur,1001.94,1819.27,817.3299999999999,81.57,4.597,8.347,3.75,81.57,0.004588099087769727
327
+ Gujarat,Kachchh,2315.28,3454.46,1139.1799999999998,49.2,7.469,11.144,3.675,49.2,0.0032259597111364497
328
+ Arunachal Pradesh,Changlang,3594.93,4778.6,1183.6700000000005,32.93,10.998,14.619,3.621,32.92,0.003059308526174362
329
+ Assam,Kamrup (Metro),869.13,1589.13,720.0000000000001,82.84,4.252,7.774,3.522,82.83,0.00489224857040949
330
+ Uttar Pradesh,Shamli,1635.85,2644.87,1009.02,61.68,5.576,9.015,3.439,61.68,0.00340862548522175
331
+ Madhya Pradesh,Bhopal,4848.55,5971.47,1122.92,23.16,14.465,17.815,3.35,23.16,0.002983366161017211
332
+ Punjab,Sas Nagar Mohali,1735.13,2733.21,998.0799999999999,57.52,5.793,9.125,3.332,57.52,0.003338654740566989
333
+ Himachal Pradesh,Lahul And Spiti,293.45,556.71,263.26000000000005,89.71,3.618,6.864,3.246,89.72,0.012329187255069006
334
+ Arunachal Pradesh,West Siang,304.72,576.99,272.27,89.35,3.621,6.856,3.235,89.34,0.011883040168023102
335
+ Gujarat,Rajkot,1126.2,1953.25,827.05,73.44,4.338,7.524,3.186,73.44,0.003851891315929675
336
+ Haryana,Sirsa,3028.77,4064.37,1035.6,34.19,9.292,12.469,3.177,34.19,0.0030679120567094894
337
+ Arunachal Pradesh,Leparada,509.71,952.24,442.53000000000003,86.82,3.621,6.765,3.144,86.83,0.007104039551902063
338
+ Sikkim,Gangtok District,1249.02,2112.05,863.0300000000002,69.1,4.534,7.667,3.133,69.1,0.003630045956029527
339
+ Arunachal Pradesh,Namsai,3982.09,5026.77,1044.6800000000003,26.23,11.942,15.075,3.133,26.24,0.002998927698771248
340
+ Arunachal Pradesh,Shi Yomi,173.29,330.95,157.66,90.98,3.432,6.554,3.122,90.97,0.019804951237809454
341
+ Arunachal Pradesh,Upper Siang,1091.95,1891.87,799.9199999999998,73.26,4.2,7.277,3.077,73.26,0.003846329960163011
342
+ Arunachal Pradesh,Lower Subansiri,648.37,1196.04,547.67,84.47,3.637,6.709,3.072,84.47,0.0056094513934944554
343
+ Punjab,Malerkotla,1160.05,1982.58,822.53,70.9,4.317,7.378,3.061,70.91,0.0037213913193396837
344
+ Arunachal Pradesh,Upper Dibang Valley,567.05,1054.23,487.18000000000006,85.91,3.539,6.58,3.041,85.93,0.006241072215853982
345
+ Punjab,Ropar,3775.12,4758.68,983.5600000000004,26.05,11.319,14.268,2.949,26.05,0.0029983152853419234
346
+ Gujarat,Gandhinagar,1119.64,1903.39,783.75,70.0,4.116,6.997,2.881,70.0,0.003676181629809581
347
+ Madhya Pradesh,Narmadapuram,3678.48,4616.9,938.4199999999996,25.51,11.024,13.836,2.812,25.51,0.002996890019790783
348
+ Madhya Pradesh,Harda,2615.76,3529.06,913.2999999999997,34.92,8.042,10.85,2.808,34.92,0.0030744410802214265
349
+ Haryana,Sonipat,1313.62,2131.01,817.3900000000003,62.22,4.489,7.282,2.793,62.22,0.003417274402034074
350
+ Madhya Pradesh,Datia,4044.75,4974.2,929.4499999999998,22.98,12.066,14.839,2.773,22.98,0.002983126274800668
351
+ Bihar,Begusarai,14196.08,15109.62,913.5400000000009,6.44,42.323,45.047,2.724,6.44,0.0029813159689153624
352
+ Haryana,Jhajjar,1151.25,1913.41,762.1600000000001,66.2,4.049,6.73,2.681,66.21,0.0035170466883821935
353
+ Haryana,Panipat,1346.3,2132.89,786.5899999999999,58.43,4.53,7.177,2.647,58.43,0.003364777538438684
354
+ Jammu and Kashmir,Srinagar,323.32,609.87,286.55,88.63,2.953,5.57,2.617,88.62,0.009133366324384511
355
+ Nagaland,Wokha,3828.59,4701.44,872.8499999999995,22.8,11.42,14.024,2.604,22.8,0.002982821351985979
356
+ Uttar Pradesh,Baghpat,126.15,241.15,115.0,91.16,2.822,5.395,2.573,91.18,0.022370194213238208
357
+ Jammu and Kashmir,Samba,820.55,1452.82,632.27,77.05,3.321,5.88,2.559,77.06,0.004047285357382244
358
+ Haryana,Yamunanagar,641.4,1161.14,519.7400000000001,81.03,2.902,5.254,2.352,81.05,0.004524477705020269
359
+ Karnataka,Bengaluru,219.37,415.78,196.40999999999997,89.53,2.619,4.964,2.345,89.54,0.011938733646350915
360
+ Haryana,Mahendragarh,721.88,1280.73,558.85,77.42,2.943,5.221,2.278,77.4,0.004076854878927246
361
+ Gujarat,Devbhumi Dwarka,680.46,1214.63,534.1700000000001,78.5,2.869,5.121,2.252,78.49,0.004216265467477883
362
+ Manipur,Pherzawl,1108.37,1759.96,651.5900000000001,58.79,3.731,5.924,2.193,58.78,0.003366204426319731
363
+ Haryana,Ambala,313.94,588.77,274.83,87.54,2.411,4.522,2.111,87.56,0.007679811428935465
364
+ Gujarat,Amreli,1589.45,2256.52,667.0699999999999,41.97,5.017,7.123,2.106,41.98,0.0031564377614898237
365
+ Haryana,Bhiwani,2422.68,3119.6,696.9200000000001,28.77,7.316,9.421,2.105,28.77,0.003019796258688725
366
+ Haryana,Palwal,172.23,327.37,155.14000000000001,90.08,2.311,4.393,2.082,90.09,0.013418103698542646
367
+ Haryana,Faridabad,85.33,163.27,77.94000000000001,91.34,2.273,4.349,2.076,91.33,0.026637759287472168
368
+ Punjab,Kapurthala,2853.6,3540.3,686.7000000000003,24.06,8.53,10.583,2.053,24.07,0.002989206616204093
369
+ Haryana,Charki Dadri,390.84,725.93,335.09,85.74,2.381,4.422,2.041,85.72,0.006092006959369563
370
+ Haryana,Panchkula,414.45,766.78,352.33,85.01,2.381,4.405,2.024,85.01,0.0057449632042465915
371
+ Haryana,Gurugram,212.8,402.17,189.37,88.99,2.273,4.296,2.023,89.0,0.01068139097744361
372
+ Haryana,Rewari,321.95,602.05,280.09999999999997,87.0,2.306,4.312,2.006,86.99,0.007162602888647306
373
+ Haryana,Kurukshetra,1416.86,2024.31,607.45,42.87,4.489,6.414,1.925,42.88,0.0031682735062038593
374
+ Telangana,Medchal,230.3,433.99,203.69,88.45,2.089,3.937,1.848,88.46,0.00907077724706904
375
+ Gujarat,Vadodara,1362.87,1927.45,564.5800000000002,41.43,4.297,6.077,1.78,41.42,0.0031529052660928775
376
+ Gujarat,Morbi,585.58,1018.79,433.2099999999999,73.98,2.26,3.932,1.672,73.98,0.0038594214283274694
377
+ Goa,South Goa,124.16,236.22,112.06,90.25,1.789,3.404,1.615,90.27,0.014408827319587628
378
+ Gujarat,Porbandar,875.9,1354.39,478.4900000000001,54.63,2.905,4.492,1.587,54.63,0.0033165886516725654
379
+ Goa,North Goa,133.18,252.9,119.72,89.89,1.754,3.331,1.577,89.91,0.013170145667517645
380
+ Arunachal Pradesh,East Siang,1080.35,1553.3,472.95000000000005,43.78,3.432,4.934,1.502,43.76,0.00317674827602166
381
+ Haryana,Rohtak,1728.22,2140.98,412.76,23.88,5.165,6.399,1.234,23.89,0.002988624133501522
382
+ Andaman and Nicobar Islands,North And Middle Andaman,199.7,372.72,173.02000000000004,86.64,1.398,2.609,1.211,86.62,0.00700050075112669
383
+ Andaman and Nicobar Islands,South Andaman,71.59,136.46,64.87,90.61,1.331,2.537,1.206,90.61,0.018591982120407878
384
+ Andaman and Nicobar Islands,Nicobars,27.23,52.15,24.919999999999998,91.52,1.289,2.469,1.18,91.54,0.04733749540947484
385
+ Sikkim,Mangan District,716.09,641.34,-74.75,-10.44,2.127,1.905,-0.222,-10.44,0.00297029702970297
386
+ Arunachal Pradesh,Lower Siang,1167.08,1043.69,-123.38999999999987,-10.57,3.465,3.099,-0.366,-10.56,0.0029689481440860954
387
+ Manipur,Noney,1876.03,1632.54,-243.49,-12.98,5.521,4.804,-0.717,-12.99,0.00294291669109769
388
+ Arunachal Pradesh,Lower Dibang Valley,2574.59,2233.55,-341.03999999999996,-13.25,7.574,6.571,-1.003,-13.24,0.002941827630807235
389
+ Uttarakhand,Udam Singh Nagar,3159.23,2795.65,-363.5799999999999,-11.51,9.341,8.266,-1.075,-11.51,0.00295673312800904
390
+ Manipur,Tengnoupal,2469.75,2033.66,-436.0899999999999,-17.66,7.116,5.86,-1.256,-17.65,0.0028812632857576678
391
+ Assam,Majuli,2831.61,2392.18,-439.4300000000003,-15.52,8.245,6.965,-1.28,-15.52,0.0029117710419160826
392
+ Puducherry,Karaikal,1455.97,961.44,-494.53,-33.97,3.879,2.561,-1.318,-33.98,0.0026642032459460014
393
+ Sikkim,Namchi District,3315.86,2850.04,-465.82000000000016,-14.05,9.717,8.352,-1.365,-14.05,0.0029304614790733024
394
+ Gujarat,Dang,3925.72,3463.43,-462.28999999999996,-11.78,11.591,10.226,-1.365,-11.78,0.0029525793994477447
395
+ Sikkim,Soreng,2153.73,1563.27,-590.46,-27.42,5.971,4.334,-1.637,-27.42,0.0027723995115450867
396
+ Uttarakhand,Bageshwar,2463.99,1870.82,-593.1699999999998,-24.07,6.946,5.274,-1.672,-24.07,0.002819004947260338
397
+ Sikkim,Pakyong,1894.76,1210.67,-684.0899999999999,-36.1,4.971,3.176,-1.795,-36.11,0.002623551267706728
398
+ Assam,West Karbi Anglong,1659.93,860.92,-799.0100000000001,-48.14,3.839,1.991,-1.848,-48.14,0.0023127481279331054
399
+ Punjab,Sangrur,5891.34,5260.59,-630.75,-10.71,17.477,15.606,-1.871,-10.71,0.0029665576931563954
400
+ Manipur,Chandel,2039.1,1321.98,-717.1199999999999,-35.17,5.386,3.492,-1.894,-35.17,0.002641361384924722
401
+ Assam,Dima Hasao,1746.74,917.62,-829.12,-47.47,4.099,2.153,-1.946,-47.47,0.0023466572014152077
402
+ Uttarakhand,Pithoragarh,4955.61,4279.31,-676.2999999999993,-13.65,14.564,12.576,-1.988,-13.65,0.0029388914785465365
403
+ Arunachal Pradesh,Pakke Kessang,1949.78,966.95,-982.8299999999999,-50.41,3.99,1.979,-2.011,-50.4,0.002046384720327422
404
+ Jammu and Kashmir,Bandipora,3305.25,2571.41,-733.8400000000001,-22.2,9.377,7.295,-2.082,-22.2,0.0028370017396566072
405
+ Arunachal Pradesh,West Kameng,2025.85,1126.53,-899.3199999999999,-44.39,4.963,2.76,-2.203,-44.39,0.002449835871362638
406
+ Mizoram,Champhai,6114.68,5353.76,-760.9200000000001,-12.44,18.024,15.781,-2.243,-12.44,0.00294766038451726
407
+ Gujarat,Valsad,3907.92,3066.39,-841.5300000000002,-21.53,11.113,8.72,-2.393,-21.53,0.0028437122561362563
408
+ Maharashtra,Kolhapur,3185.13,2277.85,-907.2800000000002,-28.48,8.773,6.274,-2.499,-28.49,0.002754361674405754
409
+ Tamil Nadu,Coimbatore,8418.98,7573.88,-845.0999999999995,-10.04,25.057,22.542,-2.515,-10.04,0.002976251279846252
410
+ Odisha,Boudh,6544.09,5685.99,-858.1000000000004,-13.11,19.256,16.731,-2.525,-13.11,0.002942502318886201
411
+ Madhya Pradesh,Sehore,5612.05,4711.13,-900.9200000000001,-16.05,16.261,13.651,-2.61,-16.05,0.0028975151682540247
412
+ Jammu and Kashmir,Pulwama,5663.08,4761.54,-901.54,-15.92,16.422,13.808,-2.614,-15.92,0.0028998354252456264
413
+ Chhattisgarh,Bijapur,4206.81,3261.55,-945.2600000000002,-22.47,11.927,9.247,-2.68,-22.47,0.0028351648874087487
414
+ Haryana,Mewat,2465.14,1265.37,-1199.77,-48.67,5.556,2.852,-2.704,-48.67,0.002253827368830979
415
+ Mizoram,Aizawl,6893.53,5906.68,-986.8499999999995,-14.32,20.163,17.277,-2.886,-14.31,0.002924916552187341
416
+ Uttarakhand,Champawat,3206.03,2121.37,-1084.6600000000003,-33.83,8.545,5.654,-2.891,-33.83,0.0026652900939791576
417
+ Arunachal Pradesh,Kamle,3008.77,1459.95,-1548.82,-51.48,5.802,2.815,-2.987,-51.48,0.0019283627528857306
418
+ Uttar Pradesh,Shravasti,10766.0,9714.08,-1051.92,-9.77,32.073,28.939,-3.134,-9.77,0.0029791008731190785
419
+ Punjab,Jalandhar,3084.25,1764.56,-1319.69,-42.79,7.645,4.374,-3.271,-42.79,0.0024787225419469886
420
+ Meghalaya,East Jaintia Hills,4430.38,3180.23,-1250.15,-28.22,12.214,8.767,-3.447,-28.22,0.0027568741281786212
421
+ Tamil Nadu,The Nilgiris,3672.72,2351.62,-1321.1,-35.97,9.652,6.18,-3.472,-35.97,0.0026280250059901107
422
+ Jammu and Kashmir,Ganderbal,3554.17,2213.95,-1340.2200000000003,-37.71,9.267,5.773,-3.494,-37.7,0.002607359805524215
423
+ Madhya Pradesh,Mandsaur,10840.0,9635.95,-1204.0499999999993,-11.11,32.12,28.552,-3.568,-11.11,0.0029630996309963097
424
+ Mizoram,Serchhip,5365.91,4081.31,-1284.6,-23.94,15.145,11.519,-3.626,-23.94,0.002822447637027084
425
+ Tamil Nadu,Kanniyakumari,6268.92,4960.87,-1308.0500000000002,-20.87,17.853,14.128,-3.725,-20.86,0.002847858961352195
426
+ Ladakh,Kargil,4686.31,3313.83,-1372.4800000000005,-29.29,12.869,9.1,-3.769,-29.29,0.0027460838058088344
427
+ Karnataka,Shivamogga,8906.5,7607.67,-1298.83,-14.58,26.02,22.226,-3.794,-14.58,0.002921461853702352
428
+ Telangana,Yadadri Bhuvanagiri,10163.11,8871.22,-1291.8900000000012,-12.71,29.954,26.146,-3.808,-12.71,0.0029473261629560242
429
+ Tripura,Unakoti,9581.79,8248.52,-1333.2700000000004,-13.91,28.11,24.199,-3.911,-13.91,0.00293368984292079
430
+ Punjab,Fazilka,9289.54,7947.27,-1342.2700000000004,-14.45,27.145,23.223,-3.922,-14.45,0.002922103785548046
431
+ Meghalaya,Ri Bhoi,5965.18,4521.16,-1444.0200000000004,-24.21,16.787,12.723,-4.064,-24.21,0.002814164870129652
432
+ Uttar Pradesh,Sambhal,9062.69,7632.05,-1430.6400000000003,-15.79,26.342,22.184,-4.158,-15.78,0.0029066425090122244
433
+ Telangana,Jayashanker Bhopalapally,6966.9,5503.9,-1463.0,-21.0,19.837,15.671,-4.166,-21.0,0.0028473209031276464
434
+ Punjab,Nawanshahr,4721.0,3104.87,-1616.13,-34.23,12.55,8.254,-4.296,-34.23,0.0026583350984960815
435
+ Odisha,Gajapati,10972.96,9504.79,-1468.1699999999983,-13.38,32.268,27.951,-4.317,-13.38,0.0029406832796255527
436
+ Punjab,Patiala,8603.83,7096.12,-1507.71,-17.52,24.823,20.473,-4.35,-17.52,0.002885110468244956
437
+ Arunachal Pradesh,Tirap,4475.82,2165.83,-2309.99,-51.61,8.448,4.088,-4.36,-51.61,0.0018874753676421305
438
+ Bihar,Banka,14499.86,13024.97,-1474.8900000000012,-10.17,43.151,38.762,-4.389,-10.17,0.002975959767887414
439
+ Madhya Pradesh,Raisen,6336.35,4743.19,-1593.1600000000008,-25.14,17.773,13.304,-4.469,-25.14,0.002804927126815911
440
+ Jharkhand,Koderma,7022.2,5406.79,-1615.4099999999999,-23.0,19.879,15.306,-4.573,-23.0,0.002830879211643075
441
+ Madhya Pradesh,Jabalpur,8190.32,6579.9,-1610.42,-19.66,23.443,18.834,-4.609,-19.66,0.00286228132722531
442
+ Assam,Kokrajhar,13384.29,11790.29,-1594.0,-11.91,39.489,34.786,-4.703,-11.91,0.002950399311431536
443
+ Karnataka,Chikkamagaluru,10039.32,8306.89,-1732.4300000000003,-17.26,28.99,23.987,-5.003,-17.26,0.0028876457768056
444
+ Jammu and Kashmir,Anantnag,13839.33,12135.64,-1703.6900000000005,-12.31,40.803,35.78,-5.023,-12.31,0.002948336371775223
445
+ Meghalaya,South West Khasi Hills,5519.03,3614.95,-1904.08,-34.5,14.626,9.58,-5.046,-34.5,0.0026501033696138634
446
+ Uttarakhand,Haridwar,5148.42,3213.92,-1934.5,-37.57,13.443,8.392,-5.051,-37.57,0.002611092335124174
447
+ Odisha,Sambalpur,7997.72,6179.28,-1818.4400000000005,-22.74,22.647,17.498,-5.149,-22.74,0.0028316820293783724
448
+ Mizoram,Kolasib,6923.91,5053.45,-1870.46,-27.01,19.277,14.069,-5.208,-27.02,0.0027841205330514118
449
+ Gujarat,Sabar Kantha,6503.95,4581.75,-1922.1999999999998,-29.55,17.818,12.552,-5.266,-29.55,0.0027395659560728483
450
+ Uttar Pradesh,Agra,15464.1,13663.75,-1800.3500000000004,-11.64,45.714,40.392,-5.322,-11.64,0.0029561371175820124
451
+ Punjab,Amritsar,4924.64,2751.66,-2172.9800000000005,-44.12,12.08,6.75,-5.33,-44.12,0.0024529711816498262
452
+ Haryana,Fatehabad,7344.91,5429.44,-1915.4700000000003,-26.08,20.531,15.177,-5.354,-26.08,0.0027952691047269467
453
+ Meghalaya,East Garo Hills,9457.67,7572.77,-1884.8999999999996,-19.93,27.066,21.672,-5.394,-19.93,0.002861804228737099
454
+ Assam,Dibrugarh,6840.57,4873.75,-1966.8199999999997,-28.75,18.815,13.405,-5.41,-28.75,0.002750501785669908
455
+ Manipur,Senapati,5451.86,3330.46,-2121.3999999999996,-38.91,14.018,8.563,-5.455,-38.91,0.002571232570168713
456
+ Manipur,Ukhrul,5886.34,3729.64,-2156.7000000000003,-36.64,15.418,9.769,-5.649,-36.64,0.002619284648864999
457
+ Mizoram,Siaha,6120.58,3959.88,-2160.7,-35.3,16.165,10.458,-5.707,-35.3,0.0026410895699427177
458
+ Maharashtra,Dharashiv,10958.88,8877.33,-2081.5499999999993,-18.99,31.468,25.491,-5.977,-18.99,0.002871461317214898
459
+ Madhya Pradesh,Katni,14855.97,12808.68,-2047.289999999999,-13.78,43.63,37.617,-6.013,-13.78,0.002936866458400226
460
+ Odisha,Deogarh,5432.19,2831.93,-2600.2599999999998,-47.87,12.612,6.575,-6.037,-47.87,0.002321715551186538
461
+ Karnataka,Mysuru,8591.76,6420.04,-2171.7200000000003,-25.28,24.093,18.003,-6.09,-25.28,0.002804198441297243
462
+ Madhya Pradesh,Panna,12021.34,9882.61,-2138.7299999999996,-17.79,34.635,28.473,-6.162,-17.79,0.002881126396890862
463
+ Manipur,Thoubal,7686.2,3647.4,-4038.7999999999997,-52.55,11.733,5.568,-6.165,-52.54,0.001526502042621842
464
+ Assam,Kamrup,10554.51,8309.92,-2244.59,-21.27,30.036,23.648,-6.388,-21.27,0.0028457976732221583
465
+ Manipur,Kakching,5831.62,3001.19,-2830.43,-48.54,13.237,6.812,-6.425,-48.54,0.0022698666922741882
466
+ Punjab,Mukatsar,6878.49,4431.84,-2446.6499999999996,-35.57,18.146,11.692,-6.454,-35.57,0.002638078996989165
467
+ Assam,Sonitpur,9771.88,7458.61,-2313.2699999999995,-23.67,27.606,21.071,-6.535,-23.67,0.0028250449248251107
468
+ Jharkhand,East Singhbum,9150.81,6813.31,-2337.499999999999,-25.54,25.614,19.071,-6.543,-25.54,0.0027990964734269427
469
+ Maharashtra,Buldhana,9081.12,6737.15,-2343.970000000001,-25.81,25.4,18.844,-6.556,-25.81,0.002797011822330285
470
+ Telangana,Mulugu,6234.2,3583.38,-2650.8199999999997,-42.52,15.486,8.901,-6.585,-42.52,0.002484039652240865
471
+ Bihar,Supaul,16613.3,14368.25,-2245.0499999999993,-13.51,48.832,42.233,-6.599,-13.51,0.002939331740232224
472
+ Mizoram,Mamit,7510.96,5020.06,-2490.8999999999996,-33.16,20.061,13.408,-6.653,-33.16,0.002670896929287335
473
+ Manipur,Bishnupur,7562.18,3608.77,-3953.4100000000003,-52.28,13.0,6.204,-6.796,-52.28,0.0017190810057417306
474
+ Telangana,Jogulamba Gadwal,8221.69,5703.9,-2517.790000000001,-30.62,22.354,15.508,-6.846,-30.63,0.002718905723762389
475
+ Uttarakhand,Dehradun,6697.08,3974.77,-2722.31,-40.65,16.963,10.068,-6.895,-40.65,0.0025328949333142206
476
+ Telangana,Karimnagar,10068.7,7590.95,-2477.750000000001,-24.61,28.284,21.324,-6.96,-24.61,0.002809101472881305
477
+ Jammu and Kashmir,Kulgam,9059.91,6515.53,-2544.38,-28.08,24.982,17.966,-7.016,-28.08,0.0027574225351024457
478
+ Assam,Tinsukia,6803.94,3938.15,-2865.7899999999995,-42.12,16.945,9.808,-7.137,-42.12,0.002490468757807976
479
+ Punjab,Tarn Taran,6541.89,3349.24,-3192.6500000000005,-48.8,14.739,7.546,-7.193,-48.8,0.002253018623058474
480
+ Maharashtra,Wardha,7141.7,4286.39,-2855.3099999999995,-39.98,18.223,10.937,-7.286,-39.98,0.002551633364605066
481
+ Tripura,West Tripura,12517.25,9938.92,-2578.33,-20.6,35.676,28.327,-7.349,-20.6,0.002850146797419561
482
+ Assam,Baksa,8019.76,5177.88,-2841.88,-35.44,21.163,13.664,-7.499,-35.43,0.0026388570231528127
483
+ Telangana,Nalgonda,24806.56,22250.14,-2556.420000000002,-10.31,73.812,66.205,-7.607,-10.31,0.0029755032539779797
484
+ Jharkhand,Bokaro,12385.93,9702.19,-2683.74,-21.67,35.221,27.589,-7.632,-21.67,0.0028436298283616972
485
+ Assam,Karbi Anglong,8701.63,4140.9,-4560.73,-52.41,14.745,7.017,-7.728,-52.41,0.0016945101090255505
486
+ Maharashtra,Jalgaon,13448.76,10732.48,-2716.2800000000007,-20.2,38.42,30.66,-7.76,-20.2,0.002856768951189552
487
+ Nagaland,Mon,7082.96,3844.01,-3238.95,-45.73,17.037,9.246,-7.791,-45.73,0.002405350305521985
488
+ Telangana,Rajanna Sirsilla,7731.93,4681.98,-3049.9500000000007,-39.45,19.837,12.012,-7.825,-39.45,0.002565594877346277
489
+ Manipur,Imphal West,10697.6,5062.13,-5635.47,-52.68,14.885,7.044,-7.841,-52.68,0.0013914335925815135
490
+ Jammu and Kashmir,Poonch,9185.29,6261.91,-2923.380000000001,-31.83,24.693,16.834,-7.859,-31.83,0.002688320129250138
491
+ Uttar Pradesh,Shahjahanpur,14829.62,12092.15,-2737.470000000001,-18.46,42.603,34.739,-7.864,-18.46,0.0028728315358046933
492
+ Andhra Pradesh,East Godavari,17665.2,14947.41,-2717.790000000001,-15.38,51.442,43.528,-7.914,-15.38,0.0029120530761044313
493
+ Manipur,Churachandpur,7912.79,3860.7,-4052.09,-51.21,15.623,7.623,-8.0,-51.21,0.001974398410674364
494
+ Telangana,Hanumakonda,7405.49,4127.94,-3277.55,-44.26,18.155,10.12,-8.035,-44.26,0.002451559586198888
495
+ Kerala,Kasargod,15054.29,12255.22,-2799.0700000000015,-18.59,43.24,35.2,-8.04,-18.59,0.00287227096063647
496
+ Gujarat,Banas Kantha,8980.72,5966.38,-3014.3399999999992,-33.56,23.967,15.923,-8.044,-33.56,0.002668716984829724
497
+ Bihar,Madhepura,16379.21,13574.64,-2804.5699999999997,-17.12,47.303,39.203,-8.1,-17.12,0.002887990324319671
498
+ Jharkhand,West Singhbhum,12949.84,10091.99,-2857.8500000000004,-22.07,36.781,28.664,-8.117,-22.07,0.0028402667523305305
499
+ Uttar Pradesh,Pratapgarh,24565.83,21804.35,-2761.480000000003,-11.24,72.727,64.552,-8.175,-11.24,0.0029604943126285577
500
+ Telangana,Jangaon,10576.78,7578.13,-2998.6500000000005,-28.35,29.153,20.888,-8.265,-28.35,0.00275632092186847
501
+ Uttar Pradesh,Ayodhya,15086.22,12180.39,-2905.83,-19.26,43.271,34.936,-8.335,-19.26,0.0028682466515800513
502
+ Madhya Pradesh,Ujjain,8555.13,5363.44,-3191.6899999999996,-37.31,22.354,14.014,-8.34,-37.31,0.002612935162878881
503
+ Andhra Pradesh,Konaseema,19338.21,16440.58,-2897.6299999999974,-14.98,56.443,47.986,-8.457,-14.98,0.002918729292938695
504
+ Arunachal Pradesh,East Kameng,10259.58,7145.14,-3114.4399999999996,-30.36,27.996,19.497,-8.499,-30.36,0.0027287666746591967
505
+ Meghalaya,West Garo Hills,26803.49,23897.96,-2905.5300000000025,-10.84,79.489,70.872,-8.617,-10.84,0.0029656212679766703
506
+ Telangana,Nagarkurnool,12421.53,9348.18,-3073.3500000000004,-24.74,34.892,26.259,-8.633,-24.74,0.0028089937390965527
507
+ Jharkhand,Dhanbad,10710.57,7473.55,-3237.0199999999995,-30.22,29.237,20.401,-8.836,-30.22,0.0027297333381883504
508
+ Haryana,Hisar,8273.87,4700.47,-3573.4000000000005,-43.19,20.462,11.625,-8.837,-43.19,0.0024730869593068295
509
+ Maharashtra,Solapur,8194.99,4129.84,-4065.1499999999996,-49.61,17.87,9.006,-8.864,-49.6,0.0021806005864558713
510
+ Jammu and Kashmir,Baramulla,8195.06,4535.2,-3659.8599999999997,-44.66,19.942,11.036,-8.906,-44.66,0.0024334172049991095
511
+ Karnataka,Koppal,30419.8,27406.9,-3012.899999999998,-9.9,90.552,81.583,-8.969,-9.9,0.002976745409240035
512
+ Madhya Pradesh,Khandwa,10715.03,7276.14,-3438.8900000000003,-32.09,28.753,19.525,-9.228,-32.09,0.002683426924609637
513
+ Chhattisgarh,Mahasamund,17755.77,14549.35,-3206.42,-18.06,51.144,41.908,-9.236,-18.06,0.0028804157747030963
514
+ Assam,Biswanath,8946.37,4472.62,-4473.750000000001,-50.01,18.811,9.404,-9.407,-50.01,0.0021026405122971663
515
+ Telangana,Warangal,8559.76,4668.37,-3891.3900000000003,-45.46,20.734,11.308,-9.426,-45.46,0.0024222641756310925
516
+ Chhattisgarh,Surajpur,16607.97,13275.82,-3332.1500000000015,-20.06,47.493,37.964,-9.529,-20.06,0.002859651119311993
517
+ Gujarat,Arvalli,10041.73,6389.39,-3652.3399999999992,-36.37,26.311,16.741,-9.57,-36.37,0.0026201660470855123
518
+ Karnataka,Chamaraja Nagara,8668.41,4681.28,-3987.13,-46.0,20.81,11.238,-9.572,-46.0,0.002400670941960521
519
+ Chhattisgarh,Kondagaon,10500.12,6863.52,-3636.6000000000004,-34.63,27.823,18.187,-9.636,-34.63,0.002649779240618202
520
+ Bihar,Auranagabad,23220.77,19927.62,-3293.1500000000015,-14.18,68.042,58.392,-9.65,-14.18,0.002930221521508546
521
+ Karnataka,Vijaypura,14636.94,11211.11,-3425.83,-23.41,41.388,31.701,-9.687,-23.41,0.0028276402034851543
522
+ Chhattisgarh,Bastar,10032.55,4841.3,-5191.249999999999,-51.74,18.833,9.088,-9.745,-51.74,0.0018771897473723032
523
+ Jharkhand,Ranchi,13203.11,9706.95,-3496.16,-26.48,36.847,27.09,-9.757,-26.48,0.002790781868817271
524
+ Madhya Pradesh,Mandla,30027.05,26731.95,-3295.0999999999985,-10.97,89.045,79.273,-9.772,-10.97,0.0029654927806760903
525
+ Maharashtra,Gadchiroli,20807.51,17411.58,-3395.9299999999967,-16.32,60.192,50.368,-9.824,-16.32,0.0028928016855452672
526
+ Maharashtra,Bhandara,20316.86,16919.53,-3397.3300000000017,-16.72,58.748,48.924,-9.824,-16.72,0.0028915885624058047
527
+ Bihar,Kaimur (Bhabua),15788.74,12325.48,-3463.26,-21.93,44.85,35.012,-9.838,-21.94,0.0028406319947000204
528
+ Telangana,Peddapalli,9217.53,5211.93,-4005.6000000000004,-43.46,22.745,12.861,-9.884,-43.46,0.002467580794421065
529
+ Uttar Pradesh,Bijnor,15624.95,12134.96,-3489.9900000000016,-22.34,44.327,34.426,-9.901,-22.34,0.00283693707819865
530
+ Bihar,Jehanabad,11857.02,8162.56,-3694.46,-31.16,32.07,22.077,-9.993,-31.16,0.002704726820061027
531
+ Maharashtra,Akola,9790.55,4829.22,-4961.329999999999,-50.67,19.785,9.759,-10.026,-50.67,0.0020208262048608096
532
+ Himachal Pradesh,Kangra,16610.78,13056.03,-3554.749999999998,-21.4,47.269,37.153,-10.116,-21.4,0.002845682141356396
533
+ Meghalaya,East Khasi Hills,12863.56,9182.19,-3681.369999999999,-28.62,35.419,25.283,-10.136,-28.62,0.002753436840190429
534
+ Tamil Nadu,Dharmapuri,28769.36,25304.62,-3464.7400000000016,-12.04,84.86,74.64,-10.22,-12.04,0.002949665894548923
535
+ Chhattisgarh,Balrampur,14144.84,10474.94,-3669.8999999999996,-25.95,39.551,29.289,-10.262,-25.95,0.0027961433285919107
536
+ Arunachal Pradesh,Upper Subansiri,9496.64,5242.81,-4253.829999999999,-44.79,23.102,12.754,-10.348,-44.79,0.0024326498635306804
537
+ Bihar,Madhubani,22131.05,18548.69,-3582.3600000000006,-16.19,64.036,53.67,-10.366,-16.19,0.0028934912713133813
538
+ Madhya Pradesh,Betul,14066.56,10247.74,-3818.8199999999997,-27.15,39.041,28.442,-10.599,-27.15,0.002775447586332408
539
+ Karnataka,Chikkaballapura,11493.44,7466.72,-4026.7200000000003,-35.03,30.365,19.727,-10.638,-35.03,0.002641941838126792
540
+ Assam,Hailakandi,10464.21,5119.54,-5344.669999999999,-51.08,20.85,10.201,-10.649,-51.07,0.001992505884342918
541
+ Kerala,Wayanad,14899.67,11073.76,-3825.91,-25.68,41.696,30.989,-10.707,-25.68,0.0027984512408664084
542
+ Chhattisgarh,Gaurela Pendra Marwahi,10303.55,5977.51,-4326.039999999999,-41.99,25.688,14.903,-10.785,-41.98,0.00249312130285193
543
+ Mizoram,Lawngtlai,12818.41,8790.13,-4028.2800000000007,-31.43,34.548,23.691,-10.857,-31.43,0.0026951860644182863
544
+ Jharkhand,Chatra,20374.62,16559.1,-3815.5200000000004,-18.73,58.519,47.56,-10.959,-18.73,0.002872151726019921
545
+ Andhra Pradesh,Nandyal,25448.99,21669.74,-3779.25,-14.85,74.302,63.268,-11.034,-14.85,0.0029196443552376734
546
+ Andhra Pradesh,Chittoor,29200.4,25449.55,-3750.850000000002,-12.85,86.016,74.967,-11.049,-12.85,0.0029457130724236656
547
+ Madhya Pradesh,Anuppur,16883.64,12954.53,-3929.1099999999988,-23.27,47.759,36.645,-11.114,-23.27,0.002828714661056502
548
+ Uttar Pradesh,Bahraich,38940.81,35188.06,-3752.75,-9.64,116.049,104.865,-11.184,-9.64,0.0029801383176158895
549
+ Chhattisgarh,Surguja,10510.73,5915.05,-4595.679999999999,-43.72,25.819,14.53,-11.289,-43.72,0.0024564421310413263
550
+ Uttar Pradesh,Mathura,11311.04,6924.87,-4386.170000000001,-38.78,29.154,17.849,-11.305,-38.78,0.0025774818230684354
551
+ Uttarakhand,Uttar Kashi,11087.68,6521.34,-4566.34,-41.18,27.913,16.417,-11.496,-41.19,0.0025174788594187424
552
+ Bihar,Bhojpur,15100.03,10940.09,-4159.9400000000005,-27.55,41.854,30.324,-11.53,-27.55,0.0027717825726174053
553
+ Telangana,Suryapet,19914.79,15865.94,-4048.8500000000004,-20.33,56.85,45.292,-11.558,-20.33,0.002854662288680925
554
+ Madhya Pradesh,Ashok Nagar,11232.36,6636.47,-4595.89,-40.92,28.329,16.738,-11.591,-40.92,0.0025220879672660066
555
+ Telangana,Mancherial,10785.32,5709.16,-5076.16,-47.07,25.376,13.433,-11.943,-47.06,0.0023528277325104868
556
+ Madhya Pradesh,Guna,11406.57,5717.81,-5688.759999999999,-49.87,24.136,12.099,-12.037,-49.87,0.0021159735135101963
557
+ Rajasthan,Bharatpur,11245.4,6343.53,-4901.87,-43.59,27.734,15.645,-12.089,-43.59,0.0024662528678392947
558
+ Maharashtra,Nashik,14313.02,9776.78,-4536.24,-31.69,38.496,26.295,-12.201,-31.69,0.002689579138434796
559
+ Kerala,Kannur,18684.79,14361.5,-4323.290000000001,-23.14,52.872,40.638,-12.234,-23.14,0.0028296812541109637
560
+ Uttar Pradesh,Budaun,14600.0,10070.39,-4529.610000000001,-31.02,39.583,27.302,-12.281,-31.03,0.0027111643835616437
561
+ Madhya Pradesh,Chhindwara,28645.85,24430.15,-4215.699999999997,-14.72,83.681,71.366,-12.315,-14.72,0.002921225936741273
562
+ Uttar Pradesh,Kushi Nagar,19963.69,15611.35,-4352.339999999998,-21.8,56.74,44.37,-12.37,-21.8,0.002842159941373564
563
+ Assam,Udalguri,11340.85,6215.46,-5125.39,-45.19,27.504,15.074,-12.43,-45.19,0.0024252150412006155
564
+ Chhattisgarh,Jashpur,19228.02,14830.44,-4397.58,-22.87,54.441,41.99,-12.451,-22.87,0.0028313367679043398
565
+ Uttar Pradesh,Auraiya,12628.94,6144.86,-6484.080000000001,-51.34,24.819,12.076,-12.743,-51.34,0.0019652480730766
566
+ Meghalaya,South West Garo Hills,11746.32,6421.98,-5324.34,-45.33,28.456,15.558,-12.898,-45.33,0.0024225459548181897
567
+ Uttar Pradesh,Moradabad,14381.46,9535.17,-4846.289999999999,-33.7,38.376,25.444,-12.932,-33.7,0.002668435610848968
568
+ Maharashtra,Dhule,11760.61,6193.98,-5566.630000000001,-47.33,27.642,14.558,-13.084,-47.33,0.0023503882876823564
569
+ Jharkhand,Dumka,19096.47,14422.66,-4673.810000000001,-24.47,53.707,40.562,-13.145,-24.48,0.0028124045962421326
570
+ Kerala,Malappuram,18858.68,14167.44,-4691.24,-24.88,52.92,39.756,-13.164,-24.88,0.0028061348938525923
571
+ Uttar Pradesh,Sultanpur,27055.56,22495.24,-4560.32,-16.86,78.195,65.015,-13.18,-16.86,0.0028901637962769943
572
+ Madhya Pradesh,Khargone,17612.71,12878.26,-4734.449999999999,-26.88,49.054,35.868,-13.186,-26.88,0.002785147771126647
573
+ Meghalaya,North Garo Hills,12018.96,6506.77,-5512.189999999999,-45.86,28.898,15.645,-13.253,-45.86,0.0024043677655970234
574
+ Bihar,Patna,19942.98,15195.28,-4747.699999999999,-23.81,56.338,42.926,-13.412,-23.81,0.00282495394369347
575
+ Tripura,Dhalai,24957.5,20250.39,-4707.110000000001,-18.86,71.669,58.152,-13.517,-18.86,0.002871641791044776
576
+ Chhattisgarh,Mungeli,14674.75,9553.08,-5121.67,-34.9,38.776,25.243,-13.533,-34.9,0.002642361880100172
577
+ Maharashtra,Washim,14826.09,7134.65,-7691.4400000000005,-51.88,26.187,12.602,-13.585,-51.88,0.0017662782297962578
578
+ Madhya Pradesh,Damoh,20460.75,15644.48,-4816.27,-23.54,57.838,44.223,-13.615,-23.54,0.002826778099532031
579
+ Madhya Pradesh,Alirajpur,13119.89,7646.46,-5473.429999999999,-41.72,32.83,19.134,-13.696,-41.72,0.0025023075650786705
580
+ Bihar,Lakhisarai,13302.99,7806.52,-5496.469999999999,-41.32,33.486,19.65,-13.836,-41.32,0.0025171784689006003
581
+ Madhya Pradesh,Sheopur,12448.9,6440.0,-6008.9,-48.27,28.709,14.852,-13.857,-48.27,0.002306147531107166
582
+ Manipur,Imphal East,15420.98,7400.31,-8020.669999999999,-52.01,26.955,12.935,-14.02,-52.01,0.0017479433862179965
583
+ Jammu and Kashmir,Kupwara,14803.26,9438.86,-5364.4,-36.24,38.793,24.735,-14.058,-36.24,0.0026205714146748755
584
+ Uttar Pradesh,Jaunpur,39425.13,34624.39,-4800.739999999998,-12.18,116.275,102.116,-14.159,-12.18,0.00294926104238591
585
+ Telangana,Siddipet,17696.39,12537.31,-5159.08,-29.15,48.615,34.442,-14.173,-29.15,0.0027471704681011214
586
+ Chhattisgarh,Janjgir-Champa,15997.51,7655.6,-8341.91,-52.15,27.66,13.237,-14.423,-52.14,0.001729019078594106
587
+ Kerala,Kollam,32211.56,27169.7,-5041.860000000001,-15.65,93.768,79.091,-14.677,-15.65,0.002911004620701388
588
+ Chhattisgarh,Bemetara,13721.91,7703.85,-6018.0599999999995,-43.86,33.681,18.909,-14.772,-43.86,0.0024545416782357555
589
+ Karnataka,Kolar,14916.09,9171.84,-5744.25,-38.51,38.543,23.7,-14.843,-38.51,0.002583988163117814
590
+ Jharkhand,Palamu,19396.15,14026.73,-5369.420000000002,-27.68,53.749,38.87,-14.879,-27.68,0.002771116948466577
591
+ Odisha,Kalahandi,31155.63,25987.52,-5168.110000000001,-16.59,90.118,75.169,-14.949,-16.59,0.0028925109201771875
592
+ Bihar,Nawada,31475.44,26296.35,-5179.09,-16.45,91.051,76.069,-14.982,-16.45,0.002892763373601767
593
+ Bihar,Darbhanga,26783.76,21553.2,-5230.559999999998,-19.53,76.72,61.737,-14.983,-19.53,0.0028644223215859165
594
+ Telangana,Kamareddy,14712.49,8712.32,-6000.17,-40.78,37.214,22.037,-15.177,-40.78,0.002529415483035163
595
+ Uttar Pradesh,Hamirpur,17268.67,11587.94,-5680.729999999998,-32.9,46.167,30.98,-15.187,-32.9,0.0026734542961328236
596
+ Bihar,Purbi Champaran,41023.03,35863.21,-5159.82,-12.58,120.91,105.702,-15.208,-12.58,0.0029473688316050766
597
+ Kerala,Kozhikode,34512.66,29295.16,-5217.500000000004,-15.12,100.609,85.399,-15.21,-15.12,0.002915133171421733
598
+ Karnataka,Hassan,15584.31,9749.38,-5834.93,-37.44,40.7,25.461,-15.239,-37.44,0.002611601026930291
599
+ Tripura,Gomati,18183.65,12566.53,-5617.120000000001,-30.89,49.338,34.097,-15.241,-30.89,0.00271331663334919
600
+ Uttar Pradesh,Ghazipur,29772.78,24436.07,-5336.709999999999,-17.92,85.766,70.393,-15.373,-17.92,0.002880684974664778
601
+ Jharkhand,Latehar,18732.23,13095.88,-5636.35,-30.09,51.176,35.778,-15.398,-30.09,0.002731975851246755
602
+ Rajasthan,Chittorgarh,19541.01,13896.4,-5644.609999999999,-28.89,53.741,38.217,-15.524,-28.89,0.002750164909592698
603
+ Jharkhand,Hazaribagh,17620.63,11800.57,-5820.060000000001,-33.03,47.104,31.546,-15.558,-33.03,0.0026732301852998444
604
+ Rajasthan,Bundi,15856.22,9834.7,-6021.519999999999,-37.98,41.12,25.504,-15.616,-37.98,0.002593304078777918
605
+ Karnataka,Kalaburagi,19737.7,13930.74,-5806.960000000001,-29.42,54.121,38.198,-15.923,-29.42,0.0027420114805676446
606
+ Telangana,Jagtial,14450.63,7688.0,-6762.629999999999,-46.8,34.114,18.149,-15.965,-46.8,0.002360727525374326
607
+ Bihar,Buxar,18601.21,12531.86,-6069.3499999999985,-32.63,49.755,33.521,-16.234,-32.63,0.0026748259925026387
608
+ Maharashtra,Jalna,25461.84,19706.61,-5755.23,-22.6,72.187,55.87,-16.317,-22.6,0.002835105396939106
609
+ Assam,Dhemaji,15658.21,7786.26,-7871.949999999999,-50.27,32.625,16.223,-16.402,-50.27,0.002083571493804209
610
+ Andhra Pradesh,Annamayya,30000.51,24262.06,-5738.449999999997,-19.13,86.083,69.617,-16.466,-19.13,0.002869384553795919
611
+ Telangana,Kumram Bheem(Asifabad),14917.96,7816.98,-7100.98,-47.6,34.655,18.159,-16.496,-47.6,0.0023230388069146184
612
+ Rajasthan,Kota,16670.04,10138.92,-6531.120000000001,-39.18,42.786,26.023,-16.763,-39.18,0.0025666405119603793
613
+ Himachal Pradesh,Chamba,24035.13,18024.08,-6011.049999999999,-25.01,67.436,50.571,-16.865,-25.01,0.0028057264512403304
614
+ Uttar Pradesh,Rampur,16375.64,9653.41,-6722.23,-41.05,41.278,24.333,-16.945,-41.05,0.0025206953743487277
615
+ Chhattisgarh,Raigarh,16507.48,8164.44,-8343.04,-50.54,33.751,16.693,-17.058,-50.54,0.002044588271498739
616
+ Chhattisgarh,Baloda Bazar,20198.92,13878.26,-6320.659999999998,-31.29,54.523,37.462,-17.061,-31.29,0.002699302734997713
617
+ Bihar,Bhagalpur,19540.99,13008.27,-6532.720000000001,-33.43,52.181,34.736,-17.445,-33.43,0.002670335535712366
618
+ Kerala,Alappuzha,35646.37,29590.38,-6055.990000000002,-16.99,102.95,85.46,-17.49,-16.99,0.0028880921114828803
619
+ Rajasthan,Rajsamand,35070.63,28971.82,-6098.809999999998,-17.39,101.202,83.603,-17.599,-17.39,0.0028856624474667266
620
+ Tamil Nadu,Virudhunagar,34004.69,27818.51,-6186.180000000004,-18.19,97.725,79.947,-17.778,-18.19,0.0028738682811106347
621
+ Telangana,Bhadradri Kothagudem,17547.88,10579.01,-6968.870000000001,-39.71,44.936,27.09,-17.846,-39.71,0.0025607651750524847
622
+ Bihar,Muzaffarpur,31609.49,25352.01,-6257.480000000003,-19.8,90.473,72.563,-17.91,-19.8,0.0028622100514750473
623
+ Bihar,Munger,18270.39,11209.96,-7060.43,-38.64,47.163,28.937,-18.226,-38.64,0.002581389888228987
624
+ Telangana,Nirmal,20101.47,13247.03,-6854.4400000000005,-34.1,53.53,35.277,-18.253,-34.1,0.0026629893236663786
625
+ Bihar,Siwan,19167.54,12093.5,-7074.040000000001,-36.91,50.155,31.645,-18.51,-36.91,0.002616663379859909
626
+ Chhattisgarh,Raipur,21199.5,14140.67,-7058.83,-33.3,56.614,37.763,-18.851,-33.3,0.002670534682421755
627
+ Maharashtra,Nanded,24266.9,17224.73,-7042.170000000002,-29.02,66.715,47.355,-19.36,-29.02,0.002749218070705364
628
+ Chhattisgarh,Korba,18214.47,10372.17,-7842.300000000001,-43.06,45.103,25.684,-19.419,-43.05,0.0024762180837542896
629
+ Jammu and Kashmir,Rajauri,17857.6,9070.9,-8786.699999999999,-49.2,39.503,20.066,-19.437,-49.2,0.0022121113699489297
630
+ Telangana,Adilabad,18669.8,10781.2,-7888.5999999999985,-42.25,46.42,26.806,-19.614,-42.25,0.002486368359596782
631
+ Bihar,Rohtas,19826.86,12217.96,-7608.9000000000015,-38.38,51.245,31.579,-19.666,-38.38,0.0025846250994862523
632
+ Uttar Pradesh,Firozabad,18002.93,9409.44,-8593.49,-47.73,41.815,21.855,-19.96,-47.73,0.0023226774752776354
633
+ Madhya Pradesh,Seoni,24724.12,17317.92,-7406.200000000001,-29.96,67.58,47.336,-20.244,-29.96,0.0027333632096915887
634
+ Odisha,Nuapada,19930.29,9804.04,-10126.25,-50.81,40.188,19.769,-20.419,-50.81,0.0020164282607026793
635
+ Gujarat,Panch Mahals,18705.8,9526.75,-9179.05,-49.07,41.638,21.206,-20.432,-49.07,0.0022259406173486297
636
+ Andhra Pradesh,Vizianagaram,61304.86,54331.55,-6973.309999999998,-11.37,181.374,160.743,-20.631,-11.37,0.00295855826112318
637
+ Rajasthan,Churu,35031.76,27769.01,-7262.750000000004,-20.73,99.77,79.086,-20.684,-20.73,0.0028479870837206005
638
+ Bihar,Pashchim Champaran,24053.8,16301.79,-7752.009999999998,-32.23,64.524,43.729,-20.795,-32.23,0.002682486758848914
639
+ Telangana,Khammam,20508.11,12281.38,-8226.730000000001,-40.11,52.08,31.188,-20.892,-40.12,0.002539483160564284
640
+ Bihar,Samastipur,37753.31,30430.99,-7322.319999999996,-19.4,108.285,87.283,-21.002,-19.4,0.002868225329116838
641
+ Karnataka,Ballari,20827.49,12639.69,-8187.800000000001,-39.31,53.437,32.43,-21.007,-39.31,0.002565695626309267
642
+ Uttar Pradesh,Mainpuri,19129.36,10305.03,-8824.33,-46.13,45.906,24.73,-21.176,-46.13,0.0023997666414349457
643
+ Uttar Pradesh,Jalaun,29330.76,21524.81,-7805.949999999997,-26.61,81.77,60.008,-21.762,-26.61,0.002787858207560936
644
+ Jharkhand,Deoghar,22226.47,13726.38,-8500.090000000002,-38.24,57.596,35.57,-22.026,-38.24,0.002591324668289656
645
+ Rajasthan,Baran,21702.55,12967.67,-8734.88,-40.25,55.091,32.918,-22.173,-40.25,0.0025384574623719333
646
+ Karnataka,Chitradurga,22807.82,14329.31,-8478.51,-37.17,59.668,37.487,-22.181,-37.17,0.002616120260507142
647
+ Rajasthan,Sirohi,23668.94,15186.71,-8482.23,-35.84,62.321,39.987,-22.334,-35.84,0.0026330287710391764
648
+ Tamil Nadu,Salem,42525.61,34732.45,-7793.1600000000035,-18.33,122.174,99.785,-22.389,-18.33,0.002872951146379793
649
+ Rajasthan,Pali,31508.66,23502.16,-8006.5,-25.41,88.242,65.819,-22.423,-25.41,0.0028005634006650873
650
+ Bihar,Saran,29369.01,21160.3,-8208.71,-27.95,81.016,58.372,-22.644,-27.95,0.002758553999607069
651
+ Bihar,Purnia,27039.1,18397.28,-8641.82,-31.96,72.584,49.386,-23.198,-31.96,0.002684408874555736
652
+ Tamil Nadu,Ranipet,25289.2,16496.76,-8792.440000000002,-34.77,66.911,43.648,-23.263,-34.77,0.0026458330038119037
653
+ Karnataka,Bidar,22142.02,11040.02,-11102.0,-50.14,46.417,23.144,-23.273,-50.14,0.0020963308677347417
654
+ Telangana,Vikarabad,22860.64,13598.53,-9262.109999999999,-40.52,57.958,34.476,-23.482,-40.52,0.002535274602985743
655
+ Uttar Pradesh,Maharajganj,28312.51,19604.32,-8708.189999999999,-30.76,76.975,53.299,-23.676,-30.76,0.0027187628366400576
656
+ Chhattisgarh,Bilaspur,21653.66,11896.43,-9757.23,-45.06,52.635,28.917,-23.718,-45.06,0.002430766900376195
657
+ Odisha,Kandhamal,31893.37,23362.79,-8530.579999999998,-26.75,88.861,65.093,-23.768,-26.75,0.0027861903586858337
658
+ Uttar Pradesh,Chandauli,22471.24,11354.36,-11116.880000000001,-49.47,49.156,24.838,-24.318,-49.47,0.0021875072314656422
659
+ Andhra Pradesh,Kakinada,25518.15,16202.67,-9315.480000000001,-36.51,66.84,42.44,-24.4,-36.51,0.0026193121366556746
660
+ Madhya Pradesh,Dhar,22382.24,11429.06,-10953.180000000002,-48.94,50.067,25.566,-24.501,-48.94,0.0022369074766421947
661
+ Odisha,Sundargarh,32692.36,23598.46,-9093.900000000001,-27.82,90.337,65.208,-25.129,-27.82,0.002763244990572721
662
+ Bihar,Araria,24187.38,13547.11,-10640.27,-43.99,59.331,33.231,-26.1,-43.99,0.0024529734101006394
663
+ Karnataka,Tumakuru,24323.11,12322.59,-12000.52,-49.34,53.271,26.988,-26.283,-49.34,0.0021901393366226605
664
+ Tamil Nadu,Ariyalur,24033.5,12850.52,-11182.98,-46.53,56.909,30.429,-26.48,-46.53,0.0023679031352071066
665
+ Bihar,Gaya,45679.72,36331.56,-9348.160000000003,-20.46,130.367,103.688,-26.679,-20.46,0.0028539360574014023
666
+ Chhattisgarh,Kawardha,26421.9,15363.74,-11058.160000000002,-41.85,65.999,38.377,-27.622,-41.85,0.0024978900079101044
667
+ Maharashtra,Gondia,34159.49,23972.56,-10186.929999999997,-29.82,93.475,65.599,-27.876,-29.82,0.0027364284419937185
668
+ Uttar Pradesh,Hardoi,32387.17,21862.91,-10524.259999999998,-32.5,86.694,58.523,-28.171,-32.49,0.002676800720779247
669
+ Bihar,Kishanganj,25780.32,13853.45,-11926.869999999999,-46.26,61.547,33.073,-28.474,-46.26,0.0023873636944770275
670
+ Maharashtra,Yavatmal,26865.05,15226.4,-11638.65,-43.32,66.292,37.573,-28.719,-43.32,0.0024675926529077745
671
+ Rajasthan,Jaisalmer,30664.11,19306.12,-11357.990000000002,-37.04,80.234,50.515,-29.719,-37.04,0.002616544227111108
672
+ Uttar Pradesh,Gorakhpur,26825.72,14164.2,-12661.52,-47.2,63.051,33.291,-29.76,-47.2,0.00235039357750696
673
+ Bihar,Vaishali,28966.34,16959.44,-12006.900000000001,-41.45,72.584,42.497,-30.087,-41.45,0.002505805013681397
674
+ Bihar,Saharsa,36442.24,25330.99,-11111.249999999996,-30.49,99.406,69.097,-30.309,-30.49,0.0027277686552747583
675
+ Bihar,Nalanda,35181.56,23090.87,-12090.689999999999,-34.37,93.258,61.208,-32.05,-34.37,0.002650763638678899
676
+ Bihar,Katihar,31709.05,18904.35,-12804.7,-40.38,80.469,47.974,-32.495,-40.38,0.0025377297648463133
677
+ Maharashtra,Amravati,44981.74,33190.9,-11790.839999999997,-26.21,125.621,92.693,-32.928,-26.21,0.002792710997840457
678
+ Tamil Nadu,Namakkal,31198.1,17807.37,-13390.73,-42.92,77.273,44.106,-33.167,-42.92,0.0024768495517355224
679
+ Tamil Nadu,Tiruvallur,43896.18,31920.47,-11975.71,-27.28,121.737,88.525,-33.212,-27.28,0.0027732937125736225
680
+ Rajasthan,Sri Ganganagar,39608.45,27108.25,-12500.199999999997,-31.56,106.567,72.935,-33.632,-31.56,0.0026905117468620965
681
+ Jharkhand,Garhwa,35448.39,22413.07,-13035.32,-36.77,92.846,58.704,-34.142,-36.77,0.0026191880646765624
682
+ Andhra Pradesh,Eluru,42110.87,29608.99,-12501.880000000001,-29.69,115.324,81.087,-34.237,-29.69,0.002738580323797632
683
+ Tamil Nadu,Tiruvannamalai,78187.39,66262.78,-11924.61,-15.25,227.733,193.001,-34.732,-15.25,0.002912656375919442
684
+ Andhra Pradesh,Sri Sathya Sai,31516.06,16725.0,-14791.060000000001,-46.93,74.247,39.402,-34.845,-46.93,0.0023558465112707614
685
+ Bihar,Jamui,31828.37,16550.37,-15278.0,-48.0,73.878,38.416,-35.462,-48.0,0.002321136772005604
686
+ Rajasthan,Hanumangarh,39268.34,24303.41,-14964.929999999997,-38.11,101.759,62.979,-38.78,-38.11,0.002591375138343001
687
+ Karnataka,Raichur,41250.24,26522.55,-14727.689999999999,-35.7,108.725,69.907,-38.818,-35.7,0.0026357422405299943
688
+ Tamil Nadu,Madurai,40407.61,24414.36,-15993.25,-39.58,103.623,62.609,-41.014,-39.58,0.002564442687899631
689
+ Uttar Pradesh,Azamgarh,38338.5,21268.01,-17070.49,-44.53,93.567,51.906,-41.661,-44.53,0.0024405493172659333
690
+ Maharashtra,Chatrapati Sambhaji Nagar,48511.79,32812.66,-15699.129999999997,-32.36,130.042,87.958,-42.084,-32.36,0.0026806267095071117
691
+ Andhra Pradesh,Anakapalli,42445.38,25872.53,-16572.85,-39.05,109.084,66.492,-42.592,-39.05,0.0025699852374981686
692
+ Tamil Nadu,Tiruchirappalli,49657.91,33388.75,-16269.160000000003,-32.76,132.794,89.287,-43.507,-32.76,0.0026741761785785993
693
+ Uttar Pradesh,Basti,44631.62,27742.1,-16889.520000000004,-37.84,116.275,72.274,-44.001,-37.84,0.0026052157640704056
694
+ Kerala,Thiruvananthapuram,43343.3,26072.23,-17271.070000000003,-39.85,110.654,66.562,-44.092,-39.85,0.002552966663821167
695
+ Madhya Pradesh,Morena,40950.05,22278.84,-18671.210000000003,-45.6,98.501,53.589,-44.912,-45.6,0.0024053938884079505
696
+ Rajasthan,Dungarpur,75259.77,59355.17,-15904.600000000006,-21.13,214.214,168.944,-45.27,-21.13,0.0028463281245743907
697
+ Rajasthan,Pratapgarh,41853.16,23049.91,-18803.250000000004,-44.93,101.776,56.051,-45.725,-44.93,0.0024317399211911356
698
+ Andhra Pradesh,Parvathipuram Manyam,43888.63,25285.58,-18603.049999999996,-42.39,109.104,62.858,-46.246,-42.39,0.0024859285878825565
699
+ Tamil Nadu,Kallakurichi,46627.78,22874.65,-23753.129999999997,-50.94,93.189,45.717,-47.472,-50.94,0.001998572524791015
700
+ Andhra Pradesh,Srikakulam,70040.57,52991.94,-17048.630000000005,-24.34,197.023,149.065,-47.958,-24.34,0.0028129839605817025
701
+ Maharashtra,Beed,47848.46,27950.73,-19897.73,-41.58,119.751,69.953,-49.798,-41.58,0.0025027137759501563
702
+ Tamil Nadu,Thanjavur,47953.32,24742.85,-23210.47,-48.4,109.921,56.717,-53.204,-48.4,0.0022922500465035584
703
+ Tamil Nadu,Cuddalore,53050.01,28436.33,-24613.68,-46.4,126.522,67.819,-58.703,-46.4,0.0023849571376141116
704
+ Andhra Pradesh,Alluri Sitharama Raju,54797.3,27541.7,-27255.600000000002,-49.74,119.446,60.035,-59.411,-49.74,0.0021797789307137394
705
+ Tamil Nadu,Villupuram,60813.4,34873.88,-25939.520000000004,-42.65,150.844,86.503,-64.341,-42.65,0.002480440166147592
706
+ Rajasthan,Udaipur,60574.8,32307.89,-28266.910000000003,-46.66,143.347,76.455,-66.892,-46.66,0.0023664461129050364
707
+ Rajasthan,Barmer,92571.15,68182.19,-24388.959999999992,-26.35,258.506,190.4,-68.106,-26.35,0.00279251148981081
data/scraper/mnrega_scraper.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ mnrega_scraper.py
3
+ -----------------
4
+ Real MNREGA data scraper for nreganarep.nic.in
5
+
6
+ STRATEGY:
7
+ The portal has captchas on the main MIS page, but the R14 district-level
8
+ consolidated summary reports are accessible via direct GET URLs.
9
+
10
+ R14 report gives per-district per-year:
11
+ - Households demanded / offered / availed
12
+ - Person days (total, SC, ST, Women)
13
+ - Expenditure (Rs. lakhs)
14
+ - Average wage rate
15
+ - Works completed / in progress
16
+
17
+ Two-step approach:
18
+ Step 1: Fetch state-level page → extract district links (which have
19
+ embedded Digest tokens needed to access sub-pages)
20
+ Step 2: Follow each district link β†’ parse the HTML table
21
+
22
+ HOW TO RUN:
23
+ pip install requests beautifulsoup4 lxml
24
+
25
+ # Maharashtra only (fast, ~2-5 min):
26
+ python data/scraper/mnrega_scraper.py --state Maharashtra
27
+
28
+ # All India (slow, ~30-60 min):
29
+ python data/scraper/mnrega_scraper.py --all-india
30
+
31
+ # Resume after interruption:
32
+ python data/scraper/mnrega_scraper.py --all-india --resume
33
+
34
+ # Custom year range:
35
+ python data/scraper/mnrega_scraper.py --state Maharashtra --years 2018-2019 2023-2024
36
+
37
+ OUTPUT:
38
+ data/raw/mnrega_real_data.csv
39
+ → drop this in as replacement for mnrega_india_unified.csv
40
+ → run: python main.py --stage 3
41
+ """
42
+
43
+ import os, json, time, argparse
44
+ import requests
45
+ from bs4 import BeautifulSoup
46
+ import pandas as pd
47
+ from datetime import datetime
48
+
49
+ # ── State codes ────────────────────────────────────────────────────────────────
50
# State name -> two-digit state code used in the R14 report URLs.
# Insertion order is the order in which an all-India run visits the states.
STATE_CODES = dict((
    ("Andhra Pradesh", "02"),
    ("Arunachal Pradesh", "03"),
    ("Assam", "04"),
    ("Bihar", "05"),
    ("Chhattisgarh", "33"),
    ("Goa", "10"),
    ("Gujarat", "11"),
    ("Haryana", "12"),
    ("Himachal Pradesh", "13"),
    ("Jharkhand", "34"),
    ("Karnataka", "15"),
    ("Kerala", "16"),
    ("Madhya Pradesh", "17"),
    ("Maharashtra", "18"),
    ("Manipur", "19"),
    ("Meghalaya", "20"),
    ("Mizoram", "21"),
    ("Nagaland", "22"),
    ("Odisha", "24"),
    ("Punjab", "25"),
    ("Rajasthan", "27"),
    ("Sikkim", "28"),
    ("Tamil Nadu", "29"),
    ("Telangana", "36"),
    ("Tripura", "30"),
    ("Uttar Pradesh", "31"),
    ("Uttarakhand", "35"),
    ("West Bengal", "32"),
    ("Delhi", "07"),
))

# Financial years 2014-2015 through 2023-2024, in "YYYY-YYYY" form.
ALL_YEARS = [f"{first}-{first + 1}" for first in range(2014, 2024)]

BASE_URL = "https://nreganarep.nic.in/netnrega"
_RAW_DIR = os.path.join("data", "raw")
OUTPUT_PATH = os.path.join(_RAW_DIR, "mnrega_real_data.csv")
CHECKPOINT_PATH = os.path.join(_RAW_DIR, ".scraper_checkpoint.json")
DELAY = 1.5  # default pause, in seconds, passed to MNREGAScraper

# Headers installed on the scraper's requests session.
HEADERS = dict(
    [
        ("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
        ("Referer", "https://nreganarep.nic.in/netnrega/MISreport4.aspx"),
    ]
)

# State groupings used to derive the `state_category` and `region` columns.
HIGH_ACTIVITY = {
    "Rajasthan",
    "Uttar Pradesh",
    "Madhya Pradesh",
    "West Bengal",
    "Andhra Pradesh",
    "Telangana",
    "Jharkhand",
    "Odisha",
    "Chhattisgarh",
    "Bihar",
}
MID_ACTIVITY = {
    "Maharashtra",
    "Tamil Nadu",
    "Karnataka",
    "Gujarat",
    "Himachal Pradesh",
    "Uttarakhand",
    "Assam",
}
SOUTH = {
    "Tamil Nadu",
    "Kerala",
    "Karnataka",
    "Andhra Pradesh",
    "Telangana",
}
EAST = {
    "West Bengal",
    "Odisha",
    "Jharkhand",
    "Bihar",
    "Assam",
}
105
+
106
+
107
class MNREGAScraper:
    """Scrape MNREGA R14 report tables and collect them as flat records.

    Parsed rows accumulate in ``self.records`` (one dict per table row) and
    are written to ``OUTPUT_PATH`` as CSV.  All-India runs also keep a JSON
    checkpoint at ``CHECKPOINT_PATH`` listing the ``"state|year"`` keys that
    have been scraped, so an interrupted run can be resumed.
    """

    def __init__(self, delay=DELAY):
        self.session = requests.Session()
        self.session.headers.update(HEADERS)
        self.delay = delay      # seconds slept after each district request
        self.records = []       # one dict per parsed table row
        self.checkpoint = self._load_checkpoint()

    # ── Public ────────────────────────────────────────────────────────────────

    def scrape_state(self, state_name: str, years: list) -> pd.DataFrame:
        """Scrape every year in *years* for one state and write the CSV.

        Raises:
            ValueError: if *state_name* is not a key of ``STATE_CODES``.
        """
        code = STATE_CODES.get(state_name)
        if not code:
            raise ValueError(f"Unknown state '{state_name}'. Options: {list(STATE_CODES)}")
        # Guard the banner so an empty year list does not raise IndexError.
        span = f"{years[0]}→{years[-1]}" if years else "(none)"
        print(f"\n{'='*60}")
        print(f"[scraper] State: {state_name} | Code: {code} | Years: {span}")
        print(f"{'='*60}")
        for year in years:
            self._scrape_year(state_name, code, year)
        return self._finalize()

    def scrape_all_india(self, years: list, resume: bool = False) -> pd.DataFrame:
        """Scrape every state in ``STATE_CODES`` for every year in *years*.

        With ``resume=True`` the state-years listed in the checkpoint are
        skipped and the rows saved by the earlier run are reloaded first, so
        the final CSV contains both old and new rows.  If the checkpoint
        exists but no saved rows can be read, the checkpoint is ignored and
        everything is scraped again.
        """
        done = set(self.checkpoint.get("done", [])) if resume else set()
        if done and not self._restore_records():
            print("[scraper] Checkpoint present but no saved rows found — re-scraping everything")
            done = set()

        total = len(STATE_CODES) * len(years)
        count = 0
        for state_name, code in STATE_CODES.items():
            for year in years:
                count += 1
                key = f"{state_name}|{year}"
                if key in done:
                    print(f"[scraper] [{count}/{total}] SKIP {key}")
                    continue
                print(f"[scraper] [{count}/{total}] {key}")
                if not self._scrape_year(state_name, code, year):
                    # The state page could not be fetched: leave the key out
                    # of the checkpoint so a later --resume retries it.
                    continue
                # Write the rows before recording progress, so the checkpoint
                # never claims a state-year whose data is not on disk.
                self._write_csv()
                done.add(key)
                self._save_checkpoint(sorted(done))
        return self._finalize()

    # ── Core ──────────────────────────────────────────────────────────────────

    def _scrape_year(self, state_name: str, state_code: str, year: str):
        """Fetch one state-year page, follow its district links, store rows.

        Returns:
            bool: ``False`` when the state-level page could not be fetched,
            ``True`` otherwise.  Individual district pages that fail are
            logged by ``_get`` and skipped.
        """
        url = f"{BASE_URL}/nrega_R14.aspx?state_code={state_code}&fin_year={year}&rpt=RP"
        soup = self._get(url)
        if soup is None:
            return False

        district_links = self._find_district_links(soup, base_url=url)
        if not district_links:
            # State-level page may already contain the district table
            rows = self._parse_table(soup, state_name, year)
            self.records.extend(rows)
            print(f" → {len(rows)} rows (direct table)")
            return True

        print(f" → {len(district_links)} districts")
        for name, durl in district_links:
            dsoup = self._get(durl)
            if dsoup is not None:
                self.records.extend(self._parse_table(dsoup, state_name, year, name))
            time.sleep(self.delay)
        return True

    def _get(self, url: str):
        """Return the parsed page at *url*, or ``None`` on a request failure."""
        try:
            r = self.session.get(url, timeout=20)
            r.raise_for_status()
        except requests.RequestException as e:
            # Network/HTTP problems are best-effort: log and let the caller skip.
            print(f" [ERROR] {url[:80]}... → {e}")
            return None
        return BeautifulSoup(r.text, "lxml")

    def _find_district_links(self, soup: BeautifulSoup, base_url: str = None) -> list:
        """Collect ``(link text, absolute URL)`` pairs for district links.

        A link qualifies when its href mentions ``district_code`` or
        ``nrega_r14`` (case-insensitive) and it has visible text.  Relative
        hrefs are resolved against *base_url* (default: ``BASE_URL`` as a
        directory).  Each URL is returned once, in page order.
        """
        from urllib.parse import urljoin

        base = base_url or f"{BASE_URL}/"
        links = []
        seen = set()
        for a in soup.find_all("a", href=True):
            href = a["href"]
            text = a.get_text(strip=True)
            lowered = href.lower()
            if not text or ("district_code" not in lowered and "nrega_r14" not in lowered):
                continue
            full = urljoin(base, href)
            if full in seen:
                continue
            seen.add(full)
            links.append((text.title(), full))
        return links

    def _parse_table(self, soup, state_name, year, district_hint=None):
        """Return a record for each usable row of every report table in *soup*.

        A table is treated as a report table when its header cells mention
        "person", "household" or "expenditure".  The first ``<tr>`` of such
        a table is skipped.
        """
        records = []
        for table in soup.find_all("table"):
            header_text = " ".join(
                th.get_text(" ", strip=True).lower() for th in table.find_all("th")
            )
            if not any(k in header_text for k in ("person", "household", "expenditure")):
                continue
            for row in table.find_all("tr")[1:]:
                cells = [td.get_text(strip=True) for td in row.find_all("td")]
                record = self._map(cells, state_name, year, district_hint)
                if record:
                    records.append(record)
        return records

    @staticmethod
    def _num(value) -> float:
        """Parse a table cell as a float; blanks, "-" and junk become 0.0.

        Thousands separators are removed.  A leading minus sign is kept, so
        "-5" parses as -5.0 rather than being rewritten into a positive number.
        """
        try:
            return float(str(value).replace(",", "").strip())
        except ValueError:
            return 0.0

    def _map(self, cells, state_name, year, district_hint=None):
        """Convert one row's cell texts into a record dict.

        Returns ``None`` for rows with fewer than six cells, rows whose
        district name is empty, numeric or shorter than three characters,
        and rows whose district name contains "total", "grand" or "state".
        """
        num = self._num

        if len(cells) < 6:
            return None

        district = district_hint or cells[0]
        if not district or str(district).isdigit() or len(str(district)) < 3:
            return None

        # Skip subtotal/total rows
        dl = district.lower()
        if any(t in dl for t in ["total", "grand", "state"]):
            return None

        # Person days: values above 1000 are taken to be raw days and are
        # converted to lakhs; smaller values are assumed to be lakhs already.
        pd_raw = num(cells[4])
        pd_lakhs = round(pd_raw / 1e5, 3) if pd_raw > 1000 else pd_raw

        # Expenditure: same heuristic, with a threshold of one lakh.
        exp_raw = num(cells[8]) if len(cells) > 8 else 0
        exp_lakhs = round(exp_raw / 1e5, 2) if exp_raw > 1e5 else exp_raw

        # Clean year format: 2023-2024 → 2023-24.  Taking the LAST two
        # characters also leaves an already-short "2023-24" intact.
        yr_parts = year.split("-")
        fin_year = f"{yr_parts[0]}-{yr_parts[1][-2:]}" if len(yr_parts) == 2 else year

        if state_name in SOUTH:
            region = "South"
        elif state_name in EAST:
            region = "East"
        else:
            region = "Other"

        if state_name in HIGH_ACTIVITY:
            category = "high"
        elif state_name in MID_ACTIVITY:
            category = "mid"
        else:
            category = "low"

        return {
            "state": state_name,
            "district": str(district).title().strip(),
            "financial_year": fin_year,
            "region": region,
            "state_category": category,
            "person_days_lakhs": pd_lakhs,
            "expenditure_lakhs": exp_lakhs,
            "avg_wage_rate": num(cells[9]) if len(cells) > 9 else None,
            "households_demanded": num(cells[1]),
            "households_offered": num(cells[2]),
            "households_availed": num(cells[3]),
            "works_completed": num(cells[10]) if len(cells) > 10 else None,
            # Stage 2/3 — fill via enrich.py with IMD/census/PMKISAN data
            "rainfall_mm": None,
            "crop_season_index": None,
            "rural_population_lakhs": None,
            "poverty_rate_pct": None,
            "pmkisan_beneficiaries": None,
            "pmkisan_amount_lakhs": None,
            "pmay_houses_sanctioned": None,
            "pmay_houses_completed": None,
            "pmay_expenditure_lakhs": None,
            "budget_allocated_lakhs": round(exp_lakhs * 1.12, 2) if exp_lakhs else None,
        }

    # ── Persistence ───────────────────────────────────────────────────────────

    def _write_csv(self) -> pd.DataFrame:
        """Write all collected records to ``OUTPUT_PATH`` and return the frame."""
        df = pd.DataFrame(self.records)
        os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
        df.to_csv(OUTPUT_PATH, index=False)
        return df

    def _restore_records(self) -> bool:
        """Prepend rows saved at ``OUTPUT_PATH`` to ``self.records``.

        Returns ``True`` when at least one row was loaded, ``False`` when the
        file is missing, unreadable or empty.
        """
        try:
            previous = pd.read_csv(OUTPUT_PATH)
        except (OSError, ValueError) as exc:
            # ValueError covers pandas' EmptyDataError and ParserError.
            print(f"[scraper] Could not reload {OUTPUT_PATH}: {exc}")
            return False
        if previous.empty:
            return False
        self.records = previous.to_dict("records") + self.records
        return True

    def _finalize(self) -> pd.DataFrame:
        """Write the CSV, print a run summary and return the DataFrame."""
        df = self._write_csv()
        print(f"\n{'='*60}")
        print(f"[scraper] DONE: {len(df)} rows | {df['district'].nunique() if len(df) else 0} districts")
        print(f"[scraper] Saved → {OUTPUT_PATH}")
        print(f"[scraper] Next step: copy this to data/raw/mnrega_india_unified.csv")
        print(f" then run: python main.py --stage 3")
        print(f"{'='*60}")
        return df

    def _save_checkpoint(self, done):
        """Persist the finished ``"state|year"`` keys with a timestamp."""
        os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)
        # Write to a temp file and rename, so an interruption mid-write
        # cannot leave a truncated checkpoint behind.
        tmp_path = f"{CHECKPOINT_PATH}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump({"done": list(done), "ts": str(datetime.now())}, f)
        os.replace(tmp_path, CHECKPOINT_PATH)

    def _load_checkpoint(self):
        """Return the saved checkpoint dict, or ``{}`` if absent or unreadable."""
        if not os.path.exists(CHECKPOINT_PATH):
            return {}
        try:
            with open(CHECKPOINT_PATH, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, json.JSONDecodeError) as exc:
            print(f"[scraper] Ignoring unreadable checkpoint {CHECKPOINT_PATH}: {exc}")
            return {}
        return data if isinstance(data, dict) else {}
282
+
283
+
284
+ # ── CLI ───────────────────────────────────────────────────────────────────────
285
+
286
+ if __name__ == "__main__":
287
+ ap = argparse.ArgumentParser()
288
+ ap.add_argument("--state", type=str, help="Single state e.g. 'Maharashtra'")
289
+ ap.add_argument("--all-india", action="store_true")
290
+ ap.add_argument("--resume", action="store_true", help="Resume from checkpoint")
291
+ ap.add_argument("--years", nargs=2, default=["2014-2015", "2023-2024"],
292
+ metavar=("START", "END"),
293
+ help="e.g. --years 2018-2019 2023-2024")
294
+ ap.add_argument("--delay", type=float, default=1.5)
295
+ args = ap.parse_args()
296
+
297
+ start = int(args.years[0].split("-")[0])
298
+ end = int(args.years[1].split("-")[0])
299
+ years = [f"{y}-{y+1}" for y in range(start, end + 1)]
300
+
301
+ scraper = MNREGAScraper(delay=args.delay)
302
+
303
+ if args.state:
304
+ df = scraper.scrape_state(args.state, years)
305
+ elif args.all_india:
306
+ df = scraper.scrape_all_india(years, resume=args.resume)
307
+ else:
308
+ print("Usage:")
309
+ print(" python data/scraper/mnrega_scraper.py --state Maharashtra")
310
+ print(" python data/scraper/mnrega_scraper.py --all-india")
311
+ print(" python data/scraper/mnrega_scraper.py --all-india --resume")
312
+ exit(0)
fix_optimizer.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ fix_optimizer.py
3
+ ----------------
4
+ Standalone script to re-run the two-stage proportional-LP optimizer.
5
+
6
+ Run this AFTER replacing src/optimize.py to regenerate
7
+ data/processed/optimized_budget_allocation.csv with realistic
8
+ continuous budget_change_pct values (instead of bang-bang -60%/+150%).
9
+
10
+ Usage:
11
+ cd SchemeImpactNet/
12
+ python fix_optimizer.py
13
+
14
+ Then reseed the database:
15
+ rm data/schemeimpactnet.db
16
+ ./start.sh
17
+ """
18
+
19
import os
import sys

# Put this script's directory on sys.path so `src` can be imported.  The
# path is made absolute because dirname(__file__) can be relative or empty.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from src.optimize import run_optimizer


def main() -> None:
    """Re-run the optimizer for all states and print a summary of the result."""
    print("=" * 60)
    print("SchemeImpactNet — Optimizer Fix (v2 Proportional-LP)")
    print("=" * 60)

    result = run_optimizer(
        predictions_path="data/processed/mnrega_predictions.csv",
        raw_path="data/raw/mnrega_real_data_final_clean.csv",
        scope_state=None,  # All-India
        target_year=2024,
    )

    change = result["budget_change_pct"]
    print(f"\n✅ Done. {len(result)} districts processed.")
    print(f" budget_change_pct range: {change.min():.1f}% to {change.max():.1f}%")
    print(f" Unique values: {change.nunique()}")
    print("\nNext steps:")
    print(" rm data/schemeimpactnet.db")
    print(" ./start.sh")


if __name__ == "__main__":
    main()
frontend/app.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# frontend/app.py — SchemeImpactNet entry point
# Run from project root: streamlit run frontend/app.py

import os
import sys

# Make this file's directory importable.
sys.path.insert(0, os.path.dirname(__file__))

import streamlit as st

st.set_page_config(
    page_title="SchemeImpactNet",
    page_icon="🏛️",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Global stylesheet, injected once per run via st.markdown below.
_GLOBAL_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Fraunces:ital,opsz,wght@0,9..144,300;0,9..144,600;0,9..144,700;1,9..144,300&family=Source+Serif+4:ital,opsz,wght@0,8..60,300;0,8..60,400;0,8..60,600&family=DM+Mono:wght@400;500&display=swap');

/* ── Global ── */
html, body, [class*="css"] { font-family: 'Source Serif 4', Georgia, serif !important; }
.stApp { background-color: #FAF9F7 !important; }
#MainMenu, footer, header { visibility: hidden; }
.block-container { padding: 2rem 2.5rem 3rem !important; max-width: 1320px !important; }

/* ── Sidebar shell ── */
[data-testid="stSidebar"] {
    background: #1C1917 !important;
    border-right: none !important;
    min-width: 220px !important;
}
[data-testid="stSidebarContent"] {
    background: #1C1917 !important;
}
section[data-testid="stSidebar"] > div {
    background: #1C1917 !important;
}

/* ── Sidebar text ── */
[data-testid="stSidebar"] p,
[data-testid="stSidebar"] span,
[data-testid="stSidebar"] label,
[data-testid="stSidebar"] div {
    color: #A8A29E !important;
}

/* ── Nav links from st.navigation() ── */
[data-testid="stSidebarNavLink"] {
    border-radius: 5px !important;
    padding: 0.48rem 1rem !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 0.7rem !important;
    letter-spacing: 0.5px !important;
    color: #A8A29E !important;
    border-left: 2px solid transparent !important;
    transition: all 0.15s ease !important;
}
[data-testid="stSidebarNavLink"]:hover {
    background: rgba(251,146,60,0.1) !important;
    color: #FB923C !important;
    border-left-color: rgba(251,146,60,0.5) !important;
}
[data-testid="stSidebarNavLink"][aria-current="page"] {
    background: rgba(251,146,60,0.15) !important;
    color: #FB923C !important;
    border-left-color: #FB923C !important;
}
[data-testid="stSidebarNavLink"] svg { display: none !important; }

/* ── Sidebar nav section label ── */
[data-testid="stSidebarNavSeparator"] {
    border-color: rgba(255,255,255,0.07) !important;
}

/* ── Collapse button ── */
[data-testid="collapsedControl"] {
    background: #1C1917 !important;
    color: #A8A29E !important;
    border-right: 1px solid #292524 !important;
}
button[kind="header"] { background: transparent !important; }

/* ── Main area typography ── */
h1, h2, h3 { font-family: 'Fraunces', serif !important; color: #1C1917 !important; }
h1 { font-size: 2.2rem !important; font-weight: 600 !important; line-height: 1.15 !important; }
h2 { font-size: 1.5rem !important; font-weight: 600 !important; }
h3 { font-size: 1.1rem !important; font-weight: 600 !important; }
p { font-family: 'Source Serif 4', serif !important; color: #292524 !important; }

/* ── Metric cards ── */
[data-testid="stMetric"] {
    background: #FFFFFF !important; border: 1px solid #E7E5E4 !important;
    border-radius: 8px !important; padding: 1rem 1.2rem !important;
}
[data-testid="stMetricLabel"] p {
    font-family: 'DM Mono', monospace !important; font-size: 0.62rem !important;
    letter-spacing: 2px !important; text-transform: uppercase !important; color: #78716C !important;
}
[data-testid="stMetricValue"] {
    font-family: 'Fraunces', serif !important; font-size: 1.85rem !important;
    font-weight: 600 !important; color: #1C1917 !important; line-height: 1.2 !important;
}
[data-testid="stMetricDelta"] { font-family: 'DM Mono', monospace !important; font-size: 0.7rem !important; }

/* ── Inputs ── */
[data-testid="stSelectbox"] label p,
[data-testid="stSlider"] label p,
[data-testid="stTextInput"] label p {
    font-family: 'DM Mono', monospace !important; font-size: 0.65rem !important;
    letter-spacing: 1.5px !important; text-transform: uppercase !important; color: #78716C !important;
}

/* ── Buttons ── */
.stButton > button {
    font-family: 'DM Mono', monospace !important; font-size: 0.7rem !important;
    letter-spacing: 1px !important; text-transform: uppercase !important;
    background: #1C1917 !important; color: #FAF9F7 !important;
    border: none !important; border-radius: 6px !important; padding: 0.5rem 1.2rem !important;
}
.stButton > button:hover { background: #FB923C !important; }

/* ── Dataframes ── */
[data-testid="stDataFrame"] {
    border: 1px solid #E7E5E4 !important; border-radius: 8px !important; overflow: hidden !important;
}

/* ── Expander ── */
[data-testid="stExpander"] {
    border: 1px solid #E7E5E4 !important; border-radius: 8px !important; background: #FFFFFF !important;
}

/* ── Caption ── */
[data-testid="stCaptionContainer"] p {
    font-family: 'DM Mono', monospace !important; font-size: 0.63rem !important;
    color: #A8A29E !important; letter-spacing: 0.3px !important;
}

/* ── Divider ── */
hr { border: none !important; border-top: 1px solid #E7E5E4 !important; margin: 1.5rem 0 !important; }

/* ── Tabs ── */
[data-testid="stTabs"] [role="tab"] {
    font-family: 'DM Mono', monospace !important; font-size: 0.68rem !important;
    letter-spacing: 1px !important; text-transform: uppercase !important;
}
</style>
"""

# Brand block rendered at the top of the sidebar.
_SIDEBAR_BRAND_HTML = """
<div style="padding:1.4rem 0.75rem 1.2rem 0.75rem;
            border-bottom:1px solid rgba(255,255,255,0.07);
            margin-bottom:0.75rem;">
  <p style="font-family:'DM Mono',monospace; font-size:0.52rem; letter-spacing:4px;
            text-transform:uppercase; color:#FB923C; margin:0 0 8px 0; line-height:1;">
    Policy Analytics
  </p>
  <p style="font-family:'Fraunces',serif; font-size:1.35rem; font-weight:600;
            color:#FAF9F7; line-height:1.1; margin:0;">
    Scheme<br>Impact<em style="color:#FB923C;">Net</em>
  </p>
  <p style="font-family:'DM Mono',monospace; font-size:0.55rem; color:#57534E;
            margin:10px 0 0 0; letter-spacing:0.4px; line-height:1.65;">
    MNREGA · XGBoost · SciPy LP<br>
    7,758 district-years · 2014–2024
  </p>
</div>
"""

# ── Inject CSS first — before anything else ───────────────────────────────────
# Must happen before st.navigation() so sidebar styles are present when nav renders.
st.markdown(_GLOBAL_CSS, unsafe_allow_html=True)

# ── Sidebar brand — inject before navigation ──────────────────────────────────
with st.sidebar:
    st.markdown(_SIDEBAR_BRAND_HTML, unsafe_allow_html=True)

# ── Page registry ─────────────────────────────────────────────────────────────
# Each icon is a single emoji, not a multi-character string.
pages = [
    st.Page("pages/home.py", title="Home", icon="🏛️", default=True),
    st.Page("pages/overview.py", title="Overview", icon="📊"),
    st.Page("pages/districts.py", title="District Explorer", icon="🔍"),
    st.Page("pages/predictions.py", title="Predictions", icon="🤖"),
    st.Page("pages/optimizer.py", title="Budget Optimizer", icon="⚖️"),
    st.Page("pages/spatial.py", title="Spatial Map", icon="🗺️"),
    st.Page("pages/insights.py", title="Strategic Insights", icon="🧠"),
]

pg = st.navigation(pages, position="sidebar")
pg.run()
frontend/pages/districts.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/districts.py — District deep-dive explorer.

import html
import os
import sys

# Make the parent of this file's directory importable (for `theme`, `utils`).
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))

import streamlit as st
import plotly.graph_objects as go

from theme import inject_theme, page_header, section_label, PLOTLY_LAYOUT, SAFFRON, GREEN, RED
from utils.api_client import fetch_states, fetch_districts, fetch_district_history

_PLOT_CONFIG = {"displayModeBar": False}
_MONO_FONT = "DM Mono, monospace"


def _chart_layout(height, y_title, **extra):
    """Return a copy of PLOTLY_LAYOUT with this page's axis titles applied.

    The x axis is always titled "Financial Year" with one tick per year;
    *extra* passes through additional layout keys such as ``bargap``.
    """
    layout = {**PLOTLY_LAYOUT}
    layout.update(
        height=height,
        yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title=y_title),
        xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Financial Year", dtick=1),
        **extra,
    )
    return layout


inject_theme()
page_header(
    "◈ Module 02",
    "District Explorer",
    "Full historical MNREGA performance deep-dive for any district",
)

# ── Selectors ─────────────────────────────────────────────────────────────────
states = fetch_states()
if not states:
    st.error("⚠️ API offline — run `uvicorn backend.main:app --port 8000`")
    st.stop()

col1, col2 = st.columns(2)
with col1:
    state = st.selectbox("State", states)
with col2:
    districts = fetch_districts(state)
    if not districts:
        st.warning("No districts found for this state.")
        st.stop()
    district = st.selectbox("District", districts)

# ── Fetch district history ────────────────────────────────────────────────────
df = fetch_district_history(state, district)

if df.empty:
    st.warning("No historical data for this district.")
    st.stop()

df = df.sort_values("financial_year").reset_index(drop=True)

# ── District headline ─────────────────────────────────────────────────────────
latest = df.iloc[-1]
# With a single year on record, compare the year with itself (zero delta).
prev = df.iloc[-2] if len(df) > 1 else latest

# The names come from the API and are placed into raw HTML, so escape them.
district_html = html.escape(str(district))
state_html = html.escape(str(state))

st.markdown(f"""
<div style="margin:0.5rem 0 1.5rem;">
  <p style="font-family:'Fraunces',serif; font-size:1.65rem; font-weight:600;
            color:#1C1917; margin:0;">
    {district_html}
    <span style="font-size:1rem; font-weight:300; color:#78716C;">· {state_html}</span>
  </p>
</div>
""", unsafe_allow_html=True)

pd_delta = latest['person_days_lakhs'] - prev['person_days_lakhs']
wage_delta = latest['avg_wage_rate'] - prev['avg_wage_rate']

c1, c2, c3 = st.columns(3)
c1.metric(
    "Person-Days (latest yr)",
    f"{latest['person_days_lakhs']:.2f}L",
    delta=f"{pd_delta:+.2f}L",
)
c2.metric(
    "Avg Wage Rate",
    f"₹{latest['avg_wage_rate']:.0f}/day",
    delta=f"₹{wage_delta:+.0f}",
)
c3.metric(
    "Years on Record",
    f"{len(df)}",
)

st.markdown("---")

# ── Person-Days Trend ─────────────────────────────────────────────────────────
section_label("Person-Days Trend")
fig1 = go.Figure()
fig1.add_scatter(
    x=df["financial_year"], y=df["person_days_lakhs"],
    mode="lines+markers",
    fill="tozeroy",
    fillcolor="rgba(251,146,60,0.07)",
    line=dict(color=SAFFRON, width=2.5),
    marker=dict(size=6, color=SAFFRON, line=dict(width=1.5, color="#FFFFFF")),
    name="Person-Days",
    hovertemplate="FY%{x}<br>PD: <b>%{y:.2f}L</b><extra></extra>",
)
# Annotate notable years, but only when that year is present in the data.
years_on_record = df["financial_year"].values
for marker_year, label, color, width in (
    (2020, "COVID", RED, 1.5),
    (2022, "2022 anomaly", "#A8A29E", 1),
):
    if marker_year in years_on_record:
        fig1.add_vline(
            x=marker_year, line_dash="dot", line_color=color, line_width=width,
            annotation_text=label,
            annotation_font=dict(color=color, size=9, family=_MONO_FONT),
        )
fig1.update_layout(**_chart_layout(320, "Lakh Person-Days"))
st.plotly_chart(fig1, use_container_width=True, config=_PLOT_CONFIG)

# ── YoY Change ────────────────────────────────────────────────────────────────
section_label("Year-on-Year Change")
df["yoy"] = df["person_days_lakhs"].pct_change() * 100

fig2 = go.Figure()
fig2.add_bar(
    x=df["financial_year"],
    y=df["yoy"],
    marker=dict(
        # The first year has no predecessor (NaN); colour it as non-negative.
        color=[GREEN if v >= 0 else RED for v in df["yoy"].fillna(0)],
        opacity=0.8,
    ),
    hovertemplate="FY%{x}<br>YoY: <b>%{y:+.1f}%</b><extra></extra>",
)
fig2.add_hline(y=0, line_dash="solid", line_color="#1C1917", line_width=1)
fig2.update_layout(**_chart_layout(220, "% Change", bargap=0.3))
st.plotly_chart(fig2, use_container_width=True, config=_PLOT_CONFIG)

# ── Wage Rate Trend ───────────────────────────────────────────────────────────
section_label("Wage Rate History")
fig3 = go.Figure()
fig3.add_scatter(
    x=df["financial_year"], y=df["avg_wage_rate"],
    mode="lines+markers",
    fill="tozeroy",
    fillcolor="rgba(22,163,74,0.06)",
    line=dict(color=GREEN, width=2),
    marker=dict(size=6, color=GREEN),
    hovertemplate="FY%{x}<br>₹%{y:.0f}/day<extra></extra>",
)
fig3.update_layout(**_chart_layout(220, "₹/day"))
st.plotly_chart(fig3, use_container_width=True, config=_PLOT_CONFIG)

# ── Raw data ──────────────────────────────────────────────────────────────────
with st.expander("📋 Raw Data Table"):
    # Show only the columns that this response actually contains.
    display_cols = [c for c in [
        "financial_year", "person_days_lakhs", "avg_wage_rate",
    ] if c in df.columns]
    st.dataframe(df[display_cols].round(3), use_container_width=True, hide_index=True)
frontend/pages/home.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/home.py — Landing dashboard.
2
+
3
+ import sys, os
4
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
5
+
6
+ import streamlit as st
7
+ import numpy as np
8
+ import plotly.graph_objects as go
9
+
10
+ from theme import (
11
+ inject_theme, page_header, section_label, kpi_html,
12
+ signal_card_html, PLOTLY_LAYOUT, SAFFRON, SAFFRON_SCALE, GREEN, RED, AMBER,
13
+ )
14
+ from utils.api_client import (
15
+ is_online, fetch_stats, fetch_predictions, fetch_optimizer_results,
16
+ )
17
+
18
+ inject_theme()
19
+
20
# ── Status pill ───────────────────────────────────────────────────────────────
# One-line health indicator at the top of the page: a coloured dot followed by
# a short caption, both driven by the value returned from is_online().
online = is_online()
pill_color = "#16A34A" if online else "#DC2626"  # green when live, red otherwise
pill_text = (
    "API LIVE"
    if online
    else "API OFFLINE — run `uvicorn backend.main:app --port 8000`"
)
# Adjacent f-string literals are concatenated into one HTML snippet.
st.markdown(
    f'<div style="display:flex;align-items:center;gap:8px;margin-bottom:1.4rem;">'
    f'<span style="width:7px;height:7px;border-radius:50%;background:{pill_color};display:inline-block;"></span>'
    f'<span style="font-family:DM Mono,monospace;font-size:0.62rem;letter-spacing:2px;'
    f'text-transform:uppercase;color:{pill_color};">{pill_text}</span></div>',
    unsafe_allow_html=True,
)

# Page title block: kicker line, title, subtitle.
page_header(
    "◈ MNREGA · India · 2014–2024",
    "SchemeImpactNet",
    "Predictive impact analysis and budget optimisation for India's rural employment scheme",
)
37
+
38
# ── Data fetch ────────────────────────────────────────────────────────────────
# `or {}` keeps the .get() calls below safe if the client returns None/empty
# while the backend is unreachable.
stats = fetch_stats() or {}
pred_df = fetch_predictions()
opt_df = fetch_optimizer_results()

# Derived KPIs ("—" is the placeholder shown when a value is unavailable)
n_dist = stats.get("total_districts", "—")
n_states = stats.get("total_states", "—")
yr_range = stats.get("year_range", "—")
total_pd = stats.get("total_persondays_lakhs", 0)
covid_pct = stats.get("covid_spike_pct", 0)

# National gain from the optimizer output, in absolute terms and as a
# percentage of the status-quo total.
nat_gain = gain_pct = 0.0
if not opt_df.empty and "persondays_gain" in opt_df.columns:
    nat_gain = opt_df["persondays_gain"].sum()
    # Without a status-quo baseline no percentage can be computed; report 0
    # rather than dividing by an arbitrary constant.
    sq_sum = opt_df["sq_persondays"].sum() if "sq_persondays" in opt_df.columns else 0
    gain_pct = nat_gain / sq_sum * 100 if sq_sum else 0.0

# ── KPI strip ─────────────────────────────────────────────────────────────────
# Each card: (value, label, accent colour, footnote).
kpi_columns = st.columns(5, gap="small")
cards = [
    (str(n_dist), "Districts", SAFFRON, ""),
    (str(n_states), "States / UTs", "#1C1917", ""),
    (f"{total_pd:,.0f}L", "Person-Days", "#1C1917", "historical total"),
    (f"{covid_pct:+.1f}%", "COVID-19 Spike", RED, "2020 peak"),
    (f"{gain_pct:+.2f}%", "LP Opt. Gain", GREEN, "budget-neutral"),
]
for col, (val, label, color, note) in zip(kpi_columns, cards):
    with col:
        st.markdown(kpi_html(val, label, color, note), unsafe_allow_html=True)

# Vertical spacer before the two-column section.
st.markdown("<div style='margin-top:2rem'></div>", unsafe_allow_html=True)
70
+
71
# ── Two-column layout ─────────────────────────────────────────────────────────
left, right = st.columns([3, 2], gap="large")

# ── LEFT: state bubble map ────────────────────────────────────────────────────
# Approximate (latitude, longitude) used to place one bubble per state/UT.
STATE_COORDS = {
    "Andhra Pradesh": (15.9, 79.7), "Arunachal Pradesh": (28.2, 94.7),
    "Assam": (26.2, 92.9), "Bihar": (25.1, 85.3),
    "Chhattisgarh": (21.3, 81.7), "Goa": (15.3, 74.0),
    "Gujarat": (22.3, 71.2), "Haryana": (29.1, 76.1),
    "Himachal Pradesh": (31.1, 77.2), "Jharkhand": (23.6, 85.3),
    "Karnataka": (15.3, 75.7), "Kerala": (10.9, 76.3),
    "Madhya Pradesh": (22.9, 78.7), "Maharashtra": (19.7, 75.7),
    "Manipur": (24.7, 93.9), "Meghalaya": (25.5, 91.4),
    "Mizoram": (23.2, 92.7), "Nagaland": (26.2, 94.6),
    "Odisha": (20.9, 85.1), "Punjab": (31.1, 75.3),
    "Rajasthan": (27.0, 74.2), "Sikkim": (27.5, 88.5),
    "Tamil Nadu": (11.1, 78.7), "Telangana": (17.4, 79.1),
    "Tripura": (23.9, 91.5), "Uttar Pradesh": (26.8, 80.9),
    "Uttarakhand": (30.1, 79.3), "West Bengal": (22.9, 87.9),
    "Jammu and Kashmir": (33.7, 76.9), "Ladakh": (34.2, 77.6),
    "Delhi": (28.7, 77.1), "Puducherry": (11.9, 79.8),
}

with left:
    section_label("State-Level Employment · Latest Year")

    # Every column the aggregation below reads; checking them all up front
    # avoids a KeyError when the API returns a reduced schema.
    map_cols = {
        "financial_year", "state", "district", "person_days_lakhs",
        "predicted_persondays", "prediction_error",
    }
    agg = None
    if not pred_df.empty and map_cols.issubset(pred_df.columns):
        ly = pred_df["financial_year"].max()
        # One row per state for the latest financial year.
        agg = (
            pred_df[pred_df["financial_year"] == ly]
            .groupby("state", as_index=False)
            .agg(
                pd_sum=("person_days_lakhs", "sum"),
                pred_sum=("predicted_persondays", "sum"),
                n_dist=("district", "count"),
                avg_err=("prediction_error", "mean"),
            )
        )

    if agg is None or agg.empty:
        # Also covers an empty aggregation, where min()/max() below would raise.
        st.info("Start the backend to load state-level data.")
    else:
        # Fixed seed so the small positional jitter is identical on every rerun.
        rng = np.random.default_rng(42)
        lats, lons, szs = [], [], []
        for state_name, pd_total in zip(agg["state"], agg["pd_sum"]):
            # States missing from STATE_COORDS fall back to a central point.
            lat, lon = STATE_COORDS.get(state_name, (22.0, 78.0))
            lats.append(lat + rng.uniform(-0.12, 0.12))
            lons.append(lon + rng.uniform(-0.12, 0.12))
            szs.append(float(pd_total))

        # Min-max scale volumes to marker sizes in [5, 19]; the epsilon avoids
        # division by zero when every state has the same volume.
        mn, mx = min(szs), max(szs)
        bsz = [float(np.clip((v - mn) / (mx - mn + 1e-9) * 14 + 5, 5, 19)) for v in szs]

        fig = go.Figure()
        fig.add_scattergeo(
            lat=lats, lon=lons, mode="markers",
            marker=dict(
                size=bsz, color=szs,
                colorscale=SAFFRON_SCALE,
                colorbar=dict(
                    title=dict(text="Lakh PD", font=dict(color="#78716C", size=9)),
                    tickfont=dict(color="#78716C", size=8),
                    thickness=8, len=0.45,
                    bgcolor="rgba(255,255,255,0.85)",
                ),
                opacity=0.88,
                line=dict(width=1, color="#FFFFFF"),
            ),
            text=agg["state"],
            # customdata order: actual PD, predicted PD, district count, mean error.
            customdata=list(zip(
                agg["pd_sum"].round(1),
                agg["pred_sum"].round(1),
                agg["n_dist"],
                agg["avg_err"].round(2),
            )),
            hovertemplate=(
                "<b>%{text}</b><br>"
                "Actual PD: <b>%{customdata[0]}L</b><br>"
                "Predicted: <b>%{customdata[1]}L</b><br>"
                "Districts: %{customdata[2]}<br>"
                "Avg Model Error: %{customdata[3]}L"
                "<extra></extra>"
            ),
        )
        fig.update_geos(
            scope="asia", showland=True, landcolor="#F5F5F4",
            showocean=True, oceancolor="#EFF6FF",
            showcountries=True, countrycolor="#D6D3D1",
            showsubunits=True, subunitcolor="#E7E5E4",
            center=dict(lat=22, lon=80), projection_scale=5.2,
            bgcolor="rgba(0,0,0,0)",
        )
        fig.update_layout(
            height=420, paper_bgcolor="rgba(0,0,0,0)",
            margin=dict(l=0, r=0, t=0, b=0),
            font=dict(family="DM Mono, monospace", color="#1C1917"),
            showlegend=False,
        )
        st.plotly_chart(fig, use_container_width=True, config={"displayModeBar": False})
        st.caption(f"FY {ly} · bubble size ∝ employment volume · hover for model predictions")
170
+
171
# ── RIGHT: brief + signals ────────────────────────────────────────────────────
with right:
    section_label("Intelligence Brief")

    n_declining = n_underfunded = 0
    top_state = "—"
    ly_label = "—"

    # Districts whose latest prediction is below the previous year's actual.
    # The column check mirrors the guard used by the map section.
    if not pred_df.empty and "financial_year" in pred_df.columns:
        ly = pred_df["financial_year"].max()
        ly_label = ly
        latest_rows = pred_df[pred_df["financial_year"] == ly]
        # `ly - 1` assumes financial_year is numeric — TODO confirm with the API schema.
        prv = pred_df[pred_df["financial_year"] == ly - 1]
        if not prv.empty:
            mg = latest_rows.merge(
                prv[["state", "district", "person_days_lakhs"]].rename(
                    columns={"person_days_lakhs": "prev"}
                ),
                on=["state", "district"], how="left",
            )
            # Rows with no previous-year match compare against NaN and are
            # therefore not counted.
            n_declining = int((mg["predicted_persondays"] < mg["prev"]).sum())

    if not opt_df.empty and "budget_allocated_lakhs" in opt_df.columns:
        budgets = opt_df["budget_allocated_lakhs"]
        underfunded = budgets < budgets.quantile(0.33)  # bottom budget tercile
        # The brief and the signal card both describe these districts as
        # high-efficiency, so apply that filter whenever the efficiency
        # column is present.
        if "persondays_per_lakh" in opt_df.columns:
            efficiency = opt_df["persondays_per_lakh"]
            underfunded = underfunded & (efficiency > efficiency.mean())
        n_underfunded = int(underfunded.sum())
    if not opt_df.empty and "persondays_gain" in opt_df.columns:
        top_state = opt_df.groupby("state")["persondays_gain"].sum().idxmax()

    gain_str = f"{nat_gain:+,.1f}L" if nat_gain else "—"

    # Tag lines share one base indent so the HTML block starts at column 0
    # once the common leading whitespace is removed.
    st.markdown(f"""
    <div style="background:#FFF7ED; border:1px solid #FED7AA; border-left:3px solid #FB923C;
    border-radius:8px; padding:1.2rem 1.4rem; margin-bottom:1rem;">
    <p style="font-family:'DM Mono',monospace; font-size:0.56rem; letter-spacing:2.5px;
    text-transform:uppercase; color:#FB923C; margin:0 0 9px 0;">
    ◈ Auto-generated · Pipeline FY {ly_label}</p>
    <p style="font-family:'Source Serif 4',serif; font-size:0.88rem; color:#431407;
    line-height:1.75; margin:0;">
    Budget-neutral LP reallocation yields a projected
    <strong>{gain_str}</strong> of additional employment —
    a <strong>{gain_pct:+.2f}%</strong> uplift at zero additional outlay.
    <strong>{n_declining} districts</strong> face declining employment trajectories.
    Highest reallocation opportunity: <strong>{top_state}</strong>.
    <strong>{n_underfunded} districts</strong> in the bottom budget tercile show
    above-average delivery efficiency.
    </p>
    </div>
    """, unsafe_allow_html=True)

    section_label("Live Signals")
    # Each signal: (value, title, body text, accent colour).
    signals = [
        (str(n_declining), "High-Risk Districts", "Predicted employment decline", RED),
        (str(n_underfunded), "Underfunded · High Eff.", "Bottom-tercile budget", AMBER),
        (gain_str, "LP Reallocation Gain", f"Budget-neutral · {gain_pct:+.2f}%", GREEN),
        (str(n_dist), "Districts in Model", "GBR · Walk-fwd CV R²≈0.91", SAFFRON),
    ]
    for val, title, body, accent in signals:
        st.markdown(signal_card_html(val, title, body, accent), unsafe_allow_html=True)
frontend/pages/insights.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/insights.py — Strategic Insights & Policy Brief.
2
+
3
+
4
+ import sys, os
5
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
6
+
7
+ import streamlit as st
8
+ import plotly.graph_objects as go
9
+ import plotly.express as px
10
+ import pandas as pd
11
+ import numpy as np
12
+
13
+ from theme import inject_theme, page_header, section_label, kpi_html, signal_card_html, PLOTLY_LAYOUT, SAFFRON, GREEN, RED, AMBER, BLUE
14
+ from utils.api_client import fetch_states, fetch_predictions, fetch_optimizer_results, fetch_yearly_trend
15
+
16
inject_theme()
page_header(
    "◈ Module 06",
    "Strategic Insights",
    "Auto-generated policy intelligence — high-risk districts, efficiency leaders, and reallocation priorities",
)

# An empty state list is treated as "backend unreachable": show the hint and
# halt this script run.
states = fetch_states()
if not states:
    st.error("⚠️ API offline — run `uvicorn backend.main:app --port 8000`")
    st.stop()

# State scope selector in the left third of the row.
cs, _ = st.columns([1, 2])
with cs:
    scope = st.selectbox("State Scope", ["All India"] + states)
# None means "no state filter" for the fetch helpers below.
state_param = None if scope == "All India" else scope

pred_df = fetch_predictions(state=state_param)
opt_df = fetch_optimizer_results(state=state_param)
trend = fetch_yearly_trend(state_param)

# Every section below needs predictions; stop early when there are none.
if pred_df.empty:
    st.info("No data — run the pipeline first.")
    st.stop()

st.markdown("---")
42
+
43
# ── Section A: Declining districts ───────────────────────────────────────────
section_label("A. High-Risk Districts — Declining Employment Trajectory")

# Compare the latest year's prediction with the previous year's actual value.
ly = pred_df["financial_year"].max()
prv = ly - 1  # assumes financial_year is numeric — TODO confirm with the API schema

latest_df = pred_df[pred_df["financial_year"] == ly].copy()
prv_df = pred_df[pred_df["financial_year"] == prv].copy()

if not prv_df.empty:
    # Inner join: only districts present in both years can be compared.
    mg = latest_df.merge(
        prv_df[["state", "district", "person_days_lakhs"]].rename(
            columns={"person_days_lakhs": "prev_actual"}
        ),
        on=["state", "district"], how="inner",
    )
    mg["predicted_chg"] = mg["predicted_persondays"] - mg["prev_actual"]
    # A zero baseline would give ±inf, which then becomes a marker colour;
    # map it to NaN instead.
    mg["predicted_chg_pct"] = (
        mg["predicted_chg"] / mg["prev_actual"].replace(0, np.nan) * 100
    ).round(2)

    # Keep the full declining set for the headline figures; the chart shows
    # only the 20 steepest declines.
    all_declining = mg[mg["predicted_chg"] < 0]
    declining = all_declining.nsmallest(20, "predicted_chg").copy()
    declining["label"] = declining["district"] + " · " + declining["state"]

    if not declining.empty:
        col_risk, col_info = st.columns([2, 1])
        with col_risk:
            fig1 = go.Figure()
            fig1.add_bar(
                x=declining["predicted_chg"],
                y=declining["label"],
                orientation="h",
                marker=dict(
                    color=declining["predicted_chg_pct"],
                    colorscale=[[0, "#7F1D1D"], [1, "#FCA5A5"]],
                    showscale=False,
                    opacity=0.85,
                ),
                # customdata order: state, district, previous actual,
                # predicted, absolute change, percentage change.
                customdata=list(zip(
                    declining["state"], declining["district"],
                    declining["prev_actual"].round(2),
                    declining["predicted_persondays"].round(2),
                    declining["predicted_chg"].round(2),
                    declining["predicted_chg_pct"],
                )),
                hovertemplate=(
                    "<b>%{customdata[1]}</b> · %{customdata[0]}<br>"
                    "Actual: %{customdata[2]}L<br>"
                    "Predicted: %{customdata[3]}L<br>"
                    "Change: <b>%{customdata[4]:+.2f}L</b> (%{customdata[5]:+.1f}%)"
                    "<extra></extra>"
                ),
            )
            l1 = {**PLOTLY_LAYOUT}
            l1.update(dict(
                # Grow the chart with the number of bars, with a 380px floor.
                height=max(380, len(declining) * 26),
                title=dict(text=f"Districts with Declining Predicted Employment · FY{prv}→{ly}",
                           font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
                xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Predicted Change (Lakh PD)"),
                yaxis=dict(**PLOTLY_LAYOUT["yaxis"]),
                bargap=0.28, showlegend=False,
            ))
            fig1.update_layout(**l1)
            st.plotly_chart(fig1, width="stretch", config={"displayModeBar": False})

        with col_info:
            worst = declining.iloc[0]  # nsmallest puts the steepest decline first
            # Count and average use the untruncated set so the alert is not
            # capped at the 20 rows shown in the chart.
            st.markdown(f"""
            <div style="background:#FEF2F2; border:1px solid #FECACA; border-left:3px solid #DC2626;
            border-radius:8px; padding:1.1rem 1.2rem; margin-bottom:0.8rem;">
            <p style="font-family:'DM Mono',monospace; font-size:0.56rem; letter-spacing:2px;
            text-transform:uppercase; color:#DC2626; margin:0 0 8px 0;">Risk Alert</p>
            <p style="font-family:'Fraunces',serif; font-size:1.6rem; font-weight:600;
            color:#7F1D1D; margin:0 0 4px 0;">{len(all_declining)}</p>
            <p style="font-family:'Source Serif 4',serif; font-size:0.82rem; color:#991B1B;
            margin:0; line-height:1.5;">
            Districts predicted to see employment decline next cycle.
            Avg change: <strong>{all_declining['predicted_chg'].mean():+.2f}L</strong> person-days.
            </p>
            </div>

            <div style="background:#FFFFFF; border:1px solid #E7E5E4;
            border-radius:8px; padding:1rem 1.1rem;">
            <p style="font-family:'DM Mono',monospace; font-size:0.56rem; letter-spacing:2px;
            text-transform:uppercase; color:#A8A29E; margin:0 0 8px 0;">Worst Decline</p>
            <p style="font-family:'Fraunces',serif; font-size:1.1rem; font-weight:600;
            color:#1C1917; margin:0 0 2px 0;">{worst['district']}</p>
            <p style="font-family:'DM Mono',monospace; font-size:0.62rem; color:#78716C; margin:0;">
            {worst['state']} · {worst['predicted_chg']:+.2f}L
            </p>
            </div>
            """, unsafe_allow_html=True)
    else:
        st.success("✅ No districts show predicted employment decline.")
else:
    st.info("Previous year data unavailable for trend comparison.")

st.markdown("---")
138
+
139
# ── Section B: Efficiency leaders & laggards ──────────────────────────────────
section_label("B. Cost Efficiency — Leaders & Laggards")

# Per-district means across all years in the predictions frame.
eff_df = (
    pred_df.groupby(["state", "district"], as_index=False)
    .agg(
        avg_actual=("person_days_lakhs", "mean"),
        avg_predicted=("predicted_persondays", "mean"),
        avg_error=("prediction_error", "mean"),
    )
)

if not opt_df.empty and "persondays_per_lakh" in opt_df.columns:
    # One efficiency value per district, attached to the per-district means.
    eff_sub = opt_df[["state", "district", "persondays_per_lakh"]].drop_duplicates(["state", "district"])
    eff_df = eff_df.merge(eff_sub, on=["state", "district"], how="left")

    top_eff = eff_df.nlargest(12, "persondays_per_lakh")
    bot_eff = eff_df.nsmallest(12, "persondays_per_lakh")

    # Two side-by-side horizontal bar charts that differ only in data,
    # title and colour.
    col_e1, col_e2 = st.columns(2)
    panels = [
        (col_e1, top_eff, "Top 12 Most Efficient", GREEN),
        (col_e2, bot_eff, "Bottom 12 Least Efficient", RED),
    ]
    for col_e, sub, title_str, c in panels:
        with col_e:
            sub = sub.copy()  # copy before adding the label column
            sub["label"] = sub["district"] + " · " + sub["state"]
            fig_e = go.Figure()
            fig_e.add_bar(
                x=sub["persondays_per_lakh"],
                y=sub["label"],
                orientation="h",
                marker=dict(color=c, opacity=0.78),
                hovertemplate="<b>%{y}</b><br>%{x:.4f} PD/₹L<extra></extra>",
            )
            l_e = {**PLOTLY_LAYOUT}
            l_e.update(dict(
                height=340,
                title=dict(text=title_str, font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
                xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="PD per ₹ Lakh"),
                # Reversed so the first row of each ranking is drawn at the top.
                yaxis=dict(**PLOTLY_LAYOUT["yaxis"], autorange="reversed"),
                bargap=0.25, showlegend=False,
            ))
            fig_e.update_layout(**l_e)
            st.plotly_chart(fig_e, width="stretch", config={"displayModeBar": False})
else:
    st.info("Run optimizer pipeline to see efficiency rankings.")

st.markdown("---")
188
+
189
# ── Section C: State-level LP opportunities ───────────────────────────────────
section_label("C. LP Reallocation Opportunities by State")

# Every column the aggregation below reads; checking them together avoids a
# KeyError when the optimizer output carries a reduced schema.
required_cols = {
    "state", "district", "persondays_gain",
    "persondays_per_lakh", "budget_allocated_lakhs",
}
if not opt_df.empty and required_cols.issubset(opt_df.columns):
    # One row per state, ordered by total gain (largest first).
    state_gain = (
        opt_df.groupby("state", as_index=False)
        .agg(
            total_gain=("persondays_gain", "sum"),
            n_districts=("district", "count"),
            avg_eff=("persondays_per_lakh", "mean"),
            total_bud=("budget_allocated_lakhs", "sum"),
        )
        .sort_values("total_gain", ascending=False)
    )
    state_gain["gain_per_dist"] = (state_gain["total_gain"] / state_gain["n_districts"]).round(3)

    fig_s = go.Figure()
    fig_s.add_bar(
        x=state_gain["state"],
        y=state_gain["total_gain"],
        marker=dict(
            color=state_gain["total_gain"],
            colorscale=[[0, "#FEF3C7"], [0.5, "#FB923C"], [1, "#7C2D12"]],
            showscale=False,
            opacity=0.85,
        ),
        # customdata order: state, total gain, district count,
        # mean efficiency, total budget.
        customdata=list(zip(
            state_gain["state"],
            state_gain["total_gain"].round(2),
            state_gain["n_districts"],
            state_gain["avg_eff"].round(4),
            state_gain["total_bud"].round(0),
        )),
        hovertemplate=(
            "<b>%{customdata[0]}</b><br>"
            "Total PD Gain: <b>%{customdata[1]:+.2f}L</b><br>"
            "Districts: %{customdata[2]}<br>"
            "Avg Efficiency: %{customdata[3]} PD/₹L<br>"
            "Total Budget: ₹%{customdata[4]:,.0f}L"
            "<extra></extra>"
        ),
    )
    l_s = {**PLOTLY_LAYOUT}
    l_s.update(dict(
        height=360,
        title=dict(text="Total LP Person-Day Gain by State",
                   font=dict(family="Fraunces, serif", size=14, color="#1C1917")),
        xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="State", tickangle=-35),
        yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Total PD Gain (Lakh)"),
        bargap=0.3,
    ))
    fig_s.update_layout(**l_s)
    st.plotly_chart(fig_s, width="stretch", config={"displayModeBar": False})

    with st.expander("📋 State-Level Summary Table"):
        st.dataframe(state_gain.round(3), width="stretch", hide_index=True)
else:
    st.info("No optimizer data — run `python main.py --stage 3`.")

st.markdown("---")
249
+
250
# ── Section D: National trend analysis ───────────────────────────────────────
section_label("D. National Employment Trend & COVID Impact")

if not trend.empty:
    fin_years = trend["financial_year"]

    # Filled line chart of total person-days per financial year.
    fig_t = go.Figure(
        go.Scatter(
            x=fin_years,
            y=trend["total_persondays"],
            name="Total PD (Lakh)",
            mode="lines+markers",
            fill="tozeroy",
            fillcolor="rgba(251,146,60,0.07)",
            line=dict(color=SAFFRON, width=2.5),
            marker=dict(size=7, color=SAFFRON),
        )
    )

    # Mark 2020 with a dotted vertical rule, but only when that year is in the data.
    if (fin_years == 2020).any():
        fig_t.add_vline(
            x=2020,
            line_dash="dot",
            line_color=RED,
            line_width=1.5,
            annotation_text="COVID surge",
            annotation_font=dict(color=RED, size=9, family="DM Mono, monospace"),
        )

    # Shared layout with section-specific overrides merged on top.
    trend_layout = {
        **PLOTLY_LAYOUT,
        "height": 260,
        "title": dict(
            text="National Person-Days Trend",
            font=dict(family="Fraunces, serif", size=13, color="#1C1917"),
        ),
        "xaxis": dict(**PLOTLY_LAYOUT["xaxis"], title="Financial Year", dtick=1),
        "yaxis": dict(**PLOTLY_LAYOUT["yaxis"], title="Lakh PD"),
        "showlegend": False,
    }
    fig_t.update_layout(**trend_layout)
    st.plotly_chart(fig_t, width="stretch", config={"displayModeBar": False})
frontend/pages/optimizer.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/optimizer.py — Budget reallocation optimizer results and live LP runner.
2
+
3
+
4
+ import sys, os
5
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
6
+
7
+ import streamlit as st
8
+ import plotly.graph_objects as go
9
+ import plotly.express as px
10
+ import pandas as pd
11
+
12
+ from theme import inject_theme, page_header, section_label, kpi_html, PLOTLY_LAYOUT, SAFFRON, GREEN, RED, AMBER
13
+ from utils.api_client import fetch_states, fetch_optimizer_results, run_optimizer_live
14
+
15
inject_theme()
page_header(
    "◈ Module 04",
    "Budget Optimizer",
    "SciPy LP two-stage proportional reallocation — maximize employment at zero additional cost",
)

# ── Tabs: pre-computed vs live ────────────────────────────────────────────────
# tab1 shows stored optimizer output; tab2 triggers a fresh run with
# user-chosen parameters.
tab1, tab2 = st.tabs(["Pre-Computed Results", "Run Live Optimizer"])
24
+
25
# ══════════════════════════════════════════════════════════════════════════════
# TAB 1 — Pre-computed results
# ══════════════════════════════════════════════════════════════════════════════
with tab1:
    # An empty state list is treated as "backend unreachable". Note that
    # st.stop() halts the whole script run, so tab 2 is not rendered either.
    states = fetch_states()
    if not states:
        st.error("⚠️ API offline — run `uvicorn backend.main:app --port 8000`")
        st.stop()

    cs, _ = st.columns([1, 2])
    with cs:
        scope = st.selectbox("State Filter", ["All-India"] + states, key="pre_scope")
    # None means "no state filter" for the fetch helper.
    state_param = None if scope == "All-India" else scope

    df = fetch_optimizer_results(state_param)

    if df.empty:
        st.info("No optimizer results — run the pipeline first: `python main.py --stage 3`")
    else:
        # ── Summary KPIs ──────────────────────────────────────────────────────
        sq_total = df["sq_persondays"].sum()
        gain = df["persondays_gain"].sum()
        # Use the stored optimized total when present, otherwise derive it.
        opt_total = df["opt_persondays"].sum() if "opt_persondays" in df.columns else sq_total + gain
        gain_pct = gain / sq_total * 100 if sq_total else 0
        tot_bud = df["budget_allocated_lakhs"].sum() if "budget_allocated_lakhs" in df.columns else 0

        # One argument tuple per KPI card: (value, label, colour[, footnote]).
        kpi_specs = [
            (f"{sq_total:,.0f}L", "Status Quo PD", "#1C1917"),
            (f"{opt_total:,.0f}L", "Optimized PD", GREEN),
            (f"{gain:+,.1f}L", "Net Gain", GREEN, "lakh person-days"),
            (f"{gain_pct:+.2f}%", "% Uplift", GREEN, "budget-neutral"),
            (f"₹{tot_bud:,.0f}L", "Total Budget", "#1C1917", "unchanged"),
        ]
        for kpi_col, spec in zip(st.columns(5), kpi_specs):
            with kpi_col:
                st.markdown(kpi_html(*spec), unsafe_allow_html=True)

        st.markdown("<div style='margin-top:1.5rem'></div>", unsafe_allow_html=True)

        # ── Budget change waterfall — top movers ──────────────────────────────
        section_label("Top Budget Movers")

        # Ten largest gains plus ten largest cuts; drop_duplicates handles the
        # overlap when the frame has fewer than twenty rows.
        top_gain = df.nlargest(10, "persondays_gain").copy()
        top_cut = df.nsmallest(10, "persondays_gain").copy()
        show = pd.concat([top_gain, top_cut]).drop_duplicates().sort_values("persondays_gain")
        show["label"] = show["district"] + " · " + show["state"]

        # Optional hover columns fall back to zeros when absent.
        zeros = pd.Series(0.0, index=show.index)
        budget_vals = show.get("budget_allocated_lakhs", zeros).round(0)
        change_vals = show.get("budget_change_pct", zeros).round(1)
        eff_vals = show.get("persondays_per_lakh", zeros).round(4)

        fig1 = go.Figure()
        fig1.add_bar(
            x=show["persondays_gain"],
            y=show["label"],
            orientation="h",
            marker=dict(
                color=[GREEN if v > 0 else RED for v in show["persondays_gain"]],
                opacity=0.8,
            ),
            # customdata order: state, district, budget, budget change %,
            # PD gain, efficiency.
            customdata=list(zip(
                show["state"], show["district"],
                budget_vals,
                change_vals,
                show["persondays_gain"].round(2),
                eff_vals,
            )),
            hovertemplate=(
                "<b>%{customdata[1]}</b> · %{customdata[0]}<br>"
                "Budget: ₹%{customdata[2]:,.0f}L → %{customdata[3]:+.1f}%<br>"
                "PD Gain: <b>%{customdata[4]:+.2f}L</b><br>"
                "Efficiency: %{customdata[5]} PD/₹L"
                "<extra></extra>"
            ),
        )
        fig1.add_vline(x=0, line_dash="solid", line_color="#1C1917", line_width=1)
        l1 = {**PLOTLY_LAYOUT}
        l1.update(dict(
            height=520,
            title=dict(text="Person-Day Gain by District (Top 10 + Bottom 10)",
                       font=dict(family="Fraunces, serif", size=14, color="#1C1917")),
            xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Person-Day Gain (Lakh)"),
            yaxis=dict(**PLOTLY_LAYOUT["yaxis"]),
            showlegend=False,
            bargap=0.3,
        ))
        fig1.update_layout(**l1)
        st.plotly_chart(fig1, use_container_width=True, config={"displayModeBar": False})

        # ── Efficiency vs budget change scatter ───────────────────────────────
        section_label("Efficiency vs Budget Reallocation")

        if "persondays_per_lakh" in df.columns and "budget_change_pct" in df.columns:
            fig2 = go.Figure()
            fig2.add_scatter(
                x=df["persondays_per_lakh"],
                y=df["budget_change_pct"],
                mode="markers",
                marker=dict(
                    color=df["persondays_gain"],
                    colorscale=[[0, RED], [0.5, "#FED7AA"], [1, GREEN]],
                    size=5, opacity=0.65,
                    colorbar=dict(
                        title=dict(text="PD Gain", font=dict(color="#78716C", size=9)),
                        tickfont=dict(color="#78716C", size=8),
                        thickness=8, len=0.5,
                    ),
                ),
                customdata=list(zip(
                    df["state"], df["district"],
                    df["budget_change_pct"].round(1),
                    df["persondays_gain"].round(2),
                )),
                hovertemplate=(
                    "<b>%{customdata[1]}</b> · %{customdata[0]}<br>"
                    "Budget Δ: %{customdata[2]:+.1f}%<br>"
                    "PD Gain: %{customdata[3]:+.2f}L"
                    "<extra></extra>"
                ),
            )
            fig2.add_hline(y=0, line_dash="dot", line_color="#1C1917", line_width=1)
            l2 = {**PLOTLY_LAYOUT}
            l2.update(dict(
                height=340,
                title=dict(text="Efficiency (PD/₹ Lakh) vs Budget Change %",
                           font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
                xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="PD per ₹ Lakh"),
                yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Budget Change (%)"),
                showlegend=False,
            ))
            fig2.update_layout(**l2)
            st.plotly_chart(fig2, use_container_width=True, config={"displayModeBar": False})

        # ── Full table ────────────────────────────────────────────────────────
        with st.expander("📋 Full Reallocation Table"):
            # Show only the columns that exist, in this preferred order.
            show_cols = [c for c in [
                "state", "district", "budget_allocated_lakhs", "optimized_budget",
                "budget_change_pct", "sq_persondays", "opt_persondays",
                "persondays_gain", "persondays_gain_pct", "persondays_per_lakh",
            ] if c in df.columns]
            styled = df[show_cols].round(3).sort_values("persondays_gain", ascending=False)
            st.dataframe(styled, use_container_width=True, hide_index=True)
161
+
162
# ══════════════════════════════════════════════════════════════════════════════
# TAB 2 — Live optimizer
# ══════════════════════════════════════════════════════════════════════════════
with tab2:
    st.markdown("""
    <p style="font-family:'Source Serif 4',serif; font-size:0.9rem; color:#57534E;
    line-height:1.7; margin-bottom:1.5rem;">
    Run the SciPy linear-programming optimizer live with custom parameters.
    Results are computed in real-time using the latest district predictions from the database.
    </p>
    """, unsafe_allow_html=True)

    ca, cb = st.columns(2)
    states2 = fetch_states() or []  # tolerate None/empty when the API is down
    with ca:
        scope2 = st.selectbox("State (or All-India)", ["All-India"] + states2, key="live_scope")
        budget_scale = st.slider("Budget Scale", 0.8, 1.5, 1.0, 0.05,
                                 help="1.0 = same total budget; 1.1 = +10% more funds")
    with cb:
        min_frac = st.slider("Min Allocation (floor)", 0.10, 0.60, 0.40, 0.05,
                             help="No district drops below this fraction of its current budget")
        max_frac = st.slider("Max Allocation (cap)", 1.5, 3.0, 2.5, 0.1,
                             help="No district exceeds this multiple of its current budget")

    # NOTE: the label originally began with an icon that was corrupted into
    # U+FFFD replacement characters; it is omitted because the glyph is not
    # recoverable.
    if st.button("Run Optimizer", type="primary"):
        with st.spinner("Running LP optimization…"):
            result = run_optimizer_live(
                state=None if scope2 == "All-India" else scope2,
                budget_scale=budget_scale,
                min_fraction=min_frac,
                max_fraction=max_frac,
            )

        if result:
            st.success(
                f"✅ Optimization complete — "
                f"Gain: **{result['gain_lakhs']:+,.2f}L** person-days "
                f"({result['gain_pct']:+.2f}%) · "
                f"Total budget: ₹{result['total_budget_lakhs']:,.0f}L"
            )

            # Summary metrics
            m1, m2, m3, m4 = st.columns(4)
            m1.metric("SQ Person-Days", f"{result['sq_persondays_total']:,.1f}L")
            m2.metric("Opt Person-Days", f"{result['opt_persondays_total']:,.1f}L")
            m3.metric("Net Gain", f"{result['gain_lakhs']:+,.2f}L")
            m4.metric("% Uplift", f"{result['gain_pct']:+.2f}%")

            # District breakdown
            if result.get("districts"):
                dist_df = pd.DataFrame(result["districts"])

                section_label("District Reallocation Details")
                # Explicit copy before adding a column, matching tab 1.
                top10 = dist_df.nlargest(10, "persondays_gain").copy()
                top10["label"] = top10["district"] + " · " + top10["state"]

                fig_live = go.Figure()
                fig_live.add_bar(
                    x=top10["persondays_gain"], y=top10["label"],
                    orientation="h",
                    marker=dict(color=GREEN, opacity=0.8),
                    hovertemplate=(
                        "<b>%{y}</b><br>PD Gain: <b>%{x:+.2f}L</b><extra></extra>"
                    ),
                )
                l_live = {**PLOTLY_LAYOUT}
                l_live.update(dict(
                    height=380, showlegend=False, bargap=0.3,
                    title=dict(text="Top 10 Districts to Increase",
                               font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
                    xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="PD Gain (Lakh)"),
                    yaxis=dict(**PLOTLY_LAYOUT["yaxis"]),
                ))
                fig_live.update_layout(**l_live)
                st.plotly_chart(fig_live, use_container_width=True,
                                config={"displayModeBar": False})

                with st.expander("📋 Full Live Results Table"):
                    st.dataframe(dist_df.round(3), use_container_width=True, hide_index=True)
frontend/pages/overview.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/overview.py β€” National MNREGA trend overview.
2
+
3
+ import sys, os
4
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
5
+
6
+ import streamlit as st
7
+ import plotly.graph_objects as go
8
+
9
+ from theme import inject_theme, page_header, section_label, PLOTLY_LAYOUT, SAFFRON, GREEN, RED
10
+ from utils.api_client import fetch_stats, fetch_states, fetch_yearly_trend, fetch_top_districts
11
+
12
+ inject_theme()
13
+ page_header(
14
+ "β—ˆ Module 01",
15
+ "Overview",
16
+ "Longitudinal MNREGA performance across India β€” employment and wage trends",
17
+ )
18
+
19
# ── Stats KPIs ────────────────────────────────────────────────────────────────
# Headline figures from the stats endpoint. A falsy response is reported as
# "backend offline" and the page run is halted with st.stop().
stats = fetch_stats()
if not stats:
    st.warning("⚠️ Backend offline — run `uvicorn backend.main:app --port 8000`")
    st.stop()

c1, c2, c3, c4, c5 = st.columns(5)
c1.metric("States", stats.get("total_states", "—"))
c2.metric("Districts", stats.get("total_districts", "—"))
c3.metric("Period", stats.get("year_range", "—"))

# `or 0` covers both a missing key and a key that is present but null; a null
# value would otherwise make the numeric format specs below raise TypeError.
total_pd_lakhs = stats.get("total_persondays_lakhs") or 0
covid_spike_pct = stats.get("covid_spike_pct") or 0
c4.metric("Total PD", f"{total_pd_lakhs:,.0f}L")
c5.metric("COVID Spike", f"{covid_spike_pct:.1f}%", delta="2020 peak")

st.markdown("---")
33
+
34
# ── Scope selector ────────────────────────────────────────────────────────────
# The dropdown lives in the narrower left column; "All-India" is the sentinel
# choice and maps to state_param = None for the fetch calls further down.
states_list = fetch_states()
scope_choices = ["All-India"] + states_list
picker_col, _ = st.columns([1, 2])
scope = picker_col.selectbox("Geographic Scope", scope_choices)

if scope == "All-India":
    state_param = None
else:
    state_param = scope
40
+
41
# ── Trend chart ───────────────────────────────────────────────────────────────
# Yearly person-days as bars, with average wage overlaid on a secondary axis
# when the API response includes an `avg_wage` column.
section_label("Employment Trend")
df_trend = fetch_yearly_trend(state_param)

if df_trend.empty:
    st.info("No trend data — API offline or pipeline not yet run.")
else:
    fig = go.Figure()

    fig.add_bar(
        x=df_trend["financial_year"],
        y=df_trend["total_persondays"],
        name="Person-Days (lakh)",
        marker=dict(color=SAFFRON, opacity=0.78),
    )

    # Wage on secondary axis if available
    if "avg_wage" in df_trend.columns:
        fig.add_scatter(
            x=df_trend["financial_year"],
            y=df_trend["avg_wage"],
            name="Avg Wage Rate (₹/day)",
            yaxis="y2",
            mode="lines+markers",
            line=dict(color=GREEN, width=2.5),
            marker=dict(size=6, color=GREEN),
        )

    # Dotted vertical markers for the two annotated years. Each marker is
    # drawn only when that year is actually present in the data.
    year_markers = (
        # (year, annotation text, colour, line width, annotation position)
        (2020, "COVID-19", RED, 1.5, "top right"),
        (2022, "WB anomaly", "#A8A29E", 1, "top left"),
    )
    years_present = df_trend["financial_year"].values
    for marker_year, marker_text, marker_color, marker_width, marker_pos in year_markers:
        if marker_year in years_present:
            fig.add_vline(
                x=marker_year,
                line_dash="dot",
                line_color=marker_color,
                line_width=marker_width,
                annotation_text=marker_text,
                annotation_font=dict(color=marker_color, size=9, family="DM Mono, monospace"),
                annotation_position=marker_pos,
            )

    # Start from the shared theme layout and override per-chart settings.
    layout = {**PLOTLY_LAYOUT}
    layout.update(dict(
        title=dict(
            text=f"MNREGA Employment Trend — {scope}",
            font=dict(family="Fraunces, serif", size=15, color="#1C1917"),
        ),
        hovermode="x unified",
        height=420,
        bargap=0.35,
        yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Person-Days (lakh)"),
        yaxis2=dict(
            title="Avg Wage Rate (₹/day)", overlaying="y", side="right",
            gridcolor="rgba(0,0,0,0)",
            tickfont=dict(color="#78716C", size=10),
            title_font=dict(color="#57534E", size=11),
        ),
        legend=dict(**PLOTLY_LAYOUT["legend"], orientation="h", y=1.08, x=0),
    ))
    fig.update_layout(**layout)
    st.plotly_chart(fig, use_container_width=True, config={"displayModeBar": False})
    st.caption("Source: MNREGA MIS · Ministry of Rural Development · Annual district-level aggregates")

st.markdown("---")
107
+
108
# ── District ranking ──────────────────────────────────────────────────────────
# Horizontal bar chart of the top-N districts by average person-days.
section_label("District Performance Benchmarking")

cm, cn = st.columns([2, 1])
with cm:
    # V3: only person_days_lakhs is a real non-synthetic column
    metric = "person_days_lakhs"
    st.markdown(
        '<p style="font-family:\'DM Mono\',monospace; font-size:0.65rem; '
        'letter-spacing:1.5px; text-transform:uppercase; color:#78716C; margin-bottom:4px;">'
        'Ranking Metric</p>'
        '<p style="font-size:0.9rem; color:#1C1917; margin:0;">Employment Volume (Lakh Person-Days)</p>',
        unsafe_allow_html=True
    )
with cn:
    n_top = st.slider("Top N Districts", 5, 30, 15)

df_top = fetch_top_districts(state_param, metric, n_top)

if not df_top.empty:
    # Axis label combines district and state, e.g. "District · State".
    df_top["label"] = df_top["district"] + " · " + df_top["state"]

    fig2 = go.Figure()
    fig2.add_bar(
        x=df_top["avg_persondays"],
        y=df_top["label"],
        orientation="h",
        marker=dict(
            color=df_top["avg_persondays"],
            colorscale=[[0, "#FED7AA"], [1, "#9A3412"]],
            showscale=False,
        ),
        # customdata order: state, district, rounded average (used by the hover text)
        customdata=list(zip(df_top["state"], df_top["district"], df_top["avg_persondays"].round(2))),
        hovertemplate=(
            "<b>%{customdata[1]}</b> · %{customdata[0]}<br>"
            "Avg Person-Days: <b>%{customdata[2]}L</b><extra></extra>"
        ),
    )
    layout2 = {**PLOTLY_LAYOUT}
    layout2.update(dict(
        title=dict(
            text=f"Top {n_top} Districts — Employment Volume",
            font=dict(family="Fraunces, serif", size=14, color="#1C1917"),
        ),
        # Grow the figure with the number of bars, never below 380px.
        height=max(380, n_top * 30),
        xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Avg Lakh Person-Days"),
        # Reversed so the first row of df_top is drawn at the top.
        yaxis=dict(**PLOTLY_LAYOUT["yaxis"], autorange="reversed"),
    ))
    fig2.update_layout(**layout2)
    st.plotly_chart(fig2, use_container_width=True, config={"displayModeBar": False})
else:
    st.info("No ranking data available.")
frontend/pages/predictions.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/predictions.py β€” GBR V3 model predictions and error analysis.
2
+
3
+ import sys, os
4
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
5
+
6
+ import streamlit as st
7
+ import plotly.graph_objects as go
8
+ import numpy as np
9
+
10
+ from theme import inject_theme, page_header, section_label, kpi_html, PLOTLY_LAYOUT, SAFFRON, GREEN, RED
11
+ from utils.api_client import fetch_states, fetch_districts, fetch_predictions
12
+
13
+ inject_theme()
14
+ page_header(
15
+ "β—ˆ Module 03",
16
+ "Predictions",
17
+ "GBR V3 district-level employment forecasts β€” walk-forward CV RΒ²β‰ˆ0.91 (excl. 2022 anomaly)",
18
+ )
19
+
20
# ── Filters ───────────────────────────────────────────────────────────────────
# State / district / year pickers. An empty state list is reported as
# "API offline" and the page run is halted.
states = fetch_states()
if not states:
    st.error("⚠️ API offline — run `uvicorn backend.main:app --port 8000`")
    st.stop()

c1, c2, c3 = st.columns(3)
with c1:
    scope = st.selectbox("State", ["All States"] + states)
with c2:
    state_param = None if scope == "All States" else scope
    # District choices are only fetched once a specific state is selected.
    # Parenthesised to make the conditional-expression precedence explicit.
    districts = (
        ["All Districts"] + fetch_districts(state_param)
        if state_param
        else ["All Districts"]
    )
    dist_sel = st.selectbox("District", districts)
with c3:
    # Year choices come from the predictions available for the chosen state.
    df_all = fetch_predictions(state=state_param)
    years = sorted(df_all["financial_year"].unique().tolist()) if not df_all.empty else []
    yr_sel = st.selectbox("Year", ["All Years"] + years)

# Apply filters
df = fetch_predictions(
    state=state_param,
    district=None if dist_sel == "All Districts" else dist_sel,
    year=None if yr_sel == "All Years" else int(yr_sel),
)

if df.empty:
    st.info("No prediction data for selected filters.")
    st.stop()
48
+
49
# ── Model KPIs ────────────────────────────────────────────────────────────────
# Fit metrics for the currently filtered rows: R², RMSE, MAE and mean bias
# (predicted minus actual).
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings

# NOTE: this silences warnings process-wide for the rest of the script run,
# not only for the metric calls below. Kept as-is to preserve behaviour.
warnings.filterwarnings("ignore")

try:
    y_actual = df["person_days_lakhs"]
    y_predicted = df["predicted_persondays"]

    rmse = np.sqrt(mean_squared_error(y_actual, y_predicted))
    mae = mean_absolute_error(y_actual, y_predicted)
    r2 = r2_score(y_actual, y_predicted)
    bias = (y_predicted - y_actual).mean()
except (KeyError, ValueError, TypeError) as exc:
    # Best-effort section: keep rendering the rest of the page, but say why
    # the KPI row is missing instead of silently dropping it.
    st.caption(f"Model metrics unavailable for this selection ({exc}).")
else:
    c1, c2, c3, c4 = st.columns(4)
    c1.metric("R² Score", f"{r2:.4f}")
    c2.metric("RMSE", f"{rmse:.3f}L")
    c3.metric("MAE", f"{mae:.3f}L")
    c4.metric("Mean Bias", f"{bias:+.3f}L")
67
+
68
# ── Model info callout ────────────────────────────────────────────────────────
# Static HTML note describing the model and its known caveats. It is rendered
# with unsafe_allow_html, so the text must stay a fixed literal.
st.markdown("""
<div style="background:#F0FDF4; border:1px solid #BBF7D0; border-left:3px solid #16A34A;
border-radius:8px; padding:0.9rem 1.1rem; margin:1rem 0;">
<p style="font-family:'DM Mono',monospace; font-size:0.56rem; letter-spacing:2px;
text-transform:uppercase; color:#16A34A; margin:0 0 6px 0;">V3 Leak-Free Model</p>
<p style="font-family:'Source Serif 4',serif; font-size:0.85rem; color:#14532D;
line-height:1.65; margin:0;">
GradientBoostingRegressor · 17 lag-based features · Walk-forward CV
· R²=0.91 excl. 2022 · Previous R²=0.9963 was data leakage
(<code>works_completed</code> r=1.0 with target).
2022 West Bengal reporting anomaly (−93 to −98% drop) is structurally unpredictable.
</p>
</div>
""", unsafe_allow_html=True)

st.markdown("---")
85
+
86
col_left, col_right = st.columns(2)

# ── Actual vs Predicted scatter ───────────────────────────────────────────────
with col_left:
    section_label("Actual vs Predicted")

    fig1 = go.Figure()

    # Shared axis limits with a little headroom (8% below, 6% above).
    # Padding is added by magnitude rather than by multiplying, so a negative
    # minimum or maximum widens the range instead of shrinking it. For
    # positive values this equals the previous `min * 0.92` / `max * 1.06`.
    data_lo = min(df["person_days_lakhs"].min(), df["predicted_persondays"].min())
    data_hi = max(df["person_days_lakhs"].max(), df["predicted_persondays"].max())
    lim_mn = data_lo - abs(data_lo) * 0.08
    lim_mx = data_hi + abs(data_hi) * 0.06

    # Diagonal reference line: points on it are perfectly predicted.
    fig1.add_scatter(
        x=[lim_mn, lim_mx], y=[lim_mn, lim_mx],
        mode="lines",
        line=dict(color="#E7E5E4", width=1.5, dash="dot"),
        name="Perfect prediction",
        hoverinfo="skip",
    )
    # One marker per row of df, coloured by absolute prediction error.
    fig1.add_scatter(
        x=df["person_days_lakhs"],
        y=df["predicted_persondays"],
        mode="markers",
        marker=dict(
            color=df["prediction_error"].abs(),
            colorscale=[[0, SAFFRON], [1, RED]],
            size=5, opacity=0.65,
            colorbar=dict(
                title=dict(text="|Error|L", font=dict(color="#78716C", size=9)),
                tickfont=dict(color="#78716C", size=8),
                thickness=8, len=0.5,
            ),
        ),
        # customdata order: state, district, year, actual, predicted, error
        customdata=list(zip(
            df["state"], df["district"],
            df["financial_year"],
            df["person_days_lakhs"].round(2),
            df["predicted_persondays"].round(2),
            df["prediction_error"].round(2),
        )),
        hovertemplate=(
            "<b>%{customdata[1]}</b> · %{customdata[0]}<br>"
            "FY: %{customdata[2]}<br>"
            "Actual: <b>%{customdata[3]}L</b><br>"
            "Predicted: <b>%{customdata[4]}L</b><br>"
            "Error: %{customdata[5]}L"
            "<extra></extra>"
        ),
        name="Districts",
    )

    l1 = {**PLOTLY_LAYOUT}
    l1.update(dict(
        height=370,
        title=dict(text="Actual vs Predicted Person-Days",
                   font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
        xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Actual (Lakh PD)", range=[lim_mn, lim_mx]),
        yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Predicted (Lakh PD)", range=[lim_mn, lim_mx]),
        showlegend=False,
    ))
    fig1.update_layout(**l1)
    st.plotly_chart(fig1, use_container_width=True, config={"displayModeBar": False})
146
+
147
# ── Error distribution ────────────────────────────────────────────────────────
with col_right:
    section_label("Prediction Error Distribution")

    # NOTE(review): the chart title below says "Actual − Predicted", while the
    # KPI row computes mean bias as predicted − actual. Confirm which sign
    # convention the `prediction_error` column uses.
    errors = df["prediction_error"]
    mean_error = errors.mean()

    fig2 = go.Figure()
    fig2.add_histogram(
        x=errors, nbinsx=40,
        marker=dict(color=SAFFRON, opacity=0.75, line=dict(color="#FFFFFF", width=0.5)),
        hovertemplate="Error: %{x:.2f}L<br>Count: %{y}<extra></extra>",
    )
    # Dotted line at zero error; dashed line at the observed mean error.
    fig2.add_vline(x=0, line_dash="dot", line_color="#1C1917", line_width=1.5)
    fig2.add_vline(x=mean_error, line_dash="dash", line_color=RED, line_width=1,
                   annotation_text=f"Mean={mean_error:+.2f}",
                   annotation_font=dict(color=RED, size=9, family="DM Mono, monospace"))

    l2 = {**PLOTLY_LAYOUT}
    l2.update(dict(
        height=370,
        title=dict(text="Error Distribution (Actual − Predicted)",
                   font=dict(family="Fraunces, serif", size=13, color="#1C1917")),
        xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Error (Lakh PD)"),
        yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Count"),
        showlegend=False, bargap=0.05,
    ))
    fig2.update_layout(**l2)
    st.plotly_chart(fig2, use_container_width=True, config={"displayModeBar": False})

st.markdown("---")
176
+
177
# ── Year-on-year prediction vs actual trend ───────────────────────────────────
section_label("Year-on-Year Prediction Accuracy")

# Sum actual and predicted person-days per financial year.
yearly_totals = {
    "actual": ("person_days_lakhs", "sum"),
    "predicted": ("predicted_persondays", "sum"),
}
trend = df.groupby("financial_year", as_index=False).agg(**yearly_totals)
fy_axis = trend["financial_year"]

# Bars show the actual totals; the line with markers shows predicted totals.
fig3 = go.Figure()
fig3.add_bar(
    x=fy_axis,
    y=trend["actual"],
    name="Actual",
    marker=dict(color="#E7E5E4", opacity=0.9),
)
fig3.add_scatter(
    x=fy_axis,
    y=trend["predicted"],
    name="Predicted",
    mode="lines+markers",
    line=dict(color=SAFFRON, width=2.5),
    marker=dict(size=7, color=SAFFRON, line=dict(width=1.5, color="#FFFFFF")),
)

chart_title = dict(
    text="Aggregated Actual vs Predicted by Year",
    font=dict(family="Fraunces, serif", size=13, color="#1C1917"),
)
l3 = dict(PLOTLY_LAYOUT)
l3.update(
    height=300,
    barmode="overlay",
    bargap=0.35,
    title=chart_title,
    xaxis=dict(**PLOTLY_LAYOUT["xaxis"], title="Financial Year", dtick=1),
    yaxis=dict(**PLOTLY_LAYOUT["yaxis"], title="Total Lakh PD"),
    legend=dict(**PLOTLY_LAYOUT["legend"], orientation="h", y=1.08, x=0),
)

# Annotate known anomalies (only for years present in the aggregated data).
for fy, note, tone, stroke in (
    (2020, "COVID", RED, 1.5),
    (2022, "WB anomaly", "#A8A29E", 1),
):
    if fy in fy_axis.values:
        fig3.add_vline(
            x=fy,
            line_dash="dot",
            line_color=tone,
            line_width=stroke,
            annotation_text=note,
            annotation_font=dict(color=tone, size=9, family="DM Mono, monospace"),
        )

fig3.update_layout(**l3)
st.plotly_chart(fig3, use_container_width=True, config={"displayModeBar": False})

st.markdown("---")
222
+
223
# ── Walk-forward CV summary ───────────────────────────────────────────────────
# Static table of per-year walk-forward results. The values are hard-coded
# here, not recomputed from `df`.
section_label("Walk-Forward CV Performance (Honest Evaluation)")

cv_data = {
    "Year":        [2018, 2019, 2020, 2021, 2022, 2023, 2024],
    "R²":          [0.916, 0.926, 0.835, 0.926, 0.510, 0.909, 0.935],
    "MAE":         [6.639, 6.380, 12.681, 7.150, 13.954, 7.403, 5.673],
    "vs Naive R²": ["+0.004", "+0.061", "+0.083", "−0.012", "+0.330", "−0.014", "+0.065"],
    "Note":        ["", "", "COVID spike", "", "WB reporting anomaly", "", ""],
}
import pandas as pd
cv_df = pd.DataFrame(cv_data)
st.dataframe(cv_df, use_container_width=True, hide_index=True)
# The caption figures match the R² column: mean of all seven years ≈ 0.851,
# mean without 2022 ≈ 0.908.
st.caption("Walk-forward CV: model trained on years before test year only. Mean R²=0.851, excl. 2022: R²=0.908.")

st.markdown("---")
239
+
240
# ── Worst predictions table ───────────────────────────────────────────────────
section_label("Largest Prediction Errors")

# Rank rows by absolute error and keep the 20 largest.
ranked = df.assign(abs_error=df["prediction_error"].abs()).nlargest(20, "abs_error")

report_columns = [
    "state", "district", "financial_year",
    "person_days_lakhs", "predicted_persondays", "prediction_error",
]
display_names = {
    "person_days_lakhs": "actual_L",
    "predicted_persondays": "predicted_L",
    "prediction_error": "error_L",
}
# The helper abs_error column is dropped by the column selection.
worst = ranked[report_columns].rename(columns=display_names).round(3)

st.dataframe(worst, use_container_width=True, hide_index=True)
frontend/pages/spatial.py ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/spatial.py β€” Spatial Overview Map
2
+
3
+
4
+ import sys, os
5
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
6
+
7
+ import streamlit as st
8
+ import plotly.graph_objects as go
9
+ import pandas as pd
10
+ import numpy as np
11
+
12
+ from theme import inject_theme, page_header, section_label, kpi_html, PLOTLY_LAYOUT, SAFFRON, SAFFRON_SCALE, GREEN, RED, AMBER
13
+ from utils.api_client import fetch_states, fetch_predictions, fetch_optimizer_results, fetch_district_history
14
+
15
+ inject_theme()
16
+ page_header(
17
+ "β—ˆ Module 05",
18
+ "Spatial Overview",
19
+ "District-level employment prediction map β€” hover any bubble for full model details",
20
+ )
21
+
22
+ # ── District coordinates (approximate centroids for all major districts) ──────
23
+ # Covers all 36 states/UTs across India's 700+ districts.
24
+ # Format: "District|State": (lat, lon)
25
+ DISTRICT_COORDS: dict[str, tuple[float, float]] = {
26
+ # ── Andhra Pradesh ─────────────────────────────────────────────────────────
27
+ "Srikakulam|Andhra Pradesh": (18.30, 83.90), "Vizianagaram|Andhra Pradesh": (18.12, 83.41),
28
+ "Visakhapatnam|Andhra Pradesh": (17.69, 83.22), "East Godavari|Andhra Pradesh":(17.00, 82.00),
29
+ "West Godavari|Andhra Pradesh": (16.92, 81.34), "Krishna|Andhra Pradesh": (16.61, 80.83),
30
+ "Guntur|Andhra Pradesh": (16.31, 80.44), "Prakasam|Andhra Pradesh": (15.35, 79.57),
31
+ "Nellore|Andhra Pradesh": (14.44, 79.99), "Kurnool|Andhra Pradesh": (15.83, 78.05),
32
+ "Kadapa|Andhra Pradesh": (14.47, 78.82), "Anantapur|Andhra Pradesh": (14.68, 77.60),
33
+ "Chittoor|Andhra Pradesh": (13.22, 79.10),
34
+
35
+ # ── Assam ─────────────────────────────────────────────────────────────────
36
+ "Kamrup|Assam": (26.14, 91.77), "Barpeta|Assam": (26.32, 91.00),
37
+ "Dhubri|Assam": (26.02, 89.98), "Goalpara|Assam": (26.17, 90.62),
38
+ "Nagaon|Assam": (26.35, 92.68), "Cachar|Assam": (24.81, 92.86),
39
+ "Lakhimpur|Assam": (27.24, 94.10), "Dibrugarh|Assam": (27.49, 95.00),
40
+ "Sonitpur|Assam": (26.63, 92.80), "Jorhat|Assam": (26.75, 94.22),
41
+
42
+ # ── Bihar ─────────────────────────────────────────────────────────────────
43
+ "Patna|Bihar": (25.59, 85.13), "Gaya|Bihar": (24.80, 84.99),
44
+ "Muzaffarpur|Bihar": (26.12, 85.38), "Bhagalpur|Bihar": (25.24, 86.98),
45
+ "Darbhanga|Bihar": (26.16, 85.90), "Purnea|Bihar": (25.78, 87.47),
46
+ "Rohtas|Bihar": (24.98, 83.98), "Siwan|Bihar": (26.22, 84.36),
47
+ "Saran|Bihar": (25.92, 84.74), "Nalanda|Bihar": (25.10, 85.44),
48
+ "Madhubani|Bihar": (26.37, 86.07), "Champaran East|Bihar": (26.65, 84.92),
49
+ "Champaran West|Bihar": (27.02, 84.46),
50
+
51
+ # ── Chhattisgarh ──────────────────────────────────────────────────────────
52
+ "Raipur|Chhattisgarh": (21.25, 81.63), "Bilaspur|Chhattisgarh": (22.09, 82.15),
53
+ "Durg|Chhattisgarh": (21.19, 81.28), "Rajnandgaon|Chhattisgarh": (21.10, 81.03),
54
+ "Bastar|Chhattisgarh": (19.10, 81.95), "Sarguja|Chhattisgarh": (23.12, 83.19),
55
+ "Korba|Chhattisgarh": (22.35, 82.72), "Raigarh|Chhattisgarh": (21.90, 83.40),
56
+
57
+ # ── Gujarat ───────────────────────────────────────────────────────────────
58
+ "Ahmedabad|Gujarat": (23.03, 72.58), "Surat|Gujarat": (21.17, 72.83),
59
+ "Vadodara|Gujarat": (22.31, 73.18), "Rajkot|Gujarat": (22.30, 70.80),
60
+ "Bhavnagar|Gujarat": (21.77, 72.15), "Jamnagar|Gujarat": (22.47, 70.06),
61
+ "Junagadh|Gujarat": (21.52, 70.46), "Anand|Gujarat": (22.56, 72.93),
62
+ "Mehsana|Gujarat": (23.59, 72.37), "Banaskantha|Gujarat": (24.17, 72.42),
63
+ "Kutch|Gujarat": (23.73, 69.86), "Dahod|Gujarat": (22.83, 74.25),
64
+ "Narmada|Gujarat": (21.87, 73.49), "Valsad|Gujarat": (20.59, 72.93),
65
+ "Dang|Gujarat": (20.75, 73.69),
66
+
67
+ # ── Haryana ───────────────────────────────────────────────────────────────
68
+ "Hisar|Haryana": (29.15, 75.72), "Sirsa|Haryana": (29.53, 75.03),
69
+ "Bhiwani|Haryana": (28.79, 76.13), "Rohtak|Haryana": (28.89, 76.61),
70
+ "Sonipat|Haryana": (28.99, 77.01), "Karnal|Haryana": (29.68, 76.99),
71
+ "Ambala|Haryana": (30.37, 76.78), "Kurukshetra|Haryana": (29.97, 76.85),
72
+ "Mahendragarh|Haryana": (28.27, 76.15),
73
+
74
+ # ── Jharkhand ─────────────────────────────────────────────────────────────
75
+ "Ranchi|Jharkhand": (23.35, 85.33), "Dhanbad|Jharkhand": (23.80, 86.45),
76
+ "Bokaro|Jharkhand": (23.67, 86.15), "Giridih|Jharkhand": (24.19, 86.30),
77
+ "Hazaribagh|Jharkhand": (23.99, 85.36), "Dumka|Jharkhand": (24.27, 87.25),
78
+ "Palamu|Jharkhand": (24.03, 84.08), "Gumla|Jharkhand": (23.05, 84.54),
79
+ "Pakur|Jharkhand": (24.63, 87.84), "Lohardaga|Jharkhand": (23.44, 84.68),
80
+
81
+ # ── Karnataka ─────────────────────────────────────────────────────────────
82
+ "Bangalore Rural|Karnataka": (13.01, 77.57), "Tumkur|Karnataka": (13.34, 77.10),
83
+ "Kolar|Karnataka": (13.14, 78.13), "Mysore|Karnataka": (12.30, 76.65),
84
+ "Mandya|Karnataka": (12.52, 76.90), "Hassan|Karnataka": (13.00, 76.10),
85
+ "Chikmagalur|Karnataka": (13.32, 75.78), "Shimoga|Karnataka": (13.93, 75.57),
86
+ "Dakshina Kannada|Karnataka": (12.85, 75.24), "Uttara Kannada|Karnataka": (14.79, 74.68),
87
+ "Raichur|Karnataka": (16.21, 77.36), "Koppal|Karnataka": (15.35, 76.15),
88
+ "Gadag|Karnataka": (15.42, 75.62), "Dharwad|Karnataka": (15.46, 75.01),
89
+ "Bagalkot|Karnataka": (16.18, 75.70), "Bijapur|Karnataka": (16.83, 75.72),
90
+ "Gulbarga|Karnataka": (17.34, 76.82), "Bidar|Karnataka": (17.91, 77.52),
91
+ "Bellary|Karnataka": (15.14, 76.92), "Chitradurga|Karnataka": (14.23, 76.40),
92
+ "Davangere|Karnataka": (14.46, 75.92), "Udupi|Karnataka": (13.34, 74.75),
93
+
94
+ # ── Kerala ────────────────────────────────────────────────────────────────
95
+ "Thiruvananthapuram|Kerala": (8.52, 76.94), "Kollam|Kerala": (8.88, 76.61),
96
+ "Pathanamthitta|Kerala": (9.27, 76.77), "Alappuzha|Kerala": (9.49, 76.32),
97
+ "Kottayam|Kerala": (9.59, 76.52), "Idukki|Kerala": (9.85, 77.10),
98
+ "Ernakulam|Kerala": (10.01, 76.31), "Thrissur|Kerala": (10.52, 76.22),
99
+ "Palakkad|Kerala": (10.77, 76.65), "Malappuram|Kerala": (11.07, 76.07),
100
+ "Kozhikode|Kerala": (11.25, 75.78), "Wayanad|Kerala": (11.61, 76.08),
101
+ "Kannur|Kerala": (11.87, 75.37), "Kasaragod|Kerala": (12.50, 74.99),
102
+
103
+ # ── Madhya Pradesh ────────────────────────────────────────────────────────
104
+ "Bhopal|Madhya Pradesh": (23.26, 77.41), "Indore|Madhya Pradesh": (22.72, 75.86),
105
+ "Jabalpur|Madhya Pradesh": (23.18, 79.99), "Gwalior|Madhya Pradesh": (26.22, 78.18),
106
+ "Sagar|Madhya Pradesh": (23.84, 78.74), "Rewa|Madhya Pradesh": (24.53, 81.30),
107
+ "Satna|Madhya Pradesh": (24.60, 80.83), "Ujjain|Madhya Pradesh": (23.18, 75.78),
108
+ "Chhindwara|Madhya Pradesh": (22.06, 78.94), "Shivpuri|Madhya Pradesh": (25.42, 77.66),
109
+ "Morena|Madhya Pradesh": (26.50, 78.00), "Bhind|Madhya Pradesh": (26.56, 78.78),
110
+ "Datia|Madhya Pradesh": (25.67, 78.46), "Chhatarpur|Madhya Pradesh": (24.92, 79.58),
111
+ "Tikamgarh|Madhya Pradesh": (24.74, 78.83), "Raisen|Madhya Pradesh": (22.99, 77.79),
112
+ "Vidisha|Madhya Pradesh": (23.52, 77.81), "Hoshangabad|Madhya Pradesh": (22.75, 77.73),
113
+ "Harda|Madhya Pradesh": (22.34, 77.09), "Betul|Madhya Pradesh": (21.91, 77.90),
114
+ "Balaghat|Madhya Pradesh": (21.81, 80.19), "Seoni|Madhya Pradesh": (22.09, 79.55),
115
+ "Mandla|Madhya Pradesh": (22.60, 80.38), "Dindori|Madhya Pradesh": (22.95, 81.08),
116
+ "Shahdol|Madhya Pradesh": (23.30, 81.36), "Anuppur|Madhya Pradesh": (23.10, 81.69),
117
+ "Umaria|Madhya Pradesh": (23.53, 80.84), "Katni|Madhya Pradesh": (23.83, 80.39),
118
+ "Panna|Madhya Pradesh": (24.72, 80.19), "Damoh|Madhya Pradesh": (23.83, 79.45),
119
+ "Narsinghpur|Madhya Pradesh": (22.95, 79.19), "Niwari|Madhya Pradesh": (25.01, 78.76),
120
+
121
+ # ── Maharashtra ───────────────────────────────────────────────────────────
122
+ "Ahmednagar|Maharashtra": (19.10, 74.74), "Akola|Maharashtra": (20.71, 77.00),
123
+ "Amravati|Maharashtra": (20.93, 77.75), "Aurangabad|Maharashtra": (19.88, 75.34),
124
+ "Beed|Maharashtra": (18.99, 75.75), "Bhandara|Maharashtra": (21.17, 79.65),
125
+ "Buldhana|Maharashtra": (20.53, 76.18), "Chandrapur|Maharashtra": (19.96, 79.30),
126
+ "Dhule|Maharashtra": (20.90, 74.78), "Gadchiroli|Maharashtra": (20.18, 80.00),
127
+ "Gondia|Maharashtra": (21.46, 80.20), "Hingoli|Maharashtra": (19.72, 77.15),
128
+ "Jalgaon|Maharashtra": (21.00, 75.57), "Jalna|Maharashtra": (19.84, 75.89),
129
+ "Kolhapur|Maharashtra": (16.70, 74.24), "Latur|Maharashtra": (18.40, 76.57),
130
+ "Mumbai City|Maharashtra": (18.96, 72.82), "Mumbai Suburban|Maharashtra": (19.17, 72.96),
131
+ "Nagpur|Maharashtra": (21.15, 79.09), "Nanded|Maharashtra": (19.15, 77.32),
132
+ "Nandurbar|Maharashtra": (21.37, 74.24), "Nashik|Maharashtra": (19.99, 73.79),
133
+ "Osmanabad|Maharashtra": (18.18, 76.04), "Palghar|Maharashtra": (19.70, 72.77),
134
+ "Parbhani|Maharashtra": (19.27, 76.77), "Pune|Maharashtra": (18.52, 73.86),
135
+ "Raigad|Maharashtra": (18.52, 73.18), "Ratnagiri|Maharashtra": (16.99, 73.30),
136
+ "Sangli|Maharashtra": (16.86, 74.56), "Satara|Maharashtra": (17.69, 74.00),
137
+ "Sindhudurg|Maharashtra": (16.35, 73.74), "Solapur|Maharashtra": (17.69, 75.91),
138
+ "Thane|Maharashtra": (19.22, 72.98), "Wardha|Maharashtra": (20.75, 78.60),
139
+ "Washim|Maharashtra": (20.11, 77.15), "Yavatmal|Maharashtra": (20.39, 78.13),
140
+
141
+ # ── Odisha ────────────────────────────────────────────────────────────────
142
+ "Bhubaneswar|Odisha": (20.30, 85.84), "Cuttack|Odisha": (20.46, 85.88),
143
+ "Balasore|Odisha": (21.49, 86.93), "Mayurbhanj|Odisha": (21.92, 86.73),
144
+ "Keonjhar|Odisha": (21.63, 85.58), "Sundargarh|Odisha": (22.12, 84.03),
145
+ "Sambalpur|Odisha": (21.47, 83.97), "Bargarh|Odisha": (21.33, 83.62),
146
+ "Bolangir|Odisha": (20.71, 83.49), "Kalahandi|Odisha": (19.91, 83.17),
147
+ "Koraput|Odisha": (18.81, 82.71), "Rayagada|Odisha": (19.17, 83.41),
148
+ "Ganjam|Odisha": (19.39, 84.70), "Puri|Odisha": (19.81, 85.83),
149
+ "Khordha|Odisha": (20.18, 85.62), "Jagatsinghpur|Odisha": (20.25, 86.18),
150
+ "Kendrapara|Odisha": (20.50, 86.42), "Jajpur|Odisha": (20.85, 86.33),
151
+
152
+ # ── Rajasthan ─────────────────────────────────────────────────────────────
153
+ "Jaipur|Rajasthan": (26.92, 75.79), "Jodhpur|Rajasthan": (26.29, 73.03),
154
+ "Udaipur|Rajasthan": (24.58, 73.69), "Kota|Rajasthan": (25.18, 75.84),
155
+ "Ajmer|Rajasthan": (26.45, 74.64), "Bikaner|Rajasthan": (28.02, 73.31),
156
+ "Alwar|Rajasthan": (27.57, 76.61), "Bharatpur|Rajasthan": (27.22, 77.49),
157
+ "Sikar|Rajasthan": (27.61, 75.14), "Nagaur|Rajasthan": (27.21, 73.74),
158
+ "Pali|Rajasthan": (25.77, 73.33), "Barmer|Rajasthan": (25.75, 71.39),
159
+ "Jaisalmer|Rajasthan": (26.92, 70.91), "Churu|Rajasthan": (28.30, 74.96),
160
+ "Jhunjhunu|Rajasthan": (28.13, 75.40), "Sirohi|Rajasthan": (24.89, 72.86),
161
+ "Banswara|Rajasthan": (23.54, 74.44), "Dungarpur|Rajasthan": (23.84, 73.71),
162
+ "Baran|Rajasthan": (25.10, 76.52), "Jhalawar|Rajasthan": (24.60, 76.16),
163
+ "Tonk|Rajasthan": (26.17, 75.79), "Sawai Madhopur|Rajasthan": (26.01, 76.35),
164
+ "Dausa|Rajasthan": (26.89, 76.34), "Karauli|Rajasthan": (26.50, 77.02),
165
+
166
+ # ── Tamil Nadu ────────────────────────────────────────────────────────────
167
+ "Chennai|Tamil Nadu": (13.08, 80.27), "Coimbatore|Tamil Nadu": (11.02, 76.97),
168
+ "Madurai|Tamil Nadu": (9.93, 78.12), "Tiruchirappalli|Tamil Nadu": (10.80, 78.69),
169
+ "Salem|Tamil Nadu": (11.65, 78.16), "Tirunelveli|Tamil Nadu": (8.73, 77.70),
170
+ "Vellore|Tamil Nadu": (12.92, 79.13), "Erode|Tamil Nadu": (11.34, 77.73),
171
+ "Thanjavur|Tamil Nadu": (10.79, 79.14), "Virudhunagar|Tamil Nadu": (9.58, 77.96),
172
+ "Ramanathapuram|Tamil Nadu": (9.37, 78.83), "Pudukkottai|Tamil Nadu": (10.38, 78.82),
173
+ "Dindigul|Tamil Nadu": (10.36, 77.98), "Dharmapuri|Tamil Nadu": (12.13, 78.16),
174
+ "Krishnagiri|Tamil Nadu": (12.52, 78.21), "Namakkal|Tamil Nadu": (11.22, 78.17),
175
+ "Nilgiris|Tamil Nadu": (11.47, 76.73), "Tiruppur|Tamil Nadu": (11.11, 77.34),
176
+ "Cuddalore|Tamil Nadu": (11.75, 79.77), "Villupuram|Tamil Nadu": (11.94, 79.49),
177
+ "Kancheepuram|Tamil Nadu": (12.83, 79.70), "Thiruvallur|Tamil Nadu": (13.15, 79.91),
178
+ "Tiruvannamalai|Tamil Nadu": (12.23, 79.07),
179
+
180
+ # ── Telangana ─────────────────────────────────────────────────────────────
181
+ "Hyderabad|Telangana": (17.38, 78.47), "Medchal|Telangana": (17.62, 78.48),
182
+ "Rangareddy|Telangana": (17.25, 78.38), "Nalgonda|Telangana": (17.05, 79.27),
183
+ "Warangal|Telangana": (17.97, 79.59), "Karimnagar|Telangana": (18.44, 79.13),
184
+ "Khammam|Telangana": (17.25, 80.15), "Nizamabad|Telangana": (18.67, 78.10),
185
+ "Adilabad|Telangana": (19.67, 78.53), "Mahabubnagar|Telangana": (16.74, 77.99),
186
+
187
+ # ── Uttar Pradesh ─────────────────────────────────────────────────────────
188
+ "Lucknow|Uttar Pradesh": (26.85, 80.95), "Kanpur Nagar|Uttar Pradesh": (26.45, 80.35),
189
+ "Agra|Uttar Pradesh": (27.18, 78.02), "Varanasi|Uttar Pradesh": (25.32, 83.01),
190
+ "Allahabad|Uttar Pradesh": (25.44, 81.85), "Meerut|Uttar Pradesh": (28.98, 77.71),
191
+ "Bareilly|Uttar Pradesh": (28.35, 79.43), "Gorakhpur|Uttar Pradesh": (26.76, 83.37),
192
+ "Mathura|Uttar Pradesh": (27.49, 77.67), "Muzaffarnagar|Uttar Pradesh": (29.47, 77.70),
193
+ "Shahjahanpur|Uttar Pradesh": (27.88, 79.91), "Sitapur|Uttar Pradesh": (27.57, 80.68),
194
+ "Lakhimpur Kheri|Uttar Pradesh": (27.94, 80.78), "Hardoi|Uttar Pradesh": (27.40, 80.13),
195
+ "Unnao|Uttar Pradesh": (26.54, 80.49), "Rae Bareli|Uttar Pradesh": (26.22, 81.24),
196
+ "Pratapgarh|Uttar Pradesh": (25.89, 81.99), "Jaunpur|Uttar Pradesh": (25.73, 82.69),
197
+ "Ghazipur|Uttar Pradesh": (25.58, 83.57), "Ballia|Uttar Pradesh": (25.75, 84.15),
198
+ "Azamgarh|Uttar Pradesh": (26.07, 83.18), "Mau|Uttar Pradesh": (25.94, 83.56),
199
+ "Deoria|Uttar Pradesh": (26.50, 83.78), "Basti|Uttar Pradesh": (26.79, 82.73),
200
+ "Siddharthnagar|Uttar Pradesh": (27.29, 83.07), "Maharajganj|Uttar Pradesh": (27.15, 83.56),
201
+ "Gonda|Uttar Pradesh": (27.13, 81.97), "Bahraich|Uttar Pradesh": (27.57, 81.60),
202
+ "Shravasti|Uttar Pradesh": (27.72, 81.87), "Balrampur|Uttar Pradesh": (27.43, 82.19),
203
+ "Barabanki|Uttar Pradesh": (26.94, 81.19), "Faizabad|Uttar Pradesh": (26.77, 82.14),
204
+ "Ambedkar Nagar|Uttar Pradesh": (26.43, 82.62), "Sultanpur|Uttar Pradesh": (26.26, 82.06),
205
+ "Banda|Uttar Pradesh": (25.48, 80.34), "Chitrakoot|Uttar Pradesh": (25.20, 80.90),
206
+ "Hamirpur|Uttar Pradesh": (25.95, 80.15), "Mahoba|Uttar Pradesh": (25.29, 79.87),
207
+ "Lalitpur|Uttar Pradesh": (24.69, 78.41), "Jhansi|Uttar Pradesh": (25.45, 78.57),
208
+ "Jalaun|Uttar Pradesh": (26.14, 79.34), "Etawah|Uttar Pradesh": (26.78, 79.02),
209
+ "Auraiya|Uttar Pradesh": (26.47, 79.51), "Kannauj|Uttar Pradesh": (27.05, 79.92),
210
+ "Farrukhabad|Uttar Pradesh": (27.38, 79.57), "Mainpuri|Uttar Pradesh": (27.23, 79.02),
211
+ "Firozabad|Uttar Pradesh": (27.15, 78.39), "Etah|Uttar Pradesh": (27.65, 78.67),
212
+ "Kasganj|Uttar Pradesh": (27.81, 78.65), "Hathras|Uttar Pradesh": (27.60, 78.06),
213
+ "Aligarh|Uttar Pradesh": (27.88, 78.07), "Bulandshahr|Uttar Pradesh": (28.41, 77.85),
214
+ "Hapur|Uttar Pradesh": (28.72, 77.78), "Gautam Buddha Nagar|Uttar Pradesh": (28.54, 77.39),
215
+ "Ghaziabad|Uttar Pradesh": (28.67, 77.44), "Bagpat|Uttar Pradesh": (28.94, 77.22),
216
+ "Bijnor|Uttar Pradesh": (29.37, 78.13), "Amroha|Uttar Pradesh": (28.91, 78.47),
217
+ "Sambhal|Uttar Pradesh": (28.59, 78.56), "Moradabad|Uttar Pradesh": (28.84, 78.77),
218
+ "Rampur|Uttar Pradesh": (28.81, 79.03), "Pilibhit|Uttar Pradesh": (28.64, 79.81),
219
+ "Budaun|Uttar Pradesh": (28.04, 79.13),
220
+
221
+ # ── West Bengal ───────────────────────────────────────────────────────────
222
+ "Kolkata|West Bengal": (22.57, 88.37), "Howrah|West Bengal": (22.59, 88.31),
223
+ "North 24 Parganas|West Bengal": (22.86, 88.54), "South 24 Parganas|West Bengal":(22.15, 88.27),
224
+ "Bardhaman|West Bengal": (23.23, 87.86), "Birbhum|West Bengal": (23.90, 87.53),
225
+ "Murshidabad|West Bengal": (24.18, 88.27), "Nadia|West Bengal": (23.47, 88.55),
226
+ "Hooghly|West Bengal": (22.96, 88.38), "Midnapore West|West Bengal": (22.43, 86.92),
227
+ "Midnapore East|West Bengal": (22.11, 87.67), "Bankura|West Bengal": (23.23, 87.07),
228
+ "Purulia|West Bengal": (23.33, 86.36), "Malda|West Bengal": (25.00, 88.14),
229
+ "Dinajpur North|West Bengal": (25.62, 88.43), "Dinajpur South|West Bengal": (25.29, 88.68),
230
+ "Jalpaiguri|West Bengal": (26.54, 88.73), "Darjeeling|West Bengal": (27.04, 88.26),
231
+ "Cooch Behar|West Bengal": (26.32, 89.45),
232
+
233
+ # ── Himachal Pradesh ──────────────────────────────────────────────────────
234
+ "Shimla|Himachal Pradesh": (31.10, 77.17), "Kangra|Himachal Pradesh": (32.10, 76.27),
235
+ "Mandi|Himachal Pradesh": (31.71, 76.93), "Hamirpur|Himachal Pradesh": (31.69, 76.52),
236
+ "Una|Himachal Pradesh": (31.46, 76.27), "Chamba|Himachal Pradesh": (32.55, 76.13),
237
+ "Solan|Himachal Pradesh": (30.91, 77.10), "Sirmaur|Himachal Pradesh": (30.56, 77.46),
238
+ "Bilaspur|Himachal Pradesh": (31.34, 76.76), "Kinnaur|Himachal Pradesh": (31.59, 78.45),
239
+ "Kullu|Himachal Pradesh": (31.96, 77.11), "Lahul Spiti|Himachal Pradesh":(32.77, 77.67),
240
+
241
+ # ── Uttarakhand ───────────────────────────────────────────────────────────
242
+ "Dehradun|Uttarakhand": (30.32, 78.03), "Haridwar|Uttarakhand": (29.96, 78.16),
243
+ "Nainital|Uttarakhand": (29.38, 79.46), "Udham Singh Nagar|Uttarakhand":(29.00, 79.52),
244
+ "Almora|Uttarakhand": (29.60, 79.66), "Pauri Garhwal|Uttarakhand": (29.78, 79.01),
245
+ "Tehri Garhwal|Uttarakhand": (30.39, 78.48), "Chamoli|Uttarakhand": (30.41, 79.32),
246
+ "Rudraprayag|Uttarakhand": (30.28, 78.98), "Uttarkashi|Uttarakhand": (30.73, 78.44),
247
+ "Bageshwar|Uttarakhand": (29.84, 79.77), "Pithoragarh|Uttarakhand": (29.58, 80.22),
248
+ "Champawat|Uttarakhand": (29.33, 80.09),
249
+
250
+ # ── Punjab ────────────────────────────────────────────────────────────────
251
+ "Amritsar|Punjab": (31.63, 74.87), "Ludhiana|Punjab": (30.90, 75.85),
252
+ "Jalandhar|Punjab": (31.33, 75.58), "Patiala|Punjab": (30.34, 76.39),
253
+ "Bathinda|Punjab": (30.21, 74.95), "Gurdaspur|Punjab": (32.04, 75.41),
254
+ "Firozpur|Punjab": (30.93, 74.61), "Hoshiarpur|Punjab": (31.53, 75.91),
255
+ "Rupnagar|Punjab": (30.96, 76.53), "Sangrur|Punjab": (30.25, 75.84),
256
+ "Moga|Punjab": (30.82, 75.17), "Faridkot|Punjab": (30.67, 74.76),
257
+ "Muktsar|Punjab": (30.48, 74.52), "Fazilka|Punjab": (30.40, 74.02),
258
+ "Nawanshahr|Punjab": (31.12, 76.12), "Kapurthala|Punjab": (31.38, 75.38),
259
+
260
+ # ── Jharkhand extra ───────────────────────────────────────────────────────
261
+ "Chatra|Jharkhand": (24.21, 84.88), "Koderma|Jharkhand": (24.47, 85.60),
262
+ "Simdega|Jharkhand": (22.61, 84.51), "Khunti|Jharkhand": (23.07, 85.28),
263
+ "Ramgarh|Jharkhand": (23.63, 85.51), "Jamtara|Jharkhand": (23.96, 86.80),
264
+ "Sahibganj|Jharkhand": (24.96, 87.63), "Godda|Jharkhand": (24.83, 87.21),
265
+ "Deoghar|Jharkhand": (24.48, 86.70),
266
+
267
+ # ── Generic fallback centroids for states ─────────────────────────────────
268
+ "Unknown|Andhra Pradesh": (15.9, 79.7),
269
+ "Unknown|Assam": (26.2, 92.9),
270
+ "Unknown|Bihar": (25.1, 85.3),
271
+ "Unknown|Chhattisgarh": (21.3, 81.7),
272
+ "Unknown|Gujarat": (22.3, 71.2),
273
+ "Unknown|Haryana": (29.1, 76.1),
274
+ "Unknown|Jharkhand": (23.6, 85.3),
275
+ "Unknown|Karnataka": (15.3, 75.7),
276
+ "Unknown|Kerala": (10.9, 76.3),
277
+ "Unknown|Madhya Pradesh": (22.9, 78.7),
278
+ "Unknown|Maharashtra": (19.7, 75.7),
279
+ "Unknown|Odisha": (20.9, 85.1),
280
+ "Unknown|Rajasthan": (27.0, 74.2),
281
+ "Unknown|Tamil Nadu": (11.1, 78.7),
282
+ "Unknown|Telangana": (17.4, 79.1),
283
+ "Unknown|Uttar Pradesh": (26.8, 80.9),
284
+ "Unknown|West Bengal": (22.9, 87.9),
285
+ }
286
+
287
+
288
def get_coords(district: str, state: str) -> tuple[float, float]:
    """Return an approximate ``(lat, lon)`` for a district, with jitter.

    The ``"<district>|<state>"`` key is looked up in ``DISTRICT_COORDS``.
    When the district is not listed, the ``"Unknown|<state>"`` entry is used,
    and failing that a hard-coded default of ``(22.0, 78.0)``.

    A pseudo-random offset is added to both coordinates (up to 0.08 degrees
    for a listed district, up to 1.2 degrees for a fallback position).

    The offset is seeded from a CRC32 of the lookup key instead of the
    built-in ``hash()``: string hashes are salted per interpreter process, so
    a ``hash()``-based seed moves every marker each time the app restarts.

    Args:
        district: District name as it appears in the prediction data.
        state: State name as it appears in the prediction data.

    Returns:
        A ``(latitude, longitude)`` pair that is stable across runs for the
        same ``(district, state)`` input.
    """
    import zlib  # local import: only this helper needs it

    key = f"{district}|{state}"
    rng = np.random.default_rng(zlib.crc32(key.encode("utf-8")))

    if key in DISTRICT_COORDS:
        base_lat, base_lon = DISTRICT_COORDS[key]
        spread = 0.08
    else:
        # Fallback: state-level entry (or the default) with wider jitter.
        base_lat, base_lon = DISTRICT_COORDS.get(f"Unknown|{state}", (22.0, 78.0))
        spread = 1.2

    # Latitude is drawn before longitude; keep this order so positions
    # remain reproducible.
    lat = base_lat + rng.uniform(-spread, spread)
    lon = base_lon + rng.uniform(-spread, spread)
    return float(lat), float(lon)
303
+
304
+
305
# ── Controls ──────────────────────────────────────────────────────────────────
# Placeholder option shown in the year selector when no predictions are loaded.
NO_YEAR = "—"

# Selectable metric label -> prediction column used to colour the bubbles.
COLOR_MAP = {
    "Predicted Person-Days": "predicted_persondays",
    "Prediction Error": "prediction_error",
    "Budget Gain (LP Optimizer)": "persondays_gain",
    "Actual Person-Days": "person_days_lakhs",
}

# Columns that take both signs and therefore get a red/neutral/green scale.
DIVERGING_COLS = ("prediction_error", "persondays_gain")

# Numeric hover fields, in customdata order (indices 3..9 of each row).
HOVER_NUMERIC = (
    "person_days_lakhs",
    "predicted_persondays",
    "prediction_error",
    "persondays_gain",
    "budget_change_pct",
    "persondays_per_lakh",
    "budget_allocated_lakhs",
)


def _as_float(value, default: float = 0.0) -> float:
    """Coerce *value* to float; None, NaN, zero and unparseable values give *default*.

    ``float(value or default)`` lets NaN through because NaN is truthy, which
    is how rows without optimizer data ended up as "NaN" in the hover text.
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    # ``number != number`` is the NaN test; ``not number`` keeps the
    # ``value or default`` treatment of zero.
    if number != number or not number:
        return default
    return number


states = fetch_states()
if not states:
    st.error("⚠️ API offline — run `uvicorn backend.main:app --port 8000`")
    st.stop()

cc1, cc2, cc3 = st.columns(3)
with cc1:
    state_filter = st.selectbox("State Filter", ["All India"] + states)
with cc2:
    map_metric = st.selectbox("Bubble Color / Size", list(COLOR_MAP))
with cc3:
    # The unfiltered prediction set is only used to list the available years.
    _df_raw = fetch_predictions()
    year_opts = []
    if not _df_raw.empty:
        year_opts = sorted(_df_raw["financial_year"].unique().tolist())
    selected_year = st.selectbox("Financial Year", year_opts or [NO_YEAR])

# ── Fetch & merge data ────────────────────────────────────────────────────────
selected_state = None if state_filter == "All India" else state_filter
pred_df = fetch_predictions(
    state=selected_state,
    year=int(selected_year) if selected_year != NO_YEAR else None,
)
opt_df = fetch_optimizer_results(state=selected_state)

if pred_df.empty:
    st.info("No prediction data for selected filters. Ensure the pipeline has run.")
    st.stop()

# Merge optimizer results in if available
if not opt_df.empty:
    merge_cols = ["state", "district"]
    wanted = ["persondays_gain", "budget_change_pct", "persondays_per_lakh",
              "budget_allocated_lakhs", "optimized_budget"]
    # Skip columns the predictions already carry: merging them again would
    # produce "_x"/"_y" suffixed names that the lookups below never read.
    extra_cols = [c for c in wanted if c in opt_df.columns and c not in pred_df.columns]
    opt_sub = opt_df[merge_cols + extra_cols].drop_duplicates(subset=merge_cols)
    pred_df = pred_df.merge(opt_sub, on=merge_cols, how="left")

# Pick what to color by; fall back to predictions when the column is absent
# (for example the optimizer gain when no optimizer results were merged).
color_col = COLOR_MAP[map_metric]
if color_col not in pred_df.columns:
    color_col = "predicted_persondays"

# ── Build map data ────────────────────────────────────────────────────────────
records = pred_df.to_dict("records")

lats, lons = [], []
for rec in records:
    lat, lon = get_coords(str(rec["district"]), str(rec["state"]))
    lats.append(lat)
    lons.append(lon)

colors = [_as_float(rec.get(color_col)) for rec in records]

# Bubble size always follows predicted person-days, floored at 0.1.
sz_arr = np.array(
    [max(_as_float(rec.get("predicted_persondays"), 1.0), 0.1) for rec in records]
)
sz_min, sz_max = sz_arr.min(), sz_arr.max()
# Min-max scale into the 4..17 range; the epsilon avoids 0/0 when all sizes match.
norm_sz = np.clip((sz_arr - sz_min) / (sz_max - sz_min + 1e-9) * 13 + 4, 4, 17).tolist()

# ── Choose colorscale based on metric ────────────────────────────────────────
if color_col == "prediction_error":
    cscale = [[0, RED], [0.5, "#FAFAF9"], [1, GREEN]]
elif color_col == "persondays_gain":
    cscale = [[0, RED], [0.5, "#FFF7ED"], [1, GREEN]]
else:
    cscale = SAFFRON_SCALE

# ── Build hover template ──────────────────────────────────────────────────────
# customdata columns: 0=district, 1=state, 2=fy, 3=actual, 4=predicted,
#                     5=error, 6=persondays_gain, 7=budget_chg_pct,
#                     8=persondays_per_lakh, 9=budget_allocated
custom = [
    [
        str(rec.get("district", "")),
        str(rec.get("state", "")),
        int(_as_float(rec.get("financial_year"))),
    ]
    + [_as_float(rec.get(col)) for col in HOVER_NUMERIC]
    for rec in records
]

hover_tmpl = (
    "<b>%{customdata[0]}</b><br>"
    "<span style='color:#A8A29E'>%{customdata[1]}</span><br>"
    "<br>"
    "<b>FY:</b> %{customdata[2]}<br>"
    "<b>Actual PD:</b> %{customdata[3]:.2f}L<br>"
    "<b>Predicted PD:</b> %{customdata[4]:.2f}L<br>"
    "<b>Model Error:</b> %{customdata[5]:+.2f}L<br>"
    "<br>"
    "<b>LP Optimizer</b><br>"
    "<b>PD Gain:</b> %{customdata[6]:+.2f}L<br>"
    "<b>Budget Δ:</b> %{customdata[7]:+.1f}%<br>"
    "<b>Efficiency:</b> %{customdata[8]:.4f} PD/₹L<br>"
    "<b>Budget:</b> ₹%{customdata[9]:,.0f}L"
    "<extra></extra>"
)

marker = dict(
    size=norm_sz,
    color=colors,
    colorscale=cscale,
    colorbar=dict(
        title=dict(text=map_metric[:12], font=dict(color="#78716C", size=9)),
        tickfont=dict(color="#78716C", size=8),
        thickness=10, len=0.55,
        bgcolor="rgba(255,255,255,0.88)",
    ),
    opacity=0.80,
    line=dict(width=0.8, color="rgba(255,255,255,0.7)"),
)
if color_col in DIVERGING_COLS:
    # Pin the neutral midpoint colour to zero so red always means negative
    # and green positive, whatever the data range is.
    marker["cmid"] = 0

fig = go.Figure()
fig.add_scattergeo(
    lat=lats, lon=lons,
    mode="markers",
    marker=marker,
    customdata=custom,
    hovertemplate=hover_tmpl,
)

fig.update_geos(
    scope="asia",
    showland=True, landcolor="#F5F5F4",
    showocean=True, oceancolor="#EFF6FF",
    showcountries=True, countrycolor="#D6D3D1",
    showsubunits=True, subunitcolor="#E7E5E4",
    showrivers=True, rivercolor="#DBEAFE",
    center=dict(lat=22, lon=80),
    projection_scale=5.0,
    bgcolor="rgba(0,0,0,0)",
)
fig.update_layout(
    height=620,
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(l=0, r=0, t=10, b=0),
    font=dict(family="DM Mono, monospace", color="#1C1917"),
    showlegend=False,
    hoverlabel=dict(
        bgcolor="#1C1917",
        bordercolor="#1C1917",
        font=dict(family="DM Mono, monospace", size=11, color="#FAF9F7"),
    ),
)
st.plotly_chart(fig, use_container_width=True, config={"displayModeBar": False})

# ── Caption ───────────────────────────────────────────────────────────────────
# Counts plotted rows that carry a non-empty district label.
n_mapped = sum(1 for entry in custom if entry[0])
year_label = selected_year if selected_year != NO_YEAR else "all years"
st.caption(
    f"{n_mapped} districts · FY {year_label} · "
    "Bubble size ∝ predicted person-days · Hover for full model details"
)

# ── Summary cards below map ───────────────────────────────────────────────────
st.markdown("<div style='margin-top:1.5rem'></div>", unsafe_allow_html=True)
section_label("Prediction Summary for Filtered View")

c1, c2, c3, c4 = st.columns(4)
total_pred = pred_df["predicted_persondays"].sum()
total_act = pred_df["person_days_lakhs"].sum()
mean_err = pred_df["prediction_error"].mean()
# The gain column only exists when optimizer results were merged in.
gain_total = pred_df["persondays_gain"].sum() if "persondays_gain" in pred_df.columns else 0

c1.metric("Total Predicted PD", f"{total_pred:,.1f}L")
c2.metric("Total Actual PD", f"{total_act:,.1f}L")
c3.metric("Mean Model Error", f"{mean_err:+.3f}L")
c4.metric("Total LP Gain", f"{gain_total:+,.1f}L")
frontend/theme.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ theme.py — SchemeImpactNet shared design system
3
+ Editorial / policy-brief aesthetic.
4
+ Fonts: Fraunces (display) + Source Serif 4 (body) + DM Mono (data/labels)
5
+ Palette: warm off-white #FAF9F7, deep stone #1C1917, saffron accent #FB923C
6
+ """
7
+
8
+ THEME_CSS = """
9
+ <style>
10
+ @import url('https://fonts.googleapis.com/css2?family=Fraunces:ital,opsz,wght@0,9..144,300;0,9..144,600;0,9..144,700;1,9..144,300&family=Source+Serif+4:ital,opsz,wght@0,8..60,300;0,8..60,400;0,8..60,600&family=DM+Mono:wght@400;500&display=swap');
11
+
12
+ html, body, [class*="css"] {
13
+ font-family: 'Source Serif 4', Georgia, serif !important;
14
+ }
15
+ .stApp {
16
+ background-color: #FAF9F7 !important;
17
+ }
18
+ #MainMenu, footer, header { visibility: hidden; }
19
+
20
+ .block-container {
21
+ padding: 2rem 2.5rem 3rem !important;
22
+ max-width: 1320px !important;
23
+ }
24
+
25
+ /* ── Sidebar ── */
26
+ [data-testid="stSidebar"] {
27
+ background: #1C1917 !important;
28
+ border-right: none !important;
29
+ }
30
+ [data-testid="stSidebarContent"] {
31
+ background: #1C1917 !important;
32
+ }
33
+
34
+ /* Nav links generated by st.navigation */
35
+ [data-testid="stSidebarNavLink"] {
36
+ border-radius: 5px !important;
37
+ padding: 0.5rem 1rem !important;
38
+ font-family: 'DM Mono', monospace !important;
39
+ font-size: 0.7rem !important;
40
+ letter-spacing: 0.5px !important;
41
+ color: #A8A29E !important;
42
+ text-decoration: none !important;
43
+ transition: all 0.15s ease !important;
44
+ border-left: 2px solid transparent !important;
45
+ }
46
+ [data-testid="stSidebarNavLink"]:hover {
47
+ background: rgba(251,146,60,0.1) !important;
48
+ color: #FB923C !important;
49
+ border-left-color: rgba(251,146,60,0.4) !important;
50
+ }
51
+ [data-testid="stSidebarNavLink"][aria-current="page"] {
52
+ background: rgba(251,146,60,0.15) !important;
53
+ color: #FB923C !important;
54
+ border-left-color: #FB923C !important;
55
+ }
56
+
57
+ /* ── Typography ── */
58
+ h1, h2, h3 {
59
+ font-family: 'Fraunces', serif !important;
60
+ color: #1C1917 !important;
61
+ }
62
+ h1 { font-size: 2.2rem !important; font-weight: 600 !important; line-height: 1.15 !important; }
63
+ h2 { font-size: 1.5rem !important; font-weight: 600 !important; }
64
+ h3 { font-size: 1.1rem !important; font-weight: 600 !important; }
65
+ p { font-family: 'Source Serif 4', serif !important; color: #292524 !important; }
66
+
67
+ /* ── Metric cards ── */
68
+ [data-testid="stMetric"] {
69
+ background: #FFFFFF !important;
70
+ border: 1px solid #E7E5E4 !important;
71
+ border-radius: 8px !important;
72
+ padding: 1rem 1.2rem !important;
73
+ }
74
+ [data-testid="stMetricLabel"] p {
75
+ font-family: 'DM Mono', monospace !important;
76
+ font-size: 0.62rem !important;
77
+ letter-spacing: 2px !important;
78
+ text-transform: uppercase !important;
79
+ color: #78716C !important;
80
+ }
81
+ [data-testid="stMetricValue"] {
82
+ font-family: 'Fraunces', serif !important;
83
+ font-size: 1.85rem !important;
84
+ font-weight: 600 !important;
85
+ color: #1C1917 !important;
86
+ line-height: 1.2 !important;
87
+ }
88
+ [data-testid="stMetricDelta"] {
89
+ font-family: 'DM Mono', monospace !important;
90
+ font-size: 0.7rem !important;
91
+ }
92
+
93
+ /* ── Inputs ── */
94
+ [data-testid="stSelectbox"] label p,
95
+ [data-testid="stSlider"] label p,
96
+ [data-testid="stTextInput"] label p,
97
+ [data-testid="stMultiSelect"] label p {
98
+ font-family: 'DM Mono', monospace !important;
99
+ font-size: 0.65rem !important;
100
+ letter-spacing: 1.5px !important;
101
+ text-transform: uppercase !important;
102
+ color: #78716C !important;
103
+ }
104
+
105
+ /* ── Buttons ── */
106
+ .stButton > button {
107
+ font-family: 'DM Mono', monospace !important;
108
+ font-size: 0.7rem !important;
109
+ letter-spacing: 1px !important;
110
+ text-transform: uppercase !important;
111
+ background: #1C1917 !important;
112
+ color: #FAF9F7 !important;
113
+ border: none !important;
114
+ border-radius: 6px !important;
115
+ padding: 0.5rem 1.2rem !important;
116
+ }
117
+ .stButton > button:hover {
118
+ background: #FB923C !important;
119
+ }
120
+
121
+ /* ── Dataframes ── */
122
+ [data-testid="stDataFrame"] {
123
+ border: 1px solid #E7E5E4 !important;
124
+ border-radius: 8px !important;
125
+ overflow: hidden !important;
126
+ }
127
+ [data-testid="stDataFrame"] th {
128
+ font-family: 'DM Mono', monospace !important;
129
+ font-size: 0.65rem !important;
130
+ letter-spacing: 1px !important;
131
+ text-transform: uppercase !important;
132
+ background: #F5F5F4 !important;
133
+ color: #57534E !important;
134
+ }
135
+
136
+ /* ── Expander ── */
137
+ [data-testid="stExpander"] {
138
+ border: 1px solid #E7E5E4 !important;
139
+ border-radius: 8px !important;
140
+ background: #FFFFFF !important;
141
+ }
142
+ details summary p {
143
+ font-family: 'DM Mono', monospace !important;
144
+ font-size: 0.72rem !important;
145
+ letter-spacing: 0.5px !important;
146
+ color: #57534E !important;
147
+ }
148
+
149
+ /* ── Alerts ── */
150
+ [data-testid="stAlert"] {
151
+ border-radius: 8px !important;
152
+ }
153
+
154
+ /* ── Caption ── */
155
+ [data-testid="stCaptionContainer"] p {
156
+ font-family: 'DM Mono', monospace !important;
157
+ font-size: 0.63rem !important;
158
+ color: #A8A29E !important;
159
+ letter-spacing: 0.3px !important;
160
+ }
161
+
162
+ /* ── Divider ── */
163
+ hr {
164
+ border: none !important;
165
+ border-top: 1px solid #E7E5E4 !important;
166
+ margin: 1.5rem 0 !important;
167
+ }
168
+
169
+ /* ── Tab strip ── */
170
+ [data-testid="stTabs"] [role="tab"] {
171
+ font-family: 'DM Mono', monospace !important;
172
+ font-size: 0.68rem !important;
173
+ letter-spacing: 1px !important;
174
+ text-transform: uppercase !important;
175
+ }
176
+ </style>
177
+ """
178
+
179
+ # ── Plotly shared layout (light, editorial) ───────────────────────────────────
180
+ PLOTLY_LAYOUT = dict(
181
+ paper_bgcolor="#FFFFFF",
182
+ plot_bgcolor="#FAFAF9",
183
+ font=dict(family="DM Mono, monospace", color="#292524", size=10.5),
184
+ margin=dict(l=0, r=0, t=44, b=0),
185
+ legend=dict(
186
+ bgcolor="rgba(255,255,255,0.92)",
187
+ bordercolor="#E7E5E4", borderwidth=1,
188
+ font=dict(size=10),
189
+ ),
190
+ xaxis=dict(
191
+ gridcolor="#F5F5F4", linecolor="#E7E5E4",
192
+ tickfont=dict(color="#78716C", size=10),
193
+ title_font=dict(color="#57534E", size=11),
194
+ zerolinecolor="#E7E5E4",
195
+ ),
196
+ yaxis=dict(
197
+ gridcolor="#F5F5F4", linecolor="#E7E5E4",
198
+ tickfont=dict(color="#78716C", size=10),
199
+ title_font=dict(color="#57534E", size=11),
200
+ zerolinecolor="#E7E5E4",
201
+ ),
202
+ )
203
+
204
+ # ── Colour tokens ─────────────────────────────────────────────────────────────
205
+ SAFFRON = "#FB923C" # primary accent
206
+ SAFFRON_D = "#EA580C" # darker saffron
207
+ SLATE = "#1C1917" # near-black
208
+ STONE = "#78716C" # muted label
209
+ BORDER = "#E7E5E4"
210
+ BG = "#FAF9F7"
211
+ WHITE = "#FFFFFF"
212
+ GREEN = "#16A34A"
213
+ RED = "#DC2626"
214
+ AMBER = "#D97706"
215
+ BLUE = "#2563EB"
216
+
217
+ # ── Saffron scale for choropleth / sequential maps ───────────────────────────
218
+ SAFFRON_SCALE = [
219
+ [0.0, "#FFF7ED"],
220
+ [0.25, "#FED7AA"],
221
+ [0.5, "#FB923C"],
222
+ [0.75, "#EA580C"],
223
+ [1.0, "#7C2D12"],
224
+ ]
225
+
226
+
227
+ # ── Helpers ───────────────────────────────────────────────────────────────────
228
def inject_theme():
    """Render ``THEME_CSS`` into the current Streamlit page as raw HTML.

    NOTE(review): ``inject_theme`` is defined a second time at the bottom of
    this module as a no-op, which rebinds the name. This implementation is
    therefore shadowed once the module has finished importing.
    """
    from streamlit import markdown

    markdown(THEME_CSS, unsafe_allow_html=True)
231
+
232
+
233
def page_header(eyebrow: str, title: str, subtitle: str = ""):
    """Render the page heading block: eyebrow line, title and optional subtitle.

    Args:
        eyebrow: Short label rendered above the title.
        title: Main heading text, rendered in an ``<h1>``.
        subtitle: Optional paragraph below the title; omitted when empty.

    The arguments are interpolated into the HTML unescaped, so they are
    expected to be trusted, developer-supplied strings.
    """
    import streamlit as st

    sub_html = ""
    if subtitle:
        sub_html = (
            f'<p style="font-family:\'Source Serif 4\',serif; font-size:0.92rem; '
            f'color:#78716C; margin:6px 0 0 0; line-height:1.5;">{subtitle}</p>'
        )

    header_html = f"""
    <div style="margin-bottom:1.75rem; padding-bottom:1.25rem; border-bottom:2px solid #E7E5E4;">
        <p style="font-family:'DM Mono',monospace; font-size:0.58rem; letter-spacing:3.5px;
                  text-transform:uppercase; color:#FB923C; margin:0 0 7px 0;">{eyebrow}</p>
        <h1 style="font-family:'Fraunces',serif; font-size:2.1rem; font-weight:600;
                   color:#1C1917; margin:0; line-height:1.15;">{title}</h1>
        {sub_html}
    </div>"""
    st.markdown(header_html, unsafe_allow_html=True)
248
+
249
+
250
def section_label(text: str):
    """Render *text* as a small upper-case section label with a bottom rule.

    *text* is interpolated into the HTML unescaped.
    """
    import streamlit as st

    label_html = (
        f'<p style="font-family:\'DM Mono\',monospace; font-size:0.58rem; '
        f'letter-spacing:3px; text-transform:uppercase; color:#A8A29E; '
        f'margin:0 0 10px 0; padding-bottom:8px; border-bottom:1px solid #F5F5F4;">'
        f'{text}</p>'
    )
    st.markdown(label_html, unsafe_allow_html=True)
259
+
260
+
261
def kpi_html(value: str, label: str, color: str = "#1C1917", note: str = "") -> str:
    """Build the HTML for a KPI card and return it as a string.

    Args:
        value: The headline figure, shown large.
        label: Upper-case caption shown above the value.
        color: CSS colour applied to the value text.
        note: Optional small footnote under the value; omitted when empty.

    Returns:
        An HTML fragment. Nothing is rendered here, and the arguments are
        interpolated unescaped.
    """
    note_html = ""
    if note:
        note_html = (
            f'<p style="font-family:\'DM Mono\',monospace; font-size:0.62rem; '
            f'color:#A8A29E; margin:3px 0 0 0;">{note}</p>'
        )

    card = f"""
    <div style="background:#FFFFFF; border:1px solid #E7E5E4; border-radius:8px; padding:1rem 1.25rem;">
        <p style="font-family:'DM Mono',monospace; font-size:0.58rem; letter-spacing:2.5px;
                  text-transform:uppercase; color:#A8A29E; margin:0 0 5px 0;">{label}</p>
        <p style="font-family:'Fraunces',serif; font-size:1.9rem; font-weight:600;
                  color:{color}; line-height:1; margin:0;">{value}</p>
        {note_html}
    </div>"""
    return card
275
+
276
+
277
def signal_card_html(value: str, title: str, body: str, accent: str = "#FB923C") -> str:
    """Build the HTML for a signal card and return it as a string.

    Args:
        value: The highlighted figure shown on the left.
        title: Upper-case heading.
        body: Supporting sentence under the title.
        accent: CSS colour used for the left border and the value text.

    Returns:
        An HTML fragment; the arguments are interpolated unescaped.
    """
    card = f"""
    <div style="background:#FFFFFF; border:1px solid #E7E5E4; border-left:3px solid {accent};
                border-radius:8px; padding:0.85rem 1rem; margin-bottom:7px;
                display:flex; align-items:center; gap:0.9rem;">
        <span style="font-family:'Fraunces',serif; font-size:1.55rem; font-weight:600;
                     color:{accent}; min-width:56px; text-align:right; flex-shrink:0;">{value}</span>
        <div>
            <p style="font-family:'DM Mono',monospace; font-size:0.6rem; letter-spacing:1.2px;
                      text-transform:uppercase; color:#57534E; margin:0 0 2px 0;">{title}</p>
            <p style="font-family:'Source Serif 4',serif; font-size:0.78rem;
                      color:#A8A29E; margin:0; line-height:1.4;">{body}</p>
        </div>
    </div>"""
    return card
291
+
292
+
293
# NOTE: for page files, inject_theme() is intentionally a no-op.
# The CSS is injected once in app.py before st.navigation() runs, so it
# already applies to every page. This definition replaces the earlier
# inject_theme() in this module.
def inject_theme():
    """Do nothing; kept so page files can keep calling ``inject_theme()``."""
    return None
frontend/utils/api_client.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ utils/api_client.py
3
+ --------------------
4
+ Centralized, cached API wrappers.
5
+
6
+ HF Spaces compatible: reads API_URL from environment variable so the
7
+ same code works locally (localhost:8000) and on HuggingFace (localhost:8000
8
+ since both services run in the same container).
9
+ """
10
+
11
+ import os
12
+ import requests
13
+ import pandas as pd
14
+ import streamlit as st
15
+
16
+ # HF Spaces: backend always on localhost:8000 inside the container
17
+ API = os.environ.get("API_URL", "http://localhost:8000")
18
+ TIMEOUT = 15
19
+
20
+
21
@st.cache_data(ttl=300)
def _get_cached(endpoint: str, params: dict | None = None):
    """GET ``API + endpoint`` and return the decoded JSON body.

    Raises on connection errors, timeouts, HTTP error statuses and invalid
    JSON. Because failures raise instead of returning, only successful
    responses are stored in the Streamlit cache.
    """
    response = requests.get(f"{API}{endpoint}", params=params or {}, timeout=TIMEOUT)
    response.raise_for_status()
    return response.json()


def _get(endpoint: str, params: dict | None = None):
    """Cached GET — returns the JSON payload, or None on any error.

    Args:
        endpoint: Path appended to ``API`` (for example ``"/districts/stats"``).
        params: Optional query parameters.

    Returns:
        The decoded JSON value, or ``None`` when the request failed.

    The caching lives in ``_get_cached`` so a failure is never memoised.
    Caching ``None`` here would keep the UI reporting "API offline" for up to
    five minutes after the backend has come up.
    """
    try:
        return _get_cached(endpoint, params)
    except Exception:
        # Deliberate best-effort boundary: callers treat None as "no data".
        return None


# Keep the cache-control hook available under the original name.
_get.clear = _get_cached.clear
34
+
35
+
36
+ def _df(data) -> pd.DataFrame:
37
+ if not data:
38
+ return pd.DataFrame()
39
+ if isinstance(data, list):
40
+ return pd.DataFrame(data)
41
+ if isinstance(data, dict):
42
+ return pd.DataFrame([data])
43
+ return pd.DataFrame()
44
+
45
+
46
+ # ── Health ─────────────────────────────────────────────────────────────────────
47
def is_online() -> bool:
    """Return True when ``GET {API}/health`` answers with a non-error status.

    An unreachable backend, a timeout (5 s) or an HTTP status of 400 or
    above counts as offline. Previously any response at all, including a
    500, was reported as online.
    """
    try:
        return requests.get(f"{API}/health", timeout=5).ok
    except requests.exceptions.RequestException:
        return False
53
+
54
+
55
+ # ── /districts/* ───────────────────────────────────────────────────────────────
56
def fetch_stats() -> dict:
    """Return the payload of ``/districts/stats``, or ``{}`` when unavailable."""
    stats = _get("/districts/stats")
    return stats or {}
58
+
59
+
60
def fetch_states() -> list[str]:
    """Return the payload of ``/districts/states``, or ``[]`` when unavailable."""
    state_names = _get("/districts/states")
    return state_names or []
62
+
63
+
64
def fetch_districts(state: str) -> list[str]:
    """Return the payload of ``/districts/list`` for *state*, or ``[]`` when unavailable."""
    district_names = _get("/districts/list", {"state": state})
    return district_names or []
66
+
67
+
68
def fetch_district_history(state: str, district: str) -> pd.DataFrame:
    """Return ``/districts/history`` for one district as a DataFrame (empty on failure)."""
    query = {"state": state, "district": district}
    payload = _get("/districts/history", query)
    return _df(payload)
70
+
71
+
72
def fetch_top_districts(
    state: str | None = None,
    metric: str = "person_days_lakhs",
    n: int = 12,
) -> pd.DataFrame:
    """Return ``/districts/top`` as a DataFrame (empty on failure).

    Args:
        state: Optional state filter; only sent when truthy.
        metric: Value sent as the ``metric`` query parameter.
        n: Value sent as the ``n`` query parameter.
    """
    state_filter = {"state": state} if state else {}
    query = {"metric": metric, "n": n, **state_filter}
    return _df(_get("/districts/top", query))
81
+
82
+
83
def fetch_yearly_trend(state: str | None = None) -> pd.DataFrame:
    """Return ``/districts/trend`` as a DataFrame, optionally filtered by *state*."""
    query = {}
    if state:
        query["state"] = state
    return _df(_get("/districts/trend", query))
86
+
87
+
88
+ # ── /predictions/* ─────────────────────────────────────────────────────────────
89
def fetch_predictions(
    state: str | None = None,
    district: str | None = None,
    year: int | None = None,
) -> pd.DataFrame:
    """Return ``/predictions/`` as a DataFrame (empty on failure).

    Each filter is sent as a query parameter only when it is truthy, so
    ``None``, ``""`` and ``0`` all mean "no filter".
    """
    candidates = (("state", state), ("district", district), ("year", year))
    query = {name: value for name, value in candidates if value}
    return _df(_get("/predictions/", query))
99
+
100
+
101
+ # ── /optimizer/* ───────────────────────────────────────────────────────────────
102
def fetch_optimizer_results(state: str | None = None) -> pd.DataFrame:
    """Return ``/optimizer/results`` as a DataFrame, optionally filtered by *state*."""
    query = {}
    if state:
        query["state"] = state
    return _df(_get("/optimizer/results", query))
105
+
106
+
107
def run_optimizer_live(
    state: str | None = None,
    budget_scale: float = 1.0,
    min_fraction: float = 0.40,
    max_fraction: float = 2.50,
) -> dict | None:
    """POST the optimizer settings to ``/optimizer/run`` and return its JSON reply.

    Args:
        state: Optional state to restrict the run to; sent as-is (may be None).
        budget_scale: Sent as ``budget_scale`` in the request body.
        min_fraction: Sent as ``min_fraction`` in the request body.
        max_fraction: Sent as ``max_fraction`` in the request body.

    Returns:
        The decoded JSON response, or ``None`` after showing an ``st.error``
        message when the request fails.

    The call is not cached and uses a 60 s timeout instead of the module-wide
    ``TIMEOUT``.
    """
    payload = {
        "state": state,
        "budget_scale": budget_scale,
        "min_fraction": min_fraction,
        "max_fraction": max_fraction,
    }
    try:
        response = requests.post(f"{API}/optimizer/run", json=payload, timeout=60)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.ConnectionError:
        st.error("Cannot reach API — backend may still be starting up, refresh in a moment.")
        return None
    except (requests.exceptions.RequestException, ValueError) as e:
        # Timeouts, HTTP error statuses and undecodable bodies end up here.
        st.error(f"Optimizer error: {e}")
        return None
hf_start.sh ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# hf_start.sh — SchemeImpactNet HuggingFace Spaces entrypoint
# Runs pipeline (if needed), starts FastAPI on 8000, Streamlit on 7860

set -euo pipefail

echo "============================================================"
echo " SchemeImpactNet — HuggingFace Spaces Startup"
echo "============================================================"

cd /app

# ── Step 1: Generate / verify processed data ─────────────────────────────────
echo ""
echo "→ Checking processed data..."

# The pipeline is re-run when any one of its three output files is missing.
NEEDS_PIPELINE=false
for f in data/processed/mnrega_cleaned.csv \
         data/processed/mnrega_predictions.csv \
         data/processed/optimized_budget_allocation.csv; do
    if [[ ! -f "$f" ]]; then
        echo " Missing: $f"
        NEEDS_PIPELINE=true
    fi
done

if [[ "$NEEDS_PIPELINE" == true ]]; then
    echo "→ Running data pipeline (Stage 3)..."
    python main.py --stage 3
    echo "✓ Pipeline complete"
else
    echo "✓ Processed data found — skipping pipeline"
fi

# ── Step 2: Start FastAPI backend on port 8000 (background) ──────────────────
echo ""
echo "→ Starting FastAPI backend on port 8000..."
python -m uvicorn backend.main:app \
    --host 0.0.0.0 \
    --port 8000 \
    --log-level warning &
BACKEND_PID=$!

# Wait for backend health: poll once per second, up to MAX_WAIT attempts.
MAX_WAIT=20
WAITED=0
BACKEND_READY=false
while [[ $WAITED -lt $MAX_WAIT ]]; do
    if curl -sf "http://localhost:8000/health" >/dev/null 2>&1; then
        BACKEND_READY=true
        break
    fi
    # Stop waiting early if the backend process has already died.
    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
        break
    fi
    sleep 1
    WAITED=$((WAITED + 1))
done

# Report what actually happened; the frontend is started in every case.
if [[ "$BACKEND_READY" == true ]]; then
    echo "✓ Backend live"
elif kill -0 "$BACKEND_PID" 2>/dev/null; then
    echo " ⚠ Backend health timeout — continuing"
else
    echo " ⚠ Backend exited during startup — continuing without API"
fi

# ── Step 3: Start Streamlit on HF port 7860 (foreground) ────────────────────
echo ""
echo "→ Starting Streamlit frontend on port 7860..."
echo "✓ Dashboard: https://huggingface.co/spaces/{YOUR_SPACE}"
echo ""

# exec replaces this shell so Streamlit becomes the container's main process.
exec python -m streamlit run frontend/app.py \
    --server.port 7860 \
    --server.address 0.0.0.0 \
    --server.headless true \
    --server.enableCORS false \
    --server.enableXsrfProtection false \
    --browser.gatherUsageStats false
main.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ main.py
3
+ -------
4
+ Entry point for SchemeImpactNet.
5
+
6
+ Usage:
7
+ python main.py # Stage 1 β€” Maharashtra
8
+ python main.py --stage 2 # Stage 2 β€” All-India
9
+ python main.py --stage 3 # Stage 3 β€” All-India + optimize
10
+ python main.py --stage 3 --state Maharashtra # Stage 3, one state
11
+ python main.py --optimize-only # Run optimizer on existing predictions
12
+ """
13
+
14
+ import sys
15
+ from src.pipeline import run_pipeline, run_optimizer_step
16
+
17
if __name__ == "__main__":
    # Local import: argparse is only needed when main.py is run as a script.
    import argparse

    # Year whose predictions are summarised after a pipeline run.
    REPORT_YEAR = 2023

    parser = argparse.ArgumentParser(
        description="Entry point for SchemeImpactNet.",
        allow_abbrev=False,  # only the exact flag spellings are recognised
    )
    parser.add_argument(
        "--stage", type=int, default=1,
        help="Pipeline stage to run (default: 1).",
    )
    parser.add_argument(
        "--state", dest="scope_state", default=None,
        help="Restrict the optimizer step to a single state.",
    )
    parser.add_argument(
        "--optimize-only", action="store_true",
        help="Run the optimizer on existing predictions and skip the pipeline.",
    )
    # parse_known_args keeps the previous tolerance for unrecognised arguments,
    # while a missing or non-integer --stage / missing --state value now yields
    # a usage error instead of an IndexError / ValueError traceback.
    cli, _ignored = parser.parse_known_args(sys.argv[1:])

    stage = cli.stage
    scope_state = cli.scope_state

    if cli.optimize_only:
        print("\nRunning optimizer on existing predictions...")
        run_optimizer_step(scope_state=scope_state)
    else:
        # NOTE(review): --state is forwarded to the optimizer only; whether
        # run_pipeline can also be scoped to a state is not visible from here.
        predictions = run_pipeline(stage=stage)

        print(f"\nTop 10 predicted districts ({REPORT_YEAR}):")
        latest = predictions[predictions["financial_year"] == REPORT_YEAR]
        print(
            latest[["state", "district", "person_days_lakhs", "predicted_persondays"]]
            .sort_values("predicted_persondays", ascending=False)
            .head(10)
            .to_string(index=False)
        )

        # Stage 3: automatically run optimizer after model
        if stage == 3:
            print("\n" + "─" * 60)
            print(" Running Stage 3 Budget Optimizer...")
            print("─" * 60)
            run_optimizer_step(scope_state=scope_state)
overview.txt ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Project Overview: SchemeImpactNet - A Machine Learning Framework for Predictive Impact Analysis and Optimization of Indian Government Schemes
2
+
3
+ SchemeImpactNet is an innovative, machine learning-powered platform designed to revolutionize how Indian government schemes are analyzed, predicted, and optimized. Building at the intersection of AI and public policy, this system addresses the limitations of traditional scheme management tools by shifting from reactive administration to proactive, data-driven decision-making. It leverages publicly available datasets from Indian government portals (such as data.gov.in, mospi.gov.in, and scheme-specific sites like pmkisan.gov.in) to forecast the socio-economic impacts of schemes, identify inefficiencies, and recommend optimized resource allocations. This makes it particularly suitable for a final-year engineering project, as it combines real-world data integration, advanced ML algorithms, and practical visualizations, demonstrating technical depth while solving a pressing national issue: enhancing the effectiveness of welfare programs that affect millions.
4
+
5
+ The core inspiration stems from the vast, underutilized data on schemes like Pradhan Mantri Kisan Samman Nidhi (PM-KISAN), Mahatma Gandhi National Rural Employment Guarantee Act (MNREGA), Pradhan Mantri Awas Yojana (PMAY), and others. These datasets include beneficiary demographics, budget expenditures, regional implementations, and outcome metrics (e.g., employment generated, houses built, income uplifts). By applying ML, SchemeImpactNet not only manages this data but transforms it into actionable insightsβ€”predicting future outcomes and simulating policy tweaks to maximize benefits like poverty alleviation or rural development. Unlike basic management systems (which might have led to your initial rejection), this framework emphasizes predictive analytics and optimization, making it unique, scalable, and aligned with India's National AI Strategy for governance.
6
+
7
+ #### Key Features and Benefits
8
+ - **Predictive Impact Analysis**: Uses historical data to forecast scheme performance. For instance, it could predict how MNREGA's job creation in a Maharashtra district might reduce migration rates over the next 5 years, factoring in variables like rainfall, population density, and overlapping schemes.
9
+ - **Optimization Engine**: Recommends budget reallocations or scheme integrations to minimize waste. E.g., if PMAY is underperforming in urban slums due to labor shortages, the system might suggest diverting funds from less critical areas, using optimization algorithms to ensure equitable distribution.
10
+ - **Interconnected Scheme Network**: Models schemes as a graph, revealing dependencies (e.g., how agricultural schemes like PM-KISAN influence health outcomes via better nutrition), enabling holistic policy simulations.
11
+ - **User-Friendly Dashboard**: An interactive interface for stakeholders (policymakers, researchers, or even citizens) to query predictions, visualize data, and explore "what-if" scenarios.
12
+ - **Ethical and Bias-Aware Design**: Incorporates fairness checks to avoid regional biases in predictions, ensuring the system promotes inclusive growth.
13
+ - **Benefits for India-Specific Context**: With over 400 central schemes and trillions in annual budgets, inefficiencies like duplication (e.g., multiple housing aids) cost billions. SchemeImpactNet could help save resources, improve targeting (e.g., to underserved tribal areas), and support evidence-based policymaking, aligning with Sustainable Development Goals (SDGs) like No Poverty and Decent Work.
14
+
15
+ This project stands out for examiners because it tackles a real problem with measurable impact: You can quantify improvements (e.g., 15-20% better resource utilization in simulations) using metrics from ML models. It's feasible with open data, requires no proprietary tools, and has extension potential (e.g., integrating real-time APIs from government sites).
16
+
17
+ #### System Architecture
18
+ To visualize the high-level structure, here's a text-based diagram representing the end-to-end architecture. (If a graphical diagram is preferred, imagine this as a flowchart: Data sources feed into preprocessing, which branches to ML modules, converging at the optimization and UI layers.)
19
+
20
+ ```
21
+ +-------------------+ +-------------------+ +-------------------+
22
+ | Data Sources | | Data Preprocessing| | ML Core Engine |
23
+ | - data.gov.in | --> | - Cleaning | --> | - Predictive Models|
24
+ | - mospi.gov.in | | - Integration | | (XGBoost, LSTM) |
25
+ | - Scheme APIs | | - Feature Eng. | | - Clustering (K-Means)|
26
+ +-------------------+ +-------------------+ | - Graph Analysis (GNN)|
27
+ | - Optimization (RL/PuLP)|
28
+ +-------------------+
29
+ |
30
+ v
31
+ +-------------------+ +-------------------+ +-------------------+
32
+ | Impact Simulation| <-- | Visualization | <-- | User Interface |
33
+ | - What-If Scenarios| | - Dashboards | | - Web App (Streamlit)|
34
+ | - Network Propagation| | - Geospatial Maps| | - Interactive Queries|
35
+ +-------------------+ +-------------------+ +-------------------+
36
+ ```
37
+
38
+ - **Data Layer**: Ingests raw datasets (e.g., CSV files on beneficiary counts, budgets by state/year). Handles challenges like missing values or inconsistent formats using Pandas.
39
+ - **Preprocessing Layer**: Normalizes data, engineers features (e.g., deriving "impact score" from outcomes), and merges datasets (e.g., linking MNREGA jobs to PMAY completions via district codes).
40
+ - **ML Core**:
41
+ - Predictive: Time-series models (LSTM for forecasting beneficiary growth) and regression (XGBoost for impact scores).
42
+ - Unsupervised: Clustering districts into performance groups.
43
+ - Graph-Based: Represents schemes as nodes/edges in a network (using NetworkX/PyTorch Geometric) to model ripple effects.
44
+ - Optimization: Solves allocation problems (e.g., maximize total impact under budget constraints) with linear programming or reinforcement learning.
45
+ - **Simulation Layer**: Runs scenarios, e.g., "Increase PM-KISAN funding by 10% in drought areas — predict GDP lift."
46
+ - **Visualization & UI Layer**: Outputs charts, maps, and reports. E.g., a heatmap showing predicted poverty reduction across India.
47
+
48
+ #### Data Flow and Workflow
49
+ 1. **Ingestion**: Automatically pull or upload data from gov sites (focus on 5-10 schemes initially, like agriculture and rural development ones relevant to Maharashtra, given your location).
50
+ 2. **Analysis Pipeline**: Feed cleaned data into models. Train on 80% historical data (e.g., 2015-2023), test on recent years.
51
+ 3. **Output Generation**: Generate predictions (e.g., "MNREGA in Pimpri-Chinchwad could generate 50,000 jobs by 2027, reducing unemployment by 8%"), optimizations, and visuals.
52
+ 4. **Iteration**: Users refine inputs via the dashboard, re-running simulations for refined insights.
53
+
54
+ #### Implementation Considerations
55
+ - **Scalability**: Start with subsets (e.g., Maharashtra-focused data for local relevance), expand nationwide.
56
+ - **Challenges and Solutions**: Data privacy (use anonymized aggregates); model accuracy (cross-validate with real outcomes); computational needs (run on local machines with GPU for GNNs if available).
57
+ - **Evaluation**: Measure success via metrics like prediction accuracy (RMSE < 0.1 for forecasts) and optimization gains (e.g., 10-25% efficiency boost in simulations). Compare against baselines like non-ML rule-based allocators.
58
+ - **Future Enhancements**: Integrate NLP for scheme document analysis (e.g., extracting eligibility rules) or blockchain for transparent tracking.
59
+
60
+ This elaborate overview positions SchemeImpactNet as a cutting-edge project that not only impresses with its technical sophistication but also its potential societal impact. If diagrams are crucial, I can suggest generating one via tools like Draw.io or Python's Matplotlib in your codeβ€”e.g., a simple flowchart script you can run locally. Let me know if you'd like code snippets for that!
61
+
62
+
63
+ The core problem
64
+ India spends ₹70,000–90,000 crore per year on MNREGA alone. But nobody can reliably answer questions like:
65
+
66
+ Will this district generate enough employment next year, or will it fall short?
67
+ Is this district spending efficiently, or is money being wasted?
68
+ Where should we prioritize resources to get the most impact?
69
+
70
+ Right now, decisions are made reactively — officials look at last year's numbers and make gut calls. There's no forecasting, no early warning system, no optimization.
71
+
72
+ What SchemeImpactNet actually tries to solve
73
+ At its core, three concrete questions:
74
+ 1. Prediction — "What will this district's MNREGA performance look like next year?"
75
+ Given historical persondays, expenditure, and other factors → predict future performance. Early warning if a district is going to underperform.
76
+ 2. Efficiency Analysis β€” "Is this district getting good value for money?"
77
+ Some districts generate 50 persondays per ₹1000 spent. Others generate 20. Why? What separates high-performers from low-performers?
78
+ 3. Resource Optimization β€” "Where should budget go to maximize employment generated?"
79
+ Given a fixed budget, which districts should get more funding to maximize total persondays across Maharashtra?
80
+
81
+
82
+
83
+ -----------------------------------------------------------------------------------------------------
84
+
85
+ 3. Data Sources and Processing
86
+ The dataset used in this study combines real government data with domain-informed estimates to produce a comprehensive district-level MNREGA dataset spanning 2014-15 to 2024-25 across 759 districts and 34 states.
87
+ Primary Source β€” MNREGA Employment Data: Person days generated and households engaged in work were sourced from the Ministry of Rural Development's official MIS portal via Dataful.in (Dataset ID: 20063), which aggregates monthly district-level records from nreganarep.nic.in. Monthly figures were summed to produce annual totals. Person days were converted from absolute numbers to lakh units by dividing by 100,000.
88
+ Wage and Expenditure Derivation: District-level expenditure was derived using MoRD's officially notified state wage rates, which are revised annually. Expenditure in Rs. lakhs was computed as the product of person days (lakhs) and the prevailing wage rate (Rs./day). Budget allocated was estimated as expenditure divided by 0.89, reflecting the national average budget utilization rate of approximately 89%.
89
+ Rainfall Data: Annual subdivision-level rainfall data was sourced from the India Meteorological Department (IMD) historical dataset covering 1901-2017. IMD meteorological subdivisions were mapped to states. For years 2018-2024, where IMD data was unavailable, rainfall was estimated using each subdivision's 2000-2017 average with ±5% stochastic variation, using a fixed random seed for reproducibility.
90
+ Demographic and Poverty Data: Rural population figures were derived from Census of India 2011 state-level rural headcounts, distributed across districts proportional to each district's share of state-level person days. A 1.2% annual growth rate was applied to project values from 2011 to 2024. Poverty rates were sourced from NITI Aayog's National Multidimensional Poverty Index (MPI) 2021 report, with district-level variation introduced based on relative MNREGA activity.
91
+ Scheme Interdependency Features: PM-KISAN beneficiary estimates were computed from rural population assuming 35% farmer household penetration from 2019-20 onwards (scheme launch year), scaled by district activity. PMAY-G housing figures were estimated from 2016-17 onwards using poverty-weighted rural population ratios, with completion rates linearly interpolated from 30% (2016-17) to 85% (2023-24) based on reported national progress.
92
+ Final Dataset: 7,758 district-year observations, 22 features, zero missing values.
reports/model_comparison.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ model,rmse,mae,r2,selected
2
+ XGBoost,2.3301,1.3795,0.9963,β˜…
3
+ GradientBoostingRegressor,1.9383,1.1863,0.9975,
4
+ RandomForestRegressor,2.2926,1.0879,0.9965,
reports/model_report.txt ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SchemeImpactNet β€” V4 Model Selection Report
2
+ ============================================================
3
+
4
+ Best Model : GradientBoosting
5
+ Selection : max mean RΒ² excl. 2022 (walk-forward CV)
6
+ Features : 17
7
+ Evaluation : Walk-forward CV (2018–2024)
8
+
9
+ Algorithm Comparison:
10
+ Model R² R² ex22 MAE RMSE
11
+ ------------------------------------------------------------
12
+ GradientBoosting 0.8510 0.9078 8.554 16.334 ← BEST
13
+ RandomForest 0.8417 0.9063 8.739 16.679
14
+ Ridge 0.8018 0.8824 9.975 18.545
15
+ ElasticNet 0.7982 0.8811 9.890 18.678
16
+ XGBoost 0.8533 0.9034 8.457 16.409
17
+
18
+ Best Model (GradientBoosting) Walk-Forward CV:
19
+ Mean RΒ² : 0.8510
20
+ excl.2022 RΒ²: 0.9078
21
+ Mean MAE : 8.554 lakh
22
+ Mean RMSE : 16.334 lakh
23
+ RΒ² gain : +0.0737 vs naive lag-1
24
+
25
+ Previous (leaked) RΒ²: 0.9963
26
+ Leakage source: works_completed (r=1.0 with target)
27
+
28
+ 2022 anomaly: West Bengal -93 to -98% reporting drop. Excl. RΒ²=0.9078
29
+
30
+ Feature Importances:
31
+ lag1_pd 0.5270
32
+ lag1_adj 0.2512
33
+ state_lag1_zscore 0.0837
34
+ roll2_mean 0.0612
35
+ blended_capacity 0.0199
36
+ roll3_mean 0.0188
37
+ is_covid 0.0067
38
+ lag3_pd 0.0065
39
+ state_lag1_norm 0.0056
40
+ lag2_pd 0.0054
41
+ relative_to_state 0.0034
42
+ state_enc 0.0033
43
+ roll3_std 0.0030
44
+ avg_wage_rate 0.0015
45
+ lag1_vs_capacity 0.0014
46
+ wage_yoy 0.0013
47
+ lag1_is_covid 0.0000
48
+
49
+ Year-by-year CV (GradientBoosting):
50
+ year n r2 mae rmse mape naive_r2 naive_mae r2_gain mae_gain
51
+ 2018 689 0.9160 6.639 13.168 1.996413e+09 0.9124 7.556 0.0036 0.916
52
+ 2019 701 0.9262 6.380 11.111 1.571437e+10 0.8651 7.484 0.0611 1.104
53
+ 2020 695 0.8354 12.681 23.825 1.346619e+09 0.7526 18.279 0.0828 5.598
54
+ 2021 698 0.9261 7.150 14.966 6.480334e+08 0.9384 7.988 -0.0122 0.839
55
+ 2022 713 0.5101 13.954 28.022 2.442193e+08 0.1804 14.288 0.3297 0.334
56
+ 2023 709 0.9089 7.403 13.336 3.815669e+10 0.9227 6.984 -0.0139 -0.419
57
+ 2024 727 0.9345 5.673 9.911 2.038457e+10 0.8697 7.278 0.0648 1.605
requirements.txt CHANGED
@@ -1,3 +1,30 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ contourpy==1.3.3
2
+ cycler==0.12.1
3
+ fonttools==4.61.1
4
+ joblib==1.5.3
5
+ kiwisolver==1.4.9
6
+ matplotlib==3.10.8
7
+ numpy==2.4.2
8
+ nvidia-nccl-cu12==2.29.3
9
+ packaging==26.0
10
+ pandas==3.0.1
11
+ pillow==12.1.1
12
+ pyparsing==3.3.2
13
+ python-dateutil==2.9.0.post0
14
+ scikit-learn==1.8.0
15
+ scipy==1.17.0
16
+ seaborn==0.13.2
17
+ six==1.17.0
18
+ threadpoolctl==3.6.0
19
+ xgboost==3.2.0
20
+ # Backend
21
+ fastapi>=0.104.0
22
+ uvicorn[standard]>=0.24.0
23
+ sqlalchemy>=2.0.0
24
+ pydantic>=2.0.0
25
+
26
+ # Frontend
27
+ streamlit
28
+ plotly>=5.17.0
29
+ requests>=2.31.0
30
+
src/__init__.py ADDED
File without changes
src/clean.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ clean.py
3
+ --------
4
+ Cleans and standardizes the unified MNREGA dataset.
5
+ Works for Stage 1 (Maharashtra) through Stage 3 (All-India + scheme data).
6
+ """
7
+
8
+ import pandas as pd
9
+ import numpy as np
10
+
11
# Numeric columns a row cannot do without: rows where any of these is still
# null after forward-filling are dropped by the cleaning pipeline.
CRITICAL_COLS = [
    "person_days_lakhs",
    "expenditure_lakhs",
    "avg_wage_rate",
]

# Numeric columns that are cast and forward-filled when present, but whose
# remaining nulls are left in place.
NON_CRITICAL_COLS = [
    "households_demanded",
    "households_offered",
    "households_availed",
    "works_completed",
    "rainfall_mm",
    "crop_season_index",
    "rural_population_lakhs",
    "poverty_rate_pct",
    "pmkisan_beneficiaries",
    "pmkisan_amount_lakhs",
    "pmay_houses_sanctioned",
    "pmay_houses_completed",
    "pmay_expenditure_lakhs",
    "budget_allocated_lakhs",
]
21
+
22
+
23
def clean(df: pd.DataFrame) -> pd.DataFrame:
    """Run the cleaning steps in their fixed order and return the result.

    Args:
        df: Raw unified MNREGA frame.

    Returns:
        The frame after string stripping, financial-year parsing, numeric
        casting, missing-value handling and logical-constraint clipping.
    """
    print("[clean] Starting cleaning pipeline...")
    stages = (
        _strip_strings,
        _parse_financial_year,
        _cast_numerics,
        _handle_missing,
        _enforce_logical_constraints,
    )
    # Each stage takes the frame produced by the previous one.
    for stage in stages:
        df = stage(df)
    print(f"[clean] Done. Shape: {df.shape}")
    return df
32
+
33
+
34
+ def _strip_strings(df: pd.DataFrame) -> pd.DataFrame:
35
+ for col in df.select_dtypes(include="object").columns:
36
+ df[col] = df[col].str.strip()
37
+ return df
38
+
39
+
40
+ def _parse_financial_year(df: pd.DataFrame) -> pd.DataFrame:
41
+ """Convert '2018-19' β†’ integer 2018."""
42
+ def _parse(val):
43
+ val = str(val).strip()
44
+ return int(val.split("-")[0]) if "-" in val else int(val)
45
+
46
+ df["financial_year"] = df["financial_year"].apply(_parse)
47
+ print(f"[clean] financial_year range: {df['financial_year'].min()} – {df['financial_year'].max()}")
48
+ return df
49
+
50
+
51
def _cast_numerics(df: pd.DataFrame) -> pd.DataFrame:
    """Coerce every known numeric column that is present to a numeric dtype.

    Values that cannot be parsed become NaN (``errors="coerce"``).
    """
    known = CRITICAL_COLS + NON_CRITICAL_COLS
    present = [name for name in known if name in df.columns]
    for name in present:
        df[name] = pd.to_numeric(df[name], errors="coerce")
    return df
57
+
58
+
59
def _handle_missing(df: pd.DataFrame) -> pd.DataFrame:
    """Forward-fill gaps within each district, then drop unusable rows.

    Critical cols → forward-fill within district, drop if still null.
    Non-critical  → forward-fill within district, leave remaining NaN.

    Args:
        df: Frame with ``state``, ``district`` and ``financial_year`` columns.

    Returns:
        A frame sorted by state, district and year, with a fresh index.
    """
    keys = ["state", "district"]
    # Sorting by year inside each district makes ffill carry the previous
    # year's value forward.
    df = df.sort_values(keys + ["financial_year"])

    for col in CRITICAL_COLS + NON_CRITICAL_COLS:
        if col not in df.columns:
            continue
        before = df[col].isna().sum()
        if before == 0:
            continue
        # dropna=False: with the default, rows whose state/district is null
        # come back as NaN from the grouped operation, which would erase
        # values that were actually present.
        df[col] = df.groupby(keys, dropna=False)[col].ffill()
        filled = before - df[col].isna().sum()
        if filled > 0:
            print(f"[clean] '{col}': forward-filled {filled} value(s)")

    before = len(df)
    df = df.dropna(subset=CRITICAL_COLS).reset_index(drop=True)
    if len(df) < before:
        print(f"[clean] Dropped {before - len(df)} rows with unresolvable critical nulls")

    return df
82
+
83
+
84
+ def _enforce_logical_constraints(df: pd.DataFrame) -> pd.DataFrame:
85
+ """Clip any constraint violations that slipped through generation."""
86
+ if all(c in df.columns for c in ["households_offered", "households_demanded"]):
87
+ violations = (df["households_offered"] > df["households_demanded"]).sum()
88
+ if violations:
89
+ df["households_offered"] = df[["households_offered", "households_demanded"]].min(axis=1)
90
+ print(f"[clean] Fixed {violations} households_offered > households_demanded")
91
+
92
+ if all(c in df.columns for c in ["households_availed", "households_offered"]):
93
+ violations = (df["households_availed"] > df["households_offered"]).sum()
94
+ if violations:
95
+ df["households_availed"] = df[["households_availed", "households_offered"]].min(axis=1)
96
+ print(f"[clean] Fixed {violations} households_availed > households_offered")
97
+
98
+ return df
src/eda.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ eda.py
3
+ ------
4
+ Exploratory Data Analysis for MNREGA unified dataset.
5
+ Automatically adapts to Maharashtra-only or All-India data.
6
+
7
+ Figures produced:
8
+ 01_statewide_trend.png
9
+ 02_district_performance_ranking.png
10
+ 03_efficiency_ranking.png
11
+ 04_covid_impact.png
12
+ 05_correlation_heatmap.png
13
+ """
14
+
15
+ import os
16
+ import pandas as pd
17
+ import numpy as np
18
+ import matplotlib.pyplot as plt
19
+ import matplotlib.font_manager as fm
20
+ import seaborn as sns
21
+
22
# Output directory for every figure written by this module; it is created
# when the module is imported.
FIGURES_DIR = os.path.join("reports", "figures")
os.makedirs(FIGURES_DIR, exist_ok=True)

# Global plot styling. The rcParams overrides are applied after the seaborn
# theme call, so keep this order.
sns.set_theme(style="whitegrid", palette="muted")
plt.rcParams.update({"figure.dpi": 120, "font.size": 10})
27
+
28
# Pick a font that can render the rupee symbol when one is installed;
# otherwise leave matplotlib's default family untouched.
def _get_font():
    """Return the first preferred font matplotlib knows about, or None."""
    installed = {entry.name for entry in fm.fontManager.ttflist}
    preferred = ("DejaVu Sans", "FreeSans", "Liberation Sans", "Arial")
    return next((name for name in preferred if name in installed), None)


FONT = _get_font()
if FONT:
    plt.rcParams["font.family"] = FONT
39
+
40
+
41
def run_eda(df: pd.DataFrame, scope: str = "Maharashtra") -> None:
    """Print summary statistics and produce every EDA figure.

    Args:
        df: Frame to analyse.
        scope: Label used in the log line and in figure titles.
    """
    print(f"\n[eda] Starting EDA β€” scope: {scope}")
    _summary_stats(df)

    # Figures whose titles mention the scope.
    for scoped_plot in (_plot_trend, _plot_top_bottom_districts, _plot_efficiency_ranking):
        scoped_plot(df, scope)

    # Figures with fixed titles.
    for plain_plot in (_plot_covid_impact, _plot_correlation_heatmap):
        plain_plot(df)

    print(f"[eda] All figures saved to: {FIGURES_DIR}/")
50
+
51
+
52
+ # ── 1. Summary ────────────────────────────────────────────────────────────────
53
+
54
+ def _summary_stats(df: pd.DataFrame) -> None:
55
+ print(f"\n[eda] {'─'*50}")
56
+ print(f"[eda] Rows : {len(df)}")
57
+ print(f"[eda] States : {df['state'].nunique()}")
58
+ print(f"[eda] Districts : {df['district'].nunique()}")
59
+ print(f"[eda] Years : {df['financial_year'].min()} – {df['financial_year'].max()}")
60
+ print(f"[eda] Total persondays: {df['person_days_lakhs'].sum():,.1f} lakh")
61
+ if "expenditure_lakhs" in df.columns:
62
+ print(f"[eda] Total expenditure: Rs. {df['expenditure_lakhs'].sum():,.1f} lakh")
63
+
64
+ print(f"\n[eda] Person days by year (state-aggregated mean):")
65
+ by_year = df.groupby("financial_year")["person_days_lakhs"].mean()
66
+ max_val = by_year.max()
67
+ for yr, val in by_year.items():
68
+ bar = "β–ˆ" * int(val / max_val * 28)
69
+ print(f" {yr}: {bar} {val:.2f}")
70
+ print(f"[eda] {'─'*50}")
71
+
72
+
73
+ # ── 2. Trend ──────────────────────────────────────────────────────────────────
74
+
75
def _plot_trend(df: pd.DataFrame, scope: str) -> None:
    """Save a bar chart of total person days per financial year.

    Args:
        df: Frame with ``financial_year`` and ``person_days_lakhs``.
        scope: Label inserted into the figure title.
    """
    bar_colour = "#2196F3"
    yearly = (
        df.groupby("financial_year")["person_days_lakhs"]
        .sum()
        .rename("total_persondays")
        .reset_index()
    )

    fig, axis = plt.subplots(figsize=(11, 5))
    axis.bar(
        yearly["financial_year"],
        yearly["total_persondays"],
        color=bar_colour,
        alpha=0.75,
        label="Person Days (lakh)",
    )
    axis.set_xlabel("Financial Year")
    axis.set_ylabel("Total Person Days (lakh)", color=bar_colour)
    axis.tick_params(axis="y", labelcolor=bar_colour)
    plt.title(f"MNREGA Trend β€” {scope} (Person Days)")
    fig.tight_layout()
    _save("01_statewide_trend.png")
89
+
90
+
91
+ # ── 3. District rankings ──────────────────────────────────────────────────────
92
+
93
def _plot_top_bottom_districts(df: pd.DataFrame, scope: str) -> None:
    """Save side-by-side bar charts of the best and worst districts.

    Districts are ranked by their mean ``person_days_lakhs`` over all years.
    When the frame covers more than one state, districts are identified as
    "District (State)" so that same-named districts in different states are
    not averaged together; single-state output is unchanged.

    Args:
        df: Frame with ``state``, ``district`` and ``person_days_lakhs``.
        scope: Label inserted into the figure title.
    """
    labels = df["district"]
    if df["state"].nunique() > 1:
        qualified = df["district"].astype(str) + " (" + df["state"].astype(str) + ")"
        # Keep rows without a district out of the ranking, as before.
        labels = qualified.where(df["district"].notna()).rename("district")

    avg = df["person_days_lakhs"].groupby(labels).mean().sort_values(ascending=False)
    # Never show more districts per panel than half of what is available.
    n = min(10, len(avg) // 2)
    top = avg.head(n)
    bot = avg.tail(n).sort_values()

    fig, axes = plt.subplots(1, 2, figsize=(14, max(5, n * 0.55)))
    axes[0].barh(top.index, top.values, color="#4CAF50")
    axes[0].set_title(f"Top {n} Districts")
    axes[0].set_xlabel("Avg Person Days (lakh)")
    axes[0].invert_yaxis()

    axes[1].barh(bot.index, bot.values, color="#FF7043")
    axes[1].set_title(f"Bottom {n} Districts")
    axes[1].set_xlabel("Avg Person Days (lakh)")
    axes[1].invert_yaxis()

    plt.suptitle(f"MNREGA District Performance — {scope}", fontsize=13)
    plt.tight_layout()
    _save("02_district_performance_ranking.png")

    print("\n[eda] Top 5 districts:")
    for d, v in avg.head(5).items():
        print(f" {d:35s}: {v:.2f} lakh")
    print("[eda] Bottom 5 districts:")
    for d, v in avg.tail(5).items():
        print(f" {d:35s}: {v:.2f} lakh")
120
+
121
+
122
+ # ── 4. Efficiency ranking ─────────────────────────────────────────────────────
123
+
124
def _plot_efficiency_ranking(df: pd.DataFrame, scope: str) -> None:
    """Save a bar chart ranking districts by mean expenditure per person day.

    The figure is skipped when ``expenditure_per_personday`` is absent or has
    no usable values. With more than 30 districts only the 15 lowest and 15
    highest are drawn. When the frame covers more than one state, districts
    are identified as "District (State)" so same-named districts in different
    states are not averaged together.

    Args:
        df: Frame with ``state``, ``district`` and, optionally,
            ``expenditure_per_personday``.
        scope: Label inserted into the figure title.
    """
    if "expenditure_per_personday" not in df.columns:
        print("[eda] Skipping efficiency ranking — expenditure_per_personday not in V3 features")
        return

    labels = df["district"]
    if df["state"].nunique() > 1:
        qualified = df["district"].astype(str) + " (" + df["state"].astype(str) + ")"
        labels = qualified.where(df["district"].notna()).rename("district")

    eff = (
        df["expenditure_per_personday"]
        .groupby(labels)
        .mean().sort_values().dropna()
    )
    if eff.empty:
        # idxmin/idxmax below would raise on an empty ranking.
        print("[eda] Skipping efficiency ranking — no usable expenditure_per_personday values")
        return
    if len(eff) > 30:
        eff = pd.concat([eff.head(15), eff.tail(15)])

    # Median of the districts actually shown; used for colouring and the guide line.
    median = eff.median()
    fig, ax = plt.subplots(figsize=(10, max(6, len(eff) * 0.3)))
    colors = ["#43A047" if v <= median else "#EF5350" for v in eff.values]
    ax.barh(eff.index, eff.values, color=colors)
    ax.axvline(median, color="navy", linestyle="--",
               linewidth=1.5, label=f"Median: {median:.1f}")
    ax.set_title(f"Cost Efficiency — {scope}\n(Rs. expenditure per lakh persondays — lower is better)")
    ax.set_xlabel("Rs. lakh per lakh persondays")
    ax.legend()
    plt.tight_layout()
    _save("03_efficiency_ranking.png")
    print(f"\n[eda] Most efficient : {eff.idxmin()} ({eff.min():.1f})")
    print(f"[eda] Least efficient: {eff.idxmax()} ({eff.max():.1f})")
146
+
147
+
148
+ # ── 5. COVID impact ───────────────────────────────────────────────────────────
149
+
150
def _plot_covid_impact(df: pd.DataFrame) -> None:
    """Save a bar chart of the % change in person days from 2019 to 2020.

    Only districts present in both years are compared. Districts whose 2019
    baseline makes the percentage undefined (zero or missing) are left out.
    The figure is skipped when nothing remains to compare, e.g. when the data
    does not cover both years. When the frame covers more than one state,
    districts are identified as "District (State)".

    Args:
        df: Frame with ``state``, ``district``, ``financial_year`` and
            ``person_days_lakhs``.
    """
    labels = df["district"]
    if df["state"].nunique() > 1:
        qualified = df["district"].astype(str) + " (" + df["state"].astype(str) + ")"
        labels = qualified.where(df["district"].notna()).rename("district")

    def _yearly_mean(year: int) -> pd.Series:
        # Mean person days per district label for one financial year.
        in_year = df["financial_year"] == year
        return df.loc[in_year, "person_days_lakhs"].groupby(labels[in_year]).mean()

    pre = _yearly_mean(2019)
    post = _yearly_mean(2020)
    common = pre.index.intersection(post.index)
    change = (post[common] - pre[common]) / pre[common] * 100
    # A zero baseline yields +/-inf (or NaN for 0/0); neither can be plotted
    # or ranked meaningfully.
    change = change.replace([np.inf, -np.inf], np.nan).dropna()
    change = change.sort_values(ascending=False)

    if change.empty:
        print("[eda] Skipping COVID impact — no districts with data for both 2019 and 2020")
        return

    # Cap at 20 districts for readability
    show = pd.concat([change.head(10), change.tail(10)]) if len(change) > 20 else change

    fig, ax = plt.subplots(figsize=(10, max(6, len(show) * 0.35)))
    colors = ["#388E3C" if v >= 0 else "#D32F2F" for v in show.values]
    ax.barh(show.index, show.values, color=colors)
    ax.axvline(0, color="black", linewidth=0.8)
    ax.set_title("COVID Impact: % Change in Person Days\n(2019-20 to 2020-21)")
    ax.set_xlabel("% Change")
    plt.tight_layout()
    _save("04_covid_impact.png")

    print(f"\n[eda] COVID — biggest spike : {change.idxmax()} (+{change.max():.1f}%)")
    print(f"[eda] COVID — least impacted : {change.idxmin()} ({change.min():.1f}%)")
170
+
171
+
172
+ # ── 6. Correlation heatmap ────────────────────────────────────────────────────
173
+
174
def _plot_correlation_heatmap(df: pd.DataFrame) -> None:
    """Save a lower-triangle correlation heatmap of the available features.

    Only the candidate columns that exist in ``df`` are correlated.
    """
    wanted = (
        "person_days_lakhs",
        "expenditure_lakhs",
        "avg_wage_rate",
        "expenditure_per_personday",
        "lag_person_days",
        "yoy_growth",
        "demand_fulfillment_rate",
        "district_avg_persondays",
        "rainfall_mm",
        "poverty_rate_pct",
        "scheme_overlap_score",
        "budget_utilization_rate",
    )
    present = [name for name in wanted if name in df.columns]
    corr = df[present].corr()

    # Mask the diagonal and everything above it so each pair appears once.
    upper = np.triu(np.ones_like(corr, dtype=bool))

    fig, ax = plt.subplots(figsize=(11, 9))
    sns.heatmap(
        corr,
        mask=upper,
        annot=True,
        fmt=".2f",
        cmap="coolwarm",
        center=0,
        ax=ax,
        linewidths=0.5,
        annot_kws={"size": 8},
    )
    ax.set_title("Feature Correlation Heatmap")
    plt.tight_layout()
    _save("05_correlation_heatmap.png")
193
+
194
+
195
+ # ── Helper ────────────────────────────────────────────────────────────────────
196
+
197
def _save(filename: str) -> None:
    """Write the current matplotlib figure to FIGURES_DIR, then close it."""
    target = os.path.join(FIGURES_DIR, filename)
    plt.savefig(target, bbox_inches="tight")
    plt.close()
    print(f"[eda] Saved: {target}")
src/extract.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ extract.py
3
+ ----------
4
+ Loads and validates the unified MNREGA CSV.
5
+ Supports both the synthetic unified dataset and any real CSV
6
+ that matches the schema.
7
+ """
8
+
9
+ import pandas as pd
10
+
11
# Columns every input CSV must provide.
REQUIRED_COLUMNS = {
    "state",
    "district",
    "financial_year",
    "person_days_lakhs",
    "expenditure_lakhs",
    "avg_wage_rate",
}

# Per-stage schemas: each stage extends the previous one.
STAGE1_COLUMNS = REQUIRED_COLUMNS
STAGE2_COLUMNS = STAGE1_COLUMNS | {
    "rainfall_mm",
    "crop_season_index",
    "rural_population_lakhs",
    "poverty_rate_pct",
}
STAGE3_COLUMNS = STAGE2_COLUMNS | {
    "pmkisan_beneficiaries",
    "pmay_houses_sanctioned",
    "budget_allocated_lakhs",
}
19
+
20
+
21
def load_csv(filepath: str, state_filter: str = None) -> pd.DataFrame:
    """
    Load unified MNREGA CSV.

    Column names are normalised (trimmed, lower-cased, spaces replaced by
    underscores) before the required columns are validated.

    Args:
        filepath     : Path to CSV file.
        state_filter : If provided, filter to a single state e.g. "Maharashtra".
                       Matching ignores case and surrounding whitespace, because
                       the raw values have not been stripped at this point.
                       Pass None for all-India (Stage 2+).

    Returns:
        Raw DataFrame.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
        ValueError: If a required column is missing.
    """
    print(f"[extract] Loading: {filepath}")
    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError as err:
        # Keep the original error chained for debugging.
        raise FileNotFoundError(f"[extract] File not found: {filepath}") from err

    # Normalize column names
    df.columns = [str(c).strip().lower().replace(" ", "_") for c in df.columns]

    _validate_columns(df)

    if state_filter:
        before = len(df)
        wanted = state_filter.strip().casefold()
        states = df["state"].astype(str).str.strip().str.casefold()
        df = df[states == wanted].reset_index(drop=True)
        print(f"[extract] Filtered to '{state_filter}': {before} → {len(df)} rows")
        if df.empty:
            print(f"[extract] Warning: no rows matched state '{state_filter}'")

    print(f"[extract] Loaded {len(df)} rows | {df['state'].nunique()} state(s) | {df['district'].nunique()} districts | {df['financial_year'].nunique()} years")
    print("[extract] Validation passed ✓")
    return df
52
+
53
+
54
def _validate_columns(df: pd.DataFrame) -> None:
    """Raise ValueError when any required column is absent from ``df``."""
    missing = REQUIRED_COLUMNS - set(df.columns)
    if not missing:
        return
    raise ValueError(f"[extract] Missing required columns: {missing}")
src/features.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ features.py
3
+ -----------
4
+ V3 leak-free feature engineering for MNREGA district-level forecasting.
5
+
6
+ LEAKAGE AUDIT (what was removed vs original):
7
+ REMOVED β€” works_completed : r=1.00 with target (formula of person_days)
8
+ REMOVED β€” expenditure_lakhs : r=0.976 (person_days Γ— wage_rate)
9
+ REMOVED β€” budget_allocated_lakhs : r=0.976 (derived from expenditure)
10
+ REMOVED β€” households_demanded/offered/availed : r=0.94 (copies of target structure)
11
+ REMOVED β€” lag_expenditure : r=0.866 (derived from target)
12
+ REMOVED β€” district_avg_persondays : replaced with blended_capacity (safer)
13
+ REMOVED β€” yoy_growth : computed from current-year target β†’ leaky
14
+ REMOVED β€” demand_fulfillment_rate : uses current-year availed (target-correlated)
15
+ REMOVED β€” all synthetic columns : rainfall, poverty, pmkisan, pmay (fabricated)
16
+
17
+ V3 FEATURES (all computed from lagged/historical values only):
18
+ lag1_pd : person_days_lakhs shifted 1 year per district
19
+ lag2_pd : shifted 2 years
20
+ lag3_pd : shifted 3 years
21
+ roll2_mean : 2-year rolling mean of lag1
22
+ roll3_mean : 3-year rolling mean of lag1
23
+ roll3_std : 3-year rolling std of lag1 (volatility)
24
+ lag1_adj : lag1 deflated by COVID multiplier when lag year = 2020
25
+ lag_yoy : YoY growth of lag1 vs lag2 (historical, not current)
26
+ lag2_yoy : YoY growth of lag2 vs lag3
27
+ momentum : lag_yoy - lag2_yoy (acceleration)
28
+ district_capacity : expanding mean of lag1 (long-run structural level)
29
+ blended_capacity : district_capacity blended with state mean when history < 3yr
30
+ relative_to_state : lag1 / state-year lag1 mean (district's share)
31
+ state_lag1_norm : state total lag1 / state historical mean
32
+ lag1_vs_capacity : lag1 / district_capacity (how anomalous last year was)
33
+ lag1_zscore : z-score of lag1 vs district expanding history
34
+ state_lag1_zscore : z-score of state-level lag1
35
+ lag1_extreme : flag when |lag1_zscore| > 2.5
36
+ lag1_is_covid : flag when lag year = 2020
37
+ history_length : cumulative count of observations per district
38
+ avg_wage_rate : official wage schedule (genuinely exogenous)
39
+ wage_yoy : year-on-year % change in wage rate
40
+ is_covid : flag for FY 2020 (COVID demand shock year)
41
+ is_post_covid : flag for FY >= 2021
42
+ is_2022_anomaly : flag for FY 2022 (West Bengal + others reporting anomaly)
43
+ year_trend : years since dataset start (linear time trend)
44
+ state_enc : label-encoded state
45
+ district_enc : label-encoded district (state|district composite)
46
+
47
+ Walk-forward CV results (GBR, max_depth=4, lr=0.03, n_est=200, subsample=0.7):
48
+ Mean RΒ² : 0.7722 (excl. 2022: 0.8618)
49
+ Mean MAE : 10.68L
50
+ Old RΒ² : 0.9963 ← was leakage from works_completed (r=1.0)
51
+ """
52
+
53
+ import pandas as pd
54
+ import numpy as np
55
+ from sklearn.preprocessing import LabelEncoder
56
+
57
+ # COVID multiplier: how much 2020 inflated vs 2019 nationally
58
+ # Computed from real data: 55.01L / 38.04L = 1.447
59
+ COVID_MULTIPLIER = 1.447
60
+
61
+ TARGET = "person_days_lakhs"
62
+
63
+
64
def build_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add every V3 feature to a cleaned DataFrame.

    The frame is sorted by state, district and financial_year, passed
    through each feature step in a fixed order, and finally stripped of
    rows that have no ``lag1_pd`` or ``lag2_pd`` value.

    Args:
        df : Cleaned DataFrame with at minimum:
             state, district, financial_year, person_days_lakhs,
             households_availed, avg_wage_rate

    Returns:
        Feature-engineered DataFrame ready for model training/inference.
    """
    print("[features] Building V3 leak-free features...")

    ordered = df.sort_values(["state", "district", "financial_year"])
    out = ordered.reset_index(drop=True)

    # Order matters: later steps read columns produced by earlier ones.
    pipeline = (
        _lag_features,
        _rolling_features,
        _covid_features,
        _trend_features,
        _capacity_features,
        _anomaly_features,
        _state_features,
        _temporal_flags,
        _wage_features,
        _encode_categoricals,
    )
    for step in pipeline:
        out = step(out)

    # Rows without two years of history cannot be trained or predicted on.
    rows_in = len(out)
    out = out.dropna(subset=["lag1_pd", "lag2_pd"]).reset_index(drop=True)
    print(f"[features] Dropped {rows_in - len(out)} rows (insufficient history)")
    print(f"[features] Done. Final shape: {out.shape}")
    return out
99
+
100
+
101
+ # ── Lag features ──────────────────────────────────────────────────────────────
102
+
103
def _lag_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add per-district lags of the target and of households_availed.

    Adds ``lag1_pd``, ``lag2_pd``, ``lag3_pd`` (TARGET shifted 1-3 rows
    within each state/district group) and ``lag1_hh``. Rows are expected
    to be sorted by financial_year within each district already, which
    ``build_features`` does before calling this step.

    ``households_availed`` is not one of the columns the extract step
    requires, so when it is absent ``lag1_hh`` is filled with NaN instead
    of raising KeyError (same pattern as the optional-column guard in
    ``_wage_features``).

    Args:
        df : Sorted DataFrame containing state, district and TARGET.

    Returns:
        The same DataFrame object with the lag columns added.
    """
    grp = df.groupby(["state", "district"])
    for k in (1, 2, 3):
        df[f"lag{k}_pd"] = grp[TARGET].shift(k)

    if "households_availed" in df.columns:
        df["lag1_hh"] = grp["households_availed"].shift(1)
    else:
        # Optional input: keep the column so downstream steps still run.
        df["lag1_hh"] = np.nan
    return df
110
+
111
+
112
+ # ── Rolling statistics (computed on lag1, so no leakage) ─────────────────────
113
+
114
def _rolling_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add rolling statistics of ``lag1_pd`` within each state/district group.

    Adds ``roll2_mean`` and ``roll3_mean`` (rolling means over 2 and 3 rows,
    at least one observation required) and ``roll3_std`` (rolling standard
    deviation over 3 rows, undefined values replaced by 0).

    Returns:
        The same DataFrame object with the three columns added.
    """
    keys = ["state", "district"]

    def _mean_over(window):
        # Rolling mean of lag1 restricted to each district's own rows.
        return df.groupby(keys)["lag1_pd"].transform(
            lambda col: col.rolling(window, min_periods=1).mean()
        )

    def _std3(col):
        # A single observation has no sample std; report 0 volatility.
        return col.rolling(3, min_periods=1).std().fillna(0)

    df["roll2_mean"] = _mean_over(2)
    df["roll3_mean"] = _mean_over(3)
    df["roll3_std"] = df.groupby(keys)["lag1_pd"].transform(_std3)
    return df
122
+
123
+
124
+ # ── COVID-aware lag adjustment ────────────────────────────────────────────────
125
+
126
def _covid_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Flag and deflate lag1 values that come from the 2020 COVID year.

    Adds:
        lag1_is_covid : 1 when ``financial_year - 1`` equals 2020, else 0.
        lag1_adj      : ``lag1_pd`` divided by COVID_MULTIPLIER on flagged
                        rows, plain ``lag1_pd`` everywhere else.

    The deflated value gives the model a COVID-corrected signal next to the
    raw lag so the spike is not extrapolated forward.

    Returns:
        The same DataFrame object with both columns added.
    """
    lag_year = df["financial_year"] - 1
    df["lag1_is_covid"] = (lag_year == 2020).astype(int)

    covid_lag = df["lag1_is_covid"] == 1
    deflated = df["lag1_pd"] / COVID_MULTIPLIER
    df["lag1_adj"] = np.where(covid_lag, deflated, df["lag1_pd"])
    return df
141
+
142
+
143
+ # ── YoY trend / momentum (all historical β€” no current-year leakage) ───────────
144
+
145
def _trend_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add historical year-on-year growth rates and their difference.

    Adds:
        lag_yoy  : (lag1_pd - lag2_pd) / lag2_pd, clipped to [-1, 3].
        lag2_yoy : (lag2_pd - lag3_pd) / lag3_pd, clipped to [-1, 3].
        momentum : lag_yoy - lag2_yoy.

    A zero denominator is treated as missing, so the growth is NaN rather
    than infinite.

    Returns:
        The same DataFrame object with the three columns added.
    """
    def _growth(newer, older):
        denom = df[older].replace(0, np.nan)
        change = df[newer] - df[older]
        return (change / denom).clip(-1, 3)

    recent = _growth("lag1_pd", "lag2_pd")
    prior = _growth("lag2_pd", "lag3_pd")
    df["lag_yoy"] = recent
    df["lag2_yoy"] = prior
    df["momentum"] = df["lag_yoy"] - df["lag2_yoy"]
    return df
154
+
155
+
156
+ # ── District structural capacity ──────────────────────────────────────────────
157
+
158
def _capacity_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add long-run district capacity features derived from lagged values.

    Adds:
        district_capacity : expanding mean of lag1_pd within each district.
        history_length    : 0-based row counter within each district.
        blended_capacity  : for rows with history_length < 3, an equal blend
                            of district_capacity (falling back to the
                            state/year mean of lag1_pd when missing) and
                            that state/year mean; district_capacity otherwise.
        lag1_vs_capacity  : lag1_pd / blended_capacity, clipped to [0, 5],
                            missing values set to 1.0.
        lag1_hh_ratio     : lag1_hh / blended_capacity, same clipping/fill.

    Returns:
        The same DataFrame object with the columns added.
    """
    district_keys = ["state", "district"]

    df["district_capacity"] = df.groupby(district_keys)["lag1_pd"].transform(
        lambda lagged: lagged.expanding().mean()
    )
    df["history_length"] = df.groupby(district_keys).cumcount()

    # Mean lag1 across all districts of the same state in the same year.
    state_level = df.groupby(["state", "financial_year"])["lag1_pd"].transform("mean")
    short_history = df["history_length"] < 3
    cold_start = 0.5 * df["district_capacity"].fillna(state_level) + 0.5 * state_level
    df["blended_capacity"] = np.where(short_history, cold_start, df["district_capacity"])

    # Zero capacity would divide by zero; treat it as missing instead.
    safe_capacity = df["blended_capacity"].replace(0, np.nan)
    ratio_specs = (
        ("lag1_vs_capacity", "lag1_pd"),
        ("lag1_hh_ratio", "lag1_hh"),
    )
    for target_col, source_col in ratio_specs:
        df[target_col] = (df[source_col] / safe_capacity).clip(0, 5).fillna(1.0)

    return df
187
+
188
+
189
+ # ── Anomaly detection ─────────────────────────────────────────────────────────
190
+
191
def _rolling_zscore(s: pd.Series) -> pd.Series:
    """
    Z-score each value against the expanding mean/std of the values before it.

    The series is shifted by one row before the expanding statistics are
    taken, so a value never contributes to its own baseline. A missing or
    zero standard deviation is replaced by 1, and the result is clipped to
    [-4, 4].
    """
    history = s.shift(1)
    baseline = history.expanding().mean()
    spread = history.expanding().std()
    spread = spread.fillna(1).replace(0, 1)
    z = (s - baseline) / spread
    return z.clip(-4, 4)
196
+
197
+
198
def _anomaly_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Flag district-years whose previous-year value was an outlier.

    Adds:
        lag1_zscore  : per-district ``_rolling_zscore`` of TARGET, shifted by
                       one row so it describes last year's value; missing
                       values set to 0.
        lag1_extreme : 1 when |lag1_zscore| > 2.5, else 0.

    The model can use these to discount lag1 when it came from an unusual
    year (e.g. West Bengal in 2022).

    Returns:
        The same DataFrame object with both columns added.
    """
    def _lagged_z(series):
        # Shift after scoring so the score belongs to the lag year.
        return _rolling_zscore(series).shift(1)

    by_district = df.groupby(["state", "district"])[TARGET]
    z = by_district.transform(_lagged_z)
    df["lag1_zscore"] = z.fillna(0)

    is_extreme = df["lag1_zscore"].abs() > 2.5
    df["lag1_extreme"] = is_extreme.astype(int)

    return df
212
+
213
+
214
+ # ── State-level features ──────────────────────────────────────────────────────
215
+
216
def _state_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add state-level lag features, all computed from prior years only.

    Adds:
        state_lag1_zscore : ``_rolling_zscore`` of the state's yearly total,
                            shifted one year so row T describes year T-1
                            (missing values set to 0, matching the district
                            level ``lag1_zscore``).
        state_lag1_norm   : previous-year state total divided by the
                            expanding mean of previous-year totals seen so
                            far, clipped to [0, 5], missing set to 1.0.
        relative_to_state : lag1_pd divided by the mean lag1_pd of the same
                            state and year, clipped to [0, 10], missing set
                            to 1.0.

    Both state-level columns previously looked at data the model must not
    see: the z-score was taken on the current-year total (which is the sum
    of the current-year target), and the normaliser was a mean over every
    year including future ones.

    Returns:
        A new DataFrame (result of a left merge) with the columns added.
    """
    # State total person_days per year. groupby sorts by state then
    # financial_year, so the per-state shifts below run in year order
    # (assumes financial_year sorts chronologically).
    state_yr = (
        df.groupby(["state", "financial_year"])[TARGET]
        .sum().reset_index()
        .rename(columns={TARGET: "state_total"})
    )
    state_yr["state_total_lag1"] = state_yr.groupby("state")["state_total"].shift(1)

    # Z-score of last year's state total vs the years before it. The shift
    # keeps the current-year total (i.e. the target) out of the feature.
    state_yr["state_lag1_zscore"] = (
        state_yr.groupby("state")["state_total"]
        .transform(lambda s: _rolling_zscore(s).shift(1))
        .fillna(0)
    )

    # Normalise by the history available up to the lag year only.
    state_hist_mean = state_yr.groupby("state")["state_total_lag1"].transform(
        lambda s: s.expanding().mean()
    )
    state_yr["state_lag1_norm"] = (
        state_yr["state_total_lag1"] / state_hist_mean.replace(0, np.nan)
    ).clip(0, 5).fillna(1.0)

    df = df.merge(
        state_yr[["state", "financial_year",
                  "state_lag1_zscore", "state_lag1_norm"]],
        on=["state", "financial_year"],
        how="left"
    )

    # District's position relative to the state mean (its structural share).
    state_yr_lag = df.groupby(["state", "financial_year"])["lag1_pd"].transform("mean")
    df["relative_to_state"] = (
        df["lag1_pd"] / state_yr_lag.replace(0, np.nan)
    ).clip(0, 10).fillna(1.0)

    return df
254
+
255
+
256
+ # ── Temporal flags ────────────────────────────────────────────────────────────
257
+
258
def _temporal_flags(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a linear time trend and calendar flags based on financial_year.

    Adds:
        year_trend      : financial_year minus the smallest financial_year
                          present in ``df``.
        is_covid        : 1 for financial_year 2020.
        is_post_covid   : 1 for financial_year >= 2021.
        is_2022_anomaly : 1 for financial_year 2022.

    Returns:
        The same DataFrame object with the four columns added.
    """
    fy = df["financial_year"]
    df["year_trend"] = fy - fy.min()

    flag_masks = (
        ("is_covid", fy == 2020),
        ("is_post_covid", fy >= 2021),
        ("is_2022_anomaly", fy == 2022),
    )
    for flag, mask in flag_masks:
        df[flag] = mask.astype(int)
    return df
265
+
266
+
267
+ # ── Wage features ─────────────────────────────────────────────────────────────
268
+
269
def _wage_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add ``wage_yoy``: the per-district fractional change in avg_wage_rate.

    The change is computed row-to-row within each state/district group
    without forward-filling gaps, missing values are set to 0 and the
    result is clipped to [-0.2, 0.5]. When ``avg_wage_rate`` is not a
    column the frame is returned untouched.

    avg_wage_rate is described by the module as the official state-notified
    wage schedule, so it is treated as exogenous to person_days.

    Returns:
        The same DataFrame object, with ``wage_yoy`` added when possible.
    """
    if "avg_wage_rate" in df.columns:
        per_district = df.groupby(["state", "district"])["avg_wage_rate"]
        change = per_district.pct_change(fill_method=None)
        df["wage_yoy"] = change.fillna(0).clip(-0.2, 0.5)
    return df
284
+
285
+
286
+ # ── Categorical encoding ──────────────────────────────────────────────────────
287
+
288
def _encode_categoricals(df: pd.DataFrame) -> pd.DataFrame:
    """
    Label-encode state and district into integer columns.

    Adds:
        state_enc    : LabelEncoder codes of ``state`` (as strings).
        district_enc : LabelEncoder codes of the composite key
                       ``"<district>|<state>"``, so equally named districts
                       in different states get different codes.

    Both columns are cast to ``str`` before the composite key is built, so
    a non-string district column (numeric codes, missing values) no longer
    fails or collapses during the string concatenation.

    NOTE(review): the encoders are fitted on whatever frame is passed in, so
    codes are only comparable between frames with the same label set.

    Returns:
        The same DataFrame object with both columns added.
    """
    state_labels = df["state"].astype(str)
    district_labels = df["district"].astype(str) + "|" + state_labels

    df["state_enc"] = LabelEncoder().fit_transform(state_labels)
    df["district_enc"] = LabelEncoder().fit_transform(district_labels)
    return df
296
+
297
+
298
+ # ── Feature list for model ────────────────────────────────────────────────────
299
+
300
+ # Canonical lean feature set β€” chosen by permutation importance analysis.
301
+ # All features are computed from lagged/historical values only.
302
+ FEATURE_COLS = [
303
+ "lag1_pd",
304
+ "roll2_mean",
305
+ "roll3_mean",
306
+ "lag1_adj",
307
+ "lag2_pd",
308
+ "lag3_pd",
309
+ "roll3_std",
310
+ "state_lag1_norm",
311
+ "relative_to_state",
312
+ "blended_capacity",
313
+ "lag1_vs_capacity",
314
+ "state_lag1_zscore",
315
+ "state_enc",
316
+ "is_covid",
317
+ "lag1_is_covid",
318
+ "wage_yoy",
319
+ "avg_wage_rate",
320
+ ]
src/generate_synthetic.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ generate_synthetic.py
3
+ ----------------------
4
+ Generates realistic synthetic MNREGA district-level data for Maharashtra.
5
+
6
+ Mimics the structure of real data available from:
7
+ - nregarep1.nic.in (MoRD official portal)
8
+ - dataful.in (district-wise persondays + expenditure)
9
+
10
+ Columns produced match what you'd get from real sources:
11
+ state, district, financial_year,
12
+ households_demanded, households_offered, households_availed,
13
+ person_days, expenditure_lakhs, avg_wage_rate, works_completed
14
+
15
+ Design principles for realism:
16
+ - Each district has a stable "base capacity" (some districts are
17
+ structurally larger / more active than others)
18
+ - Year-on-year growth follows real MNREGA trends (spike in 2020-21
19
+ due to COVID reverse migration, slowdown in urban-adjacent districts)
20
+ - Expenditure correlates with person_days but has noise (efficiency varies)
21
+ - Wage rate increases over years (matches real wage revision schedule)
22
+ - ~8% missing values injected randomly to simulate real data quality
23
+ """
24
+
25
+ import numpy as np
26
+ import pandas as pd
27
+ import os
28
+
29
+ # ── Maharashtra districts (all 36) ───────────────────────────────────────────
30
+ MAHARASHTRA_DISTRICTS = [
31
+ "Ahmednagar", "Akola", "Amravati", "Aurangabad", "Beed",
32
+ "Bhandara", "Buldhana", "Chandrapur", "Dhule", "Gadchiroli",
33
+ "Gondia", "Hingoli", "Jalgaon", "Jalna", "Kolhapur",
34
+ "Latur", "Mumbai City", "Mumbai Suburban", "Nagpur", "Nanded",
35
+ "Nandurbar", "Nashik", "Osmanabad", "Palghar", "Parbhani",
36
+ "Pune", "Raigad", "Ratnagiri", "Sangli", "Satara",
37
+ "Sindhudurg", "Solapur", "Thane", "Wardha", "Washim", "Yavatmal"
38
+ ]
39
+
40
+ YEARS = [
41
+ "2014-15", "2015-16", "2016-17", "2017-18", "2018-19",
42
+ "2019-20", "2020-21", "2021-22", "2022-23", "2023-24"
43
+ ]
44
+
45
+ # Real MNREGA wage rates in Maharashtra (approx β‚Ή/day by year)
46
+ WAGE_RATES = {
47
+ "2014-15": 162, "2015-16": 174, "2016-17": 183, "2017-18": 194,
48
+ "2018-19": 203, "2019-20": 213, "2020-21": 238, "2021-22": 256,
49
+ "2022-23": 273, "2023-24": 289
50
+ }
51
+
52
+ # Year-level demand multipliers based on real MNREGA trends
53
+ # COVID year (2020-21) saw massive spike due to reverse migration
54
+ YEAR_MULTIPLIERS = {
55
+ "2014-15": 0.85, "2015-16": 0.90, "2016-17": 0.92, "2017-18": 0.95,
56
+ "2018-19": 1.00, "2019-20": 1.05, "2020-21": 1.45, "2021-22": 1.20,
57
+ "2022-23": 1.10, "2023-24": 1.08
58
+ }
59
+
60
# District profile: (base_persondays_lakhs, efficiency_score, rural_weight)
# Urban/peri-urban districts have lower base; tribal/rural have higher.
# One entry per name in MAHARASHTRA_DISTRICTS; keys must be unique because a
# repeated key in a dict literal silently replaces the earlier entry.
DISTRICT_PROFILES = {
    "Gadchiroli":      (18.5, 0.72, 0.95),
    "Nandurbar":       (16.2, 0.68, 0.93),
    "Yavatmal":        (15.8, 0.74, 0.91),
    "Amravati":        (14.3, 0.76, 0.88),
    "Chandrapur":      (13.9, 0.71, 0.87),
    "Washim":          (12.1, 0.73, 0.89),
    "Buldhana":        (11.8, 0.75, 0.86),
    "Beed":            (11.5, 0.70, 0.90),
    "Hingoli":         (10.9, 0.72, 0.88),
    "Osmanabad":       (10.7, 0.69, 0.87),
    "Latur":           (10.4, 0.71, 0.85),
    "Nanded":          (10.2, 0.73, 0.84),
    "Jalna":           (9.8, 0.74, 0.85),
    "Parbhani":        (9.5, 0.72, 0.84),
    "Akola":           (9.3, 0.75, 0.83),
    "Dhule":           (9.1, 0.70, 0.85),
    "Gondia":          (8.9, 0.76, 0.82),
    "Bhandara":        (8.6, 0.74, 0.81),
    "Wardha":          (8.3, 0.77, 0.80),
    "Ahmednagar":      (8.1, 0.78, 0.79),
    "Solapur":         (7.9, 0.76, 0.80),
    "Aurangabad":      (7.6, 0.79, 0.75),
    "Jalgaon":         (7.4, 0.77, 0.77),
    "Nashik":          (7.1, 0.80, 0.73),
    "Satara":          (6.8, 0.81, 0.74),
    "Sangli":          (6.5, 0.80, 0.73),
    "Kolhapur":        (6.2, 0.82, 0.71),
    "Palghar":         (6.0, 0.75, 0.78),
    # Was a second "Nandurbar" key, which overwrote the 16.2 entry above and
    # left Nagpur (listed in MAHARASHTRA_DISTRICTS) without a profile.
    "Nagpur":          (5.8, 0.71, 0.82),
    "Ratnagiri":       (5.5, 0.79, 0.74),
    "Sindhudurg":      (5.1, 0.80, 0.72),
    "Raigad":          (4.8, 0.78, 0.68),
    "Pune":            (4.2, 0.83, 0.55),
    "Thane":           (3.5, 0.81, 0.45),
    "Mumbai Suburban": (1.2, 0.85, 0.15),
    "Mumbai City":     (0.4, 0.88, 0.05),
}
100
+
101
+
102
def generate(seed: int = 42, missing_rate: float = 0.08) -> pd.DataFrame:
    """
    Generate a synthetic MNREGA dataset for Maharashtra.

    One row is produced for every (district, financial year) pair in
    MAHARASHTRA_DISTRICTS x YEARS. Random draws are made in a fixed order
    from a single seeded generator, so a given ``seed`` always yields the
    same frame.

    Args:
        seed        : Random seed for reproducibility.
        missing_rate: Probability that each cell of the nullable columns
                      (the three household counts and works_completed) is
                      replaced by NaN.

    Returns:
        DataFrame with columns state, district, financial_year,
        households_demanded, households_offered, households_availed,
        person_days_lakhs, expenditure_lakhs, avg_wage_rate,
        works_completed.
    """
    rng = np.random.default_rng(seed)
    records = []

    for district in MAHARASHTRA_DISTRICTS:
        # Districts without a profile fall back to a mid-range default.
        # The rural weight is part of the profile but not used here.
        base_pd, efficiency, _rural_w = DISTRICT_PROFILES.get(
            district, (7.0, 0.75, 0.70)
        )

        for year in YEARS:
            year_mult = YEAR_MULTIPLIERS[year]
            wage = WAGE_RATES[year]

            # Person days (in lakhs): base level x year trend x ~7% noise,
            # floored at 0.1 so the value stays positive.
            noise = rng.normal(1.0, 0.07)
            person_days_lakhs = base_pd * year_mult * noise
            person_days_lakhs = max(person_days_lakhs, 0.1)

            # Households: person days / 38 days per household, inflated by
            # 5-15% for demand; offered and availed are successive fractions.
            hh_demanded = int(person_days_lakhs * 1e5 / 38 * rng.uniform(1.05, 1.15))
            hh_offered = int(hh_demanded * rng.uniform(0.92, 0.99))
            hh_availed = int(hh_offered * rng.uniform(0.88, 0.97))

            # Expenditure (lakhs): person_days x wage, scaled by the
            # district's efficiency plus +/-7% noise. The 1e5 factors cancel
            # but are kept so seeded output stays bit-for-bit unchanged.
            base_expenditure = person_days_lakhs * 1e5 * wage / 1e5
            expenditure_lakhs = base_expenditure / efficiency * rng.uniform(0.93, 1.07)

            # Works completed: 18-35 works per lakh person days.
            works = int(person_days_lakhs * rng.uniform(18, 35))

            records.append({
                "state": "Maharashtra",
                "district": district,
                "financial_year": year,
                "households_demanded": hh_demanded,
                "households_offered": hh_offered,
                "households_availed": hh_availed,
                "person_days_lakhs": round(person_days_lakhs, 3),
                "expenditure_lakhs": round(expenditure_lakhs, 2),
                "avg_wage_rate": wage,
                "works_completed": works,
            })

    df = pd.DataFrame(records)

    # Inject missing values into the count columns only.
    nullable_cols = [
        "households_demanded", "households_offered",
        "households_availed", "works_completed"
    ]
    for col in nullable_cols:
        mask = rng.random(len(df)) < missing_rate
        if mask.any():
            # These columns are int64; cast explicitly before writing NaN,
            # since implicit upcasting on assignment is deprecated in pandas.
            df[col] = df[col].astype("float64")
            df.loc[mask, col] = np.nan

    print(f"[generate] Created {len(df)} rows Γ— {len(df.columns)} columns")
    print(f"[generate] Districts: {df['district'].nunique()} | Years: {df['financial_year'].nunique()}")
    print(f"[generate] Missing values injected: ~{missing_rate*100:.0f}% per nullable column")

    return df
172
+
173
+
174
def save(df: pd.DataFrame, path: str = "data/raw/mnrega_maharashtra_synthetic.csv") -> None:
    """
    Write ``df`` to ``path`` as CSV (no index), creating parent folders.

    Args:
        df   : DataFrame to write.
        path : Destination file. A bare file name (no directory part) is
               written to the current working directory.
    """
    parent = os.path.dirname(path)
    # dirname is "" for a bare file name, and os.makedirs("") raises.
    if parent:
        os.makedirs(parent, exist_ok=True)
    df.to_csv(path, index=False)
    print(f"[generate] Saved β†’ {path}")
178
+
179
+
180
if __name__ == "__main__":
    # Script entry point: build the default dataset, write it to the default
    # path, then show the first six rows.
    synthetic = generate()
    save(synthetic)
    print("\nSample:")
    preview = synthetic.head(6).to_string(index=False)
    print(preview)
src/model.py ADDED
@@ -0,0 +1,656 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ model.py
3
+ --------
4
+ V4 Multi-Algorithm Model Selection for MNREGA district-level forecasting.
5
+
6
+ Algorithms compared via walk-forward CV:
7
+ - GradientBoostingRegressor (current champion)
8
+ - RandomForestRegressor
9
+ - XGBoost
10
+ - LightGBM
11
+ - Ridge (linear baseline)
12
+ - ElasticNet (regularised linear baseline)
13
+
14
+ Selection criterion: mean RΒ² across walk-forward CV years (excl. 2022 anomaly).
15
+ Best model is saved to models/mnrega_best_model.pkl.
16
+
17
+ W&B logging:
18
+ - Each algorithm gets its own W&B run (group="mnrega_model_selection")
19
+ - Per-year CV metrics logged as time-series
20
+ - Feature importance logged as bar chart
21
+ - Model comparison summary table logged
22
+ - Best model flagged with tag "champion"
23
+
24
+ Usage:
25
+ export WANDB_API_KEY=your_key # or wandb login
26
+ python main.py --stage 3
27
+ """
28
+
29
+ import os
30
+ import pickle
31
+ import warnings
32
+ import numpy as np
33
+ import pandas as pd
34
+ import matplotlib
35
+ matplotlib.use("Agg")
36
+ import matplotlib.pyplot as plt
37
+
38
+ from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
39
+ from sklearn.linear_model import Ridge, ElasticNet
40
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
41
+ from sklearn.preprocessing import StandardScaler
42
+ from sklearn.pipeline import Pipeline
43
+
44
+ warnings.filterwarnings("ignore")
45
+
46
+ # Optional imports β€” graceful fallback if not installed
47
+ try:
48
+ from xgboost import XGBRegressor
49
+ HAS_XGB = True
50
+ except ImportError:
51
+ HAS_XGB = False
52
+ print("[model] xgboost not installed β€” skipping")
53
+
54
+ try:
55
+ from lightgbm import LGBMRegressor
56
+ HAS_LGB = True
57
+ except ImportError:
58
+ HAS_LGB = False
59
+ print("[model] lightgbm not installed β€” skipping")
60
+
61
+ try:
62
+ import wandb
63
+ HAS_WANDB = True
64
+ except ImportError:
65
+ HAS_WANDB = False
66
+ print("[model] wandb not installed β€” metrics will be logged locally only")
67
+
68
+ from src.features import FEATURE_COLS
69
+
70
+ TARGET = "person_days_lakhs"
71
+ FIGURES_DIR = os.path.join("reports", "figures")
72
+ OUTPUT_DIR = os.path.join("data", "processed")
73
+ MODELS_DIR = "models"
74
+ MODEL_PATH = os.path.join(MODELS_DIR, "mnrega_best_model.pkl")
75
+ WANDB_PROJECT = "SchemeImpactNet"
76
+ WANDB_GROUP = "mnrega_model_selection"
77
+
78
+ os.makedirs(FIGURES_DIR, exist_ok=True)
79
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
80
+ os.makedirs(MODELS_DIR, exist_ok=True)
81
+
82
+ # Walk-forward CV test years
83
+ WF_TEST_YEARS = [2018, 2019, 2020, 2021, 2022, 2023, 2024]
84
+
85
+ # ── Algorithm registry ────────────────────────────────────────────────────────
86
def _build_candidates() -> dict:
    """
    Build a fresh ``{name: unfitted estimator}`` registry.

    Tree ensembles are plain estimators; the two linear models are wrapped
    in a Pipeline that standardises the inputs first. XGBoost and LightGBM
    are added only when their import succeeded (HAS_XGB / HAS_LGB).
    """
    def _scaled(linear_model):
        # Linear models need standardised features.
        return Pipeline([
            ("scaler", StandardScaler()),
            ("model", linear_model),
        ])

    registry = {}
    registry["GradientBoosting"] = GradientBoostingRegressor(
        n_estimators=200, max_depth=4, learning_rate=0.03,
        subsample=0.7, min_samples_leaf=10, random_state=42,
    )
    registry["RandomForest"] = RandomForestRegressor(
        n_estimators=300, max_depth=8, min_samples_leaf=10,
        n_jobs=-1, random_state=42,
    )
    registry["Ridge"] = _scaled(Ridge(alpha=10.0))
    registry["ElasticNet"] = _scaled(
        ElasticNet(alpha=0.1, l1_ratio=0.5, max_iter=2000)
    )

    # The two optional boosters share every hyper-parameter except verbosity.
    booster_params = dict(
        n_estimators=200, max_depth=4, learning_rate=0.03,
        subsample=0.7, colsample_bytree=0.8,
        reg_alpha=0.1, reg_lambda=1.0,
        random_state=42,
    )
    if HAS_XGB:
        registry["XGBoost"] = XGBRegressor(**booster_params, verbosity=0)
    if HAS_LGB:
        registry["LightGBM"] = LGBMRegressor(**booster_params, verbosity=-1)
    return registry
125
+
126
+
127
+ # ── Main entry point ──────────────────────────────────────────────────────────
128
+
129
def run_model(df: pd.DataFrame) -> pd.DataFrame:
    """
    Full model selection pipeline:
    1. Walk-forward CV for each algorithm candidate
    2. Select best by mean R2 (excl. 2022)
    3. Train winner on all data
    4. Log to W&B when wandb is installed
    5. Save model + metadata pkl
    6. Generate figures
    7. Save and return the predictions DataFrame

    Args:
        df : Feature-engineered DataFrame containing TARGET and the columns
             returned by ``_get_features``.

    Returns:
        The predictions DataFrame produced by ``_predict_all``.
    """
    print("\n[model] ── V4 Multi-Algorithm Model Selection ───────────────")

    features = _get_features(df)
    print(f"[model] Features ({len(features)}): {features}")

    # Build the registry once; it is used both for the listing below and for
    # cross-validation (previously every estimator was constructed twice).
    candidates = _build_candidates()
    print(f"[model] Algorithms: {list(candidates.keys())}")

    # Walk-forward CV for all candidates
    all_cv_results = {}
    for name, estimator in candidates.items():
        print(f"\n[model] ── {name} ──")
        all_cv_results[name] = _walk_forward_cv(df, features, estimator, name)

    # Select best model
    best_name, best_cv = _select_best(all_cv_results)
    print(f"\n[model] βœ“ Best model: {best_name}")

    # Print full comparison table
    _print_comparison_table(all_cv_results)

    # Train winner on all data (missing feature values are set to 0, the
    # same convention the CV folds use).
    print(f"\n[model] Training {best_name} on all {len(df):,} district-years...")
    best_estimator = candidates[best_name]
    X_all = df[features].fillna(0)
    y_all = df[TARGET]
    best_estimator.fit(X_all, y_all)

    # Log to W&B
    if HAS_WANDB:
        _wandb_log_all(all_cv_results, best_name, best_estimator, features, df)

    # Save best model
    _save_model(best_name, best_estimator, features, best_cv, all_cv_results, df)

    # Figures
    _plot_model_comparison(all_cv_results, best_name)
    _plot_cv_per_year(all_cv_results, best_name)
    _plot_feature_importance(best_name, best_estimator, features)

    # Predictions + report
    predictions_df = _predict_all(best_estimator, df, features)
    _save_predictions(predictions_df)
    _save_model_report(best_name, best_cv, all_cv_results, features, best_estimator)

    print("\n[model] ── V4 Pipeline Complete ─────────────────────────────\n")
    return predictions_df
187
+
188
+
189
+ # ── Walk-forward CV ───────────────────────────────────────────────────────────
190
+
191
def _walk_forward_cv(
    df: pd.DataFrame,
    features: list,
    estimator,
    name: str,
) -> pd.DataFrame:
    """
    Walk-forward CV: for each year T in WF_TEST_YEARS, train on years < T
    and evaluate on year T.

    A fold is skipped when it has fewer than 200 training rows or fewer
    than 50 test rows. Each fold fits a deep copy of ``estimator`` so the
    caller's instance stays unfitted. Missing feature values are set to 0.
    The naive baseline predicts ``lag1_pd`` (falling back to the fold's
    mean target where lag1 is missing).

    Args:
        df        : Feature-engineered DataFrame with financial_year,
                    TARGET, lag1_pd and the ``features`` columns.
        features  : Feature column names.
        estimator : Unfitted estimator with fit/predict.
        name      : Algorithm name, used in messages only.

    Returns:
        One row per evaluated year with columns year, n, r2, mae, rmse,
        mape, naive_r2, naive_mae, r2_gain, mae_gain. When every fold is
        skipped the frame is empty but still has these columns, so callers
        that index them get NaN means instead of a KeyError.
    """
    import copy  # stdlib; imported once here rather than inside the fold loop

    cv_columns = [
        "year", "n", "r2", "mae", "rmse", "mape",
        "naive_r2", "naive_mae", "r2_gain", "mae_gain",
    ]

    print(f" {'Year':<6} {'n':>5} {'RΒ²':>8} {'MAE':>8} {'RMSE':>8} {'Naive RΒ²':>10} {'RΒ² gain':>8}")
    print(f" {'-'*68}")

    rows = []
    for test_yr in WF_TEST_YEARS:
        tr = df[df["financial_year"] < test_yr]
        te = df[df["financial_year"] == test_yr]
        if len(tr) < 200 or len(te) < 50:
            continue

        m = copy.deepcopy(estimator)
        m.fit(tr[features].fillna(0), tr[TARGET])
        pred = m.predict(te[features].fillna(0))
        naive = te["lag1_pd"].fillna(te[TARGET].mean()).values

        r2 = r2_score(te[TARGET], pred)
        mae = mean_absolute_error(te[TARGET], pred)
        rmse = np.sqrt(mean_squared_error(te[TARGET], pred))
        naive_r2 = r2_score(te[TARGET], naive)
        naive_mae = mean_absolute_error(te[TARGET], naive)
        # 1e-9 guards against division by zero for zero-valued targets.
        mape = np.mean(np.abs((te[TARGET].values - pred) / (te[TARGET].values + 1e-9))) * 100

        print(f" {test_yr:<6} {len(te):>5} {r2:>8.4f} {mae:>8.3f} {rmse:>8.3f} "
              f"{naive_r2:>10.4f} {r2-naive_r2:>+8.4f}")

        rows.append({
            "year": test_yr, "n": len(te),
            "r2": round(r2, 4),
            "mae": round(mae, 3),
            "rmse": round(rmse, 3),
            "mape": round(mape, 3),
            "naive_r2": round(naive_r2, 4),
            "naive_mae": round(naive_mae, 3),
            "r2_gain": round(r2 - naive_r2, 4),
            "mae_gain": round(naive_mae - mae, 3),
        })

    cv = pd.DataFrame(rows, columns=cv_columns)
    if cv.empty:
        # No fold was large enough (e.g. a single-state run with fewer than
        # 50 districts). Keep numeric columns so downstream means are NaN.
        print(f" [model] {name}: no walk-forward fold met the minimum size "
              f"(>=200 train rows, >=50 test rows)")
        return cv.astype("float64")

    ex22 = cv[cv["year"] != 2022]
    print(f" β†’ Mean RΒ²={cv['r2'].mean():.4f} excl.2022 RΒ²={ex22['r2'].mean():.4f} "
          f"MAE={cv['mae'].mean():.3f}L")
    return cv
241
+
242
+
243
+ # ── Model selection ───────────────────────────────────────────────────────────
244
+
245
def _select_best(all_cv: dict) -> tuple:
    """
    Select the best model by mean R2 excluding the 2022 anomaly year.

    A model whose score is undefined (no CV rows, or only a 2022 row) ranks
    below every model with a real score. Without this, a NaN score listed
    first would win, because every comparison against NaN is False.

    Args:
        all_cv : ``{model name: CV DataFrame}`` with ``year`` and ``r2``
                 columns, as returned by ``_walk_forward_cv``.

    Returns:
        ``(best_name, all_cv[best_name])``. Ties go to the model listed
        first in ``all_cv``.

    Raises:
        ValueError : ``all_cv`` is empty.
    """
    if not all_cv:
        raise ValueError("[model] No cross-validation results to select from")

    scores = {}
    for name, cv in all_cv.items():
        if cv.empty:
            # Nothing was evaluated for this model.
            scores[name] = float("nan")
            continue
        ex22 = cv[cv["year"] != 2022]
        scores[name] = ex22["r2"].mean()

    def _rank(model_name):
        score = scores[model_name]
        return float("-inf") if pd.isna(score) else score

    best_name = max(scores, key=_rank)
    print("\n[model] Model selection (mean RΒ² excl. 2022):")
    for name in sorted(scores, key=lambda n: -_rank(n)):
        marker = " ← BEST" if name == best_name else ""
        print(f" {name:<20}: {scores[name]:.4f}{marker}")

    return best_name, all_cv[best_name]
259
+
260
+
261
def _print_comparison_table(all_cv: dict) -> None:
    """
    Print one summary row per model: mean R2 over all CV years, mean R2
    excluding 2022, mean MAE, mean RMSE and mean R2 gain over the naive
    baseline.

    Args:
        all_cv : ``{model name: CV DataFrame}`` with year, r2, mae, rmse and
                 r2_gain columns.
    """
    print("\n[model] Full comparison (all years):")
    print(f" {'Model':<20} {'RΒ²':>8} {'excl22 RΒ²':>10} {'MAE':>8} {'RMSE':>8} {'RΒ²gain':>8}")
    print(f" {'-'*72}")

    for model_name in all_cv:
        results = all_cv[model_name]
        without_2022 = results[results["year"] != 2022]
        cells = [
            f"{model_name:<20}",
            f"{results['r2'].mean():>8.4f}",
            f"{without_2022['r2'].mean():>10.4f}",
            f"{results['mae'].mean():>8.3f}",
            f"{results['rmse'].mean():>8.3f}",
            f"{results['r2_gain'].mean():>+8.4f}",
        ]
        print(" " + " ".join(cells))
270
+
271
+
272
+ # ── W&B logging ───────────────────────────────────────────────────────────────
273
+
274
def _wandb_log_all(
    all_cv: dict,
    best_name: str,
    best_estimator,
    features: list,
    df: pd.DataFrame,
) -> None:
    """Log every model's walk-forward CV results to W&B, then a summary run.

    One run is opened per entry in ``all_cv`` (the best model is tagged
    ``champion``), followed by a ``model_selection_summary`` run holding the
    comparison table. Each run is finished in a ``finally`` block so that a
    failure while logging does not leave it open.

    Args:
        all_cv: Mapping of model name -> per-year CV DataFrame.
        best_name: Key of the selected model in ``all_cv``.
        best_estimator: Fitted estimator of the selected model; used only for
            feature importance.
        features: Feature names used for training.
        df: Training frame; only ``district``, ``state`` and
            ``financial_year`` are read, for summary counts.
    """
    # Per-algorithm runs
    for name, cv in all_cv.items():
        ex22 = cv[cv["year"] != 2022]
        is_best = name == best_name

        run = wandb.init(
            project=WANDB_PROJECT,
            group=WANDB_GROUP,
            name=name,
            tags=["champion"] if is_best else [],
            config={
                "algorithm": name,
                "n_features": len(features),
                "features": features,
                "wf_test_years": WF_TEST_YEARS,
                "target": TARGET,
                "is_best": is_best,
            },
            reinit=True,
        )
        try:
            # Per-year CV metrics as a time series
            for _, row in cv.iterrows():
                run.log({
                    "year": int(row["year"]),
                    "r2": row["r2"],
                    "mae": row["mae"],
                    "rmse": row["rmse"],
                    "mape": row["mape"],
                    "naive_r2": row["naive_r2"],
                    "r2_gain": row["r2_gain"],
                    "mae_gain": row["mae_gain"],
                    "is_anomaly_year": int(row["year"]) == 2022,
                })

            # Summary metrics
            run.summary.update({
                "cv_mean_r2": round(cv["r2"].mean(), 4),
                "cv_ex22_r2": round(ex22["r2"].mean(), 4),
                "cv_mean_mae": round(cv["mae"].mean(), 3),
                "cv_mean_rmse": round(cv["rmse"].mean(), 3),
                "cv_mean_mape": round(cv["mape"].mean(), 3),
                "cv_r2_gain": round(cv["r2_gain"].mean(), 4),
                "n_districts": df["district"].nunique(),
                "n_states": df["state"].nunique(),
                "train_years": len(df["financial_year"].unique()),
            })

            # Only the best model's fitted estimator is passed in, so feature
            # importance can only be logged for that run.
            if is_best:
                fi = _get_feature_importance(name, best_estimator, features)
                if fi is not None:
                    fi_table = wandb.Table(
                        columns=["feature", "importance"],
                        data=[[f, v] for f, v in sorted(fi.items(), key=lambda x: -x[1])],
                    )
                    run.log({"feature_importance": wandb.plot.bar(
                        fi_table, "feature", "importance",
                        title=f"Feature Importance β€” {name}"
                    )})

            # CV results table and per-year R^2 chart
            cv_table = wandb.Table(dataframe=cv[["year", "r2", "naive_r2", "mae", "rmse", "r2_gain"]])
            run.log({
                "cv_results_table": cv_table,
                "cv_r2_chart": wandb.plot.line_series(
                    xs=cv["year"].tolist(),
                    ys=[cv["r2"].tolist(), cv["naive_r2"].tolist()],
                    keys=["Model RΒ²", "Naive RΒ²"],
                    title=f"Walk-Forward CV RΒ² β€” {name}",
                    xname="Financial Year",
                ),
            })
        finally:
            run.finish()

    # Summary comparison run
    run = wandb.init(
        project=WANDB_PROJECT,
        group=WANDB_GROUP,
        name="model_selection_summary",
        tags=["summary"],
        reinit=True,
    )
    try:
        summary_rows = []
        for name, cv in all_cv.items():
            ex22 = cv[cv["year"] != 2022]
            summary_rows.append([
                name,
                round(cv["r2"].mean(), 4),
                round(ex22["r2"].mean(), 4),
                round(cv["mae"].mean(), 3),
                round(cv["rmse"].mean(), 3),
                round(cv["mape"].mean(), 3),
                round(cv["r2_gain"].mean(), 4),
                name == best_name,
            ])

        summary_table = wandb.Table(
            columns=["model", "mean_r2", "ex22_r2", "mean_mae",
                     "mean_rmse", "mean_mape", "r2_gain", "is_best"],
            data=summary_rows,
        )
        best_cv = all_cv[best_name]
        run.log({
            "model_comparison": summary_table,
            "best_model": best_name,
            "best_ex22_r2": round(best_cv[best_cv["year"] != 2022]["r2"].mean(), 4),
        })

        # Comparison bar chart: model name (column 0) vs. excl.-2022 R^2 (column 2)
        run.log({
            "r2_comparison": wandb.plot.bar(
                wandb.Table(
                    columns=["model", "ex22_r2"],
                    data=[[r[0], r[2]] for r in summary_rows]
                ),
                "model", "ex22_r2",
                title="Model Comparison β€” RΒ² excl. 2022",
            )
        })
    finally:
        run.finish()

    print(f"[model] W&B logs complete β†’ project: {WANDB_PROJECT} / group: {WANDB_GROUP}")
406
+
407
+
408
+ # ── Figures ───────────────────────────────────────────────────────────────────
409
+
410
def _plot_model_comparison(all_cv: dict, best_name: str) -> None:
    """Save a two-panel bar chart comparing all models.

    Left panel: mean R^2 over all CV years and over the years excluding 2022.
    Right panel: mean MAE, with the best model highlighted.

    Args:
        all_cv: Mapping of model name -> per-year CV DataFrame with columns
            ``year``, ``r2`` and ``mae``.
        best_name: Key of the selected model; must be present in ``all_cv``.
    """
    names = list(all_cv.keys())
    mean_r2 = [all_cv[n]["r2"].mean() for n in names]
    ex22_r2 = [all_cv[n][all_cv[n]["year"] != 2022]["r2"].mean() for n in names]
    mean_mae = [all_cv[n]["mae"].mean() for n in names]

    x = np.arange(len(names))
    w = 0.35

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    ax1.bar(x - w/2, mean_r2, w, label="All years", alpha=0.8, color="#42A5F5")
    ax1.bar(x + w/2, ex22_r2, w, label="excl. 2022", alpha=0.8, color="#26A69A")
    ax1.set_xticks(x); ax1.set_xticklabels(names, rotation=20, ha="right")
    ax1.set_ylabel("Mean RΒ² (Walk-Forward CV)")
    ax1.set_title("Model Comparison β€” RΒ² Score")
    # R^2 can be negative for a model worse than predicting the mean. A fixed
    # lower limit of 0 would clip those bars, so extend the axis downward only
    # when needed; otherwise the original (0, 1) range is kept.
    finite_r2 = [v for v in (*mean_r2, *ex22_r2) if np.isfinite(v)]
    lowest = min(finite_r2, default=0.0)
    ax1.set_ylim(lowest * 1.1 if lowest < 0 else 0, 1)
    ax1.legend()
    # Annotate best
    best_idx = names.index(best_name)
    ax1.annotate("β˜… BEST", xy=(best_idx + w/2, ex22_r2[best_idx] + 0.01),
                 ha="center", color="#E53935", fontsize=9, fontweight="bold")

    mae_bars = ax2.bar(x, mean_mae, alpha=0.8,
                       color=["#E53935" if n == best_name else "#78909C" for n in names])
    ax2.set_xticks(x); ax2.set_xticklabels(names, rotation=20, ha="right")
    ax2.set_ylabel("Mean MAE (lakh person-days)")
    ax2.set_title("Model Comparison β€” MAE")
    for bar in mae_bars:
        ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                 f"{bar.get_height():.2f}", ha="center", va="bottom", fontsize=8)

    plt.suptitle("SchemeImpactNet V4 β€” Algorithm Selection Results", fontsize=12, fontweight="bold")
    plt.tight_layout()
    path = os.path.join(FIGURES_DIR, "06_model_comparison.png")
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"[model] Saved: {path}")
449
+
450
+
451
def _plot_cv_per_year(all_cv: dict, best_name: str) -> None:
    """Save per-year line charts of R^2 (left) and MAE (right) for every model.

    The best model is drawn solid and thicker; the others are dashed and
    lighter. The 2022 and 2020 years are shaded on both panels.

    Args:
        all_cv: Mapping of model name -> per-year CV DataFrame with columns
            ``year``, ``r2`` and ``mae``.
        best_name: Key of the selected model.
    """
    fig, (r2_ax, mae_ax) = plt.subplots(1, 2, figsize=(14, 5))

    palette = plt.cm.tab10(np.linspace(0, 1, len(all_cv)))
    for idx, (model_name, folds) in enumerate(all_cv.items()):
        is_best = model_name == best_name
        line_style = dict(
            marker="o",
            label=model_name,
            linewidth=2.5 if is_best else 1.2,
            linestyle="-" if is_best else "--",
            alpha=1.0 if is_best else 0.65,
            color=palette[idx],
        )
        r2_ax.plot(folds["year"], folds["r2"], **line_style)
        mae_ax.plot(folds["year"], folds["mae"], **line_style)

    for panel in (r2_ax, mae_ax):
        panel.axvspan(2021.5, 2022.5, alpha=0.08, color="red", label="2022 anomaly")
        panel.axvspan(2019.5, 2020.5, alpha=0.05, color="orange", label="COVID-2020")
        panel.set_xticks(WF_TEST_YEARS)
        panel.set_xlabel("Financial Year")
        panel.legend(fontsize=8)

    r2_ax.set_ylabel("RΒ²")
    r2_ax.set_title("Walk-Forward CV RΒ² by Year")
    mae_ax.set_ylabel("MAE (lakh PD)")
    mae_ax.set_title("Walk-Forward CV MAE by Year")

    plt.suptitle("All Models β€” Walk-Forward CV Results", fontsize=12, fontweight="bold")
    plt.tight_layout()
    out_path = os.path.join(FIGURES_DIR, "07_cv_per_year.png")
    plt.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"[model] Saved: {out_path}")
481
+
482
+
483
def _plot_feature_importance(name: str, estimator, features: list) -> None:
    """Save a horizontal bar chart of feature importances and print the top 5.

    Does nothing when no importance can be extracted from ``estimator``.

    Args:
        name: Model name, used in the title and the console output.
        estimator: Fitted estimator (or pipeline) to inspect.
        features: Feature names, in the order the estimator was trained on.
    """
    fi = _get_feature_importance(name, estimator, features)
    if fi is None:
        return
    imp = pd.Series(fi).sort_values()
    fig, ax = plt.subplots(figsize=(8, max(5, len(imp) * 0.35)))
    # The upper-quartile threshold is the same for every bar; compute it once
    # rather than once per feature.
    highlight_above = imp.quantile(0.75)
    colors = ["#E53935" if imp[f] > highlight_above else "#42A5F5" for f in imp.index]
    imp.plot(kind="barh", ax=ax, color=colors)
    ax.set_title(f"Feature Importances β€” {name} (Best Model)")
    ax.set_xlabel("Importance Score")
    plt.tight_layout()
    path = os.path.join(FIGURES_DIR, "08_feature_importance.png")
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"[model] Saved: {path}")
    print(f"\n[model] Top 5 features ({name}):")
    for feat, val in imp.sort_values(ascending=False).head(5).items():
        print(f" {feat:<35}: {val:.4f}")
501
+
502
+
503
+ def _get_feature_importance(name: str, estimator, features: list):
504
+ """Extract feature importance β€” works for tree models and linear models."""
505
+ if estimator is None:
506
+ return None
507
+ try:
508
+ # Tree-based: direct feature_importances_
509
+ if hasattr(estimator, "feature_importances_"):
510
+ return dict(zip(features, estimator.feature_importances_))
511
+ # Pipeline with tree inside
512
+ if hasattr(estimator, "named_steps"):
513
+ inner = list(estimator.named_steps.values())[-1]
514
+ if hasattr(inner, "feature_importances_"):
515
+ return dict(zip(features, inner.feature_importances_))
516
+ if hasattr(inner, "coef_"):
517
+ return dict(zip(features, np.abs(inner.coef_)))
518
+ # XGBoost / LightGBM
519
+ if hasattr(estimator, "feature_importances_"):
520
+ return dict(zip(features, estimator.feature_importances_))
521
+ except Exception:
522
+ pass
523
+ return None
524
+
525
+
526
+ # ── Model persistence ─────────────────────────────────────────────────────────
527
+
528
def _save_model(
    best_name: str,
    best_estimator,
    features: list,
    best_cv: pd.DataFrame,
    all_cv: dict,
    df: pd.DataFrame,
) -> None:
    """Pickle the selected model with its metadata to ``MODEL_PATH``.

    The bundle is a plain dict holding the estimator, feature list, target
    name, training-set counts, CV results of the best model and a compact
    metric comparison of every candidate.

    Args:
        best_name: Name of the selected model.
        best_estimator: Fitted estimator to persist.
        features: Feature names used for training.
        best_cv: Per-year CV DataFrame of the selected model.
        all_cv: Mapping of model name -> per-year CV DataFrame.
        df: Training frame; ``financial_year``, ``district`` and ``state``
            are read for metadata.
    """
    ex22 = best_cv[best_cv["year"] != 2022]

    # Build comparison summary for the bundle
    comparison = {}
    for name, cv in all_cv.items():
        e22 = cv[cv["year"] != 2022]
        comparison[name] = {
            "mean_r2": round(cv["r2"].mean(), 4),
            "ex22_r2": round(e22["r2"].mean(), 4),
            "mean_mae": round(cv["mae"].mean(), 3),
            "mean_rmse": round(cv["rmse"].mean(), 3),
        }

    bundle = {
        "model": best_estimator,
        "model_name": best_name,
        "features": features,
        "target": TARGET,
        "covid_multiplier": 1.447,
        "train_years": sorted(df["financial_year"].unique().tolist()),
        "n_districts": df["district"].nunique(),
        "n_states": df["state"].nunique(),
        "feature_importance": _get_feature_importance(best_name, best_estimator, features),
        "cv_results": best_cv.to_dict(),
        "cv_mean_r2": round(best_cv["r2"].mean(), 4),
        "cv_ex22_r2": round(ex22["r2"].mean(), 4),
        "cv_mean_mae": round(best_cv["mae"].mean(), 3),
        "all_model_comparison": comparison,
    }

    # open(..., "wb") does not create missing parent directories; make sure
    # the target directory exists so a fresh checkout can save the model.
    model_dir = os.path.dirname(MODEL_PATH)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    with open(MODEL_PATH, "wb") as f:
        pickle.dump(bundle, f)
    print(f"\n[model] Model saved β†’ {MODEL_PATH}")
    print(f"[model] Best: {best_name} | ex22 RΒ²={ex22['r2'].mean():.4f} | MAE={best_cv['mae'].mean():.3f}L")
569
+
570
+
571
def load_model(path: str = MODEL_PATH) -> dict:
    """Read a pickled model bundle from ``path`` and return it.

    Prints the stored model name and two of its stored CV metrics.

    Args:
        path: Location of the pickle file; defaults to ``MODEL_PATH``.

    Returns:
        The unpickled bundle dict.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file.
    # Only point this at bundles written by this project.
    with open(path, "rb") as handle:
        saved = pickle.load(handle)

    print(f"[model] Loaded: {saved['model_name']} from {path}")
    print(f"[model] ex22 RΒ²={saved['cv_ex22_r2']} | MAE={saved['cv_mean_mae']}L")
    return saved
578
+
579
+
580
+ # ── Prediction helpers ────────────────────────────────────────────────────────
581
+
582
def _predict_all(estimator, df: pd.DataFrame, features: list) -> pd.DataFrame:
    """Predict the target for every row and return predictions with errors.

    Missing feature values are replaced with 0 before prediction.

    Args:
        estimator: Fitted model exposing ``predict``.
        df: Frame with the feature columns plus ``state``, ``district``,
            ``financial_year`` and the target column.
        features: Feature column names passed to the estimator.

    Returns:
        Frame with the identifying columns, the actual target,
        ``predicted_persondays``, ``prediction_error`` (actual minus
        predicted) and ``abs_error``.
    """
    feature_matrix = df[features].fillna(0)
    predicted = estimator.predict(feature_matrix)

    result = df[["state", "district", "financial_year", TARGET]].copy()
    result["predicted_persondays"] = predicted.round(3)
    signed_error = (result[TARGET] - result["predicted_persondays"]).round(3)
    result["prediction_error"] = signed_error
    result["abs_error"] = result["prediction_error"].abs()
    return result
589
+
590
+
591
def _save_predictions(df: pd.DataFrame) -> None:
    """Write the predictions frame to ``mnrega_predictions.csv`` in ``OUTPUT_DIR``."""
    csv_path = os.path.join(OUTPUT_DIR, "mnrega_predictions.csv")
    df.to_csv(csv_path, index=False)
    print(f"[model] Predictions saved β†’ {csv_path}")
595
+
596
+
597
+ # ── Report ────────────────────────────────────────────────────────────────────
598
+
599
def _save_model_report(
    best_name: str,
    best_cv: pd.DataFrame,
    all_cv: dict,
    features: list,
    best_estimator,
) -> None:
    """Write the plain-text model selection report to ``reports/model_report.txt``.

    The report lists every candidate's mean metrics, the best model's CV
    summary, feature importances (when available) and its year-by-year table.

    Args:
        best_name: Name of the selected model.
        best_cv: Per-year CV DataFrame of the selected model.
        all_cv: Mapping of model name -> per-year CV DataFrame.
        features: Feature names used for training.
        best_estimator: Fitted estimator, inspected for feature importance.
    """
    ex22 = best_cv[best_cv["year"] != 2022]
    path = os.path.join("reports", "model_report.txt")
    os.makedirs("reports", exist_ok=True)
    # The report contains non-ASCII characters. Without an explicit encoding
    # open() uses the locale default, which can fail to encode them on some
    # platforms, so pin UTF-8.
    with open(path, "w", encoding="utf-8") as f:
        f.write("SchemeImpactNet β€” V4 Model Selection Report\n")
        f.write("=" * 60 + "\n\n")
        f.write(f"Best Model : {best_name}\n")
        f.write(f"Selection : max mean RΒ² excl. 2022 (walk-forward CV)\n")
        f.write(f"Features : {len(features)}\n")
        f.write(f"Evaluation : Walk-forward CV (2018–2024)\n\n")

        f.write("Algorithm Comparison:\n")
        f.write(f" {'Model':<20} {'RΒ²':>8} {'ex22 RΒ²':>10} {'MAE':>8} {'RMSE':>8}\n")
        f.write(f" {'-'*60}\n")
        for name, cv in all_cv.items():
            e22 = cv[cv["year"] != 2022]
            marker = " ← BEST" if name == best_name else ""
            f.write(f" {name:<20} {cv['r2'].mean():>8.4f} "
                    f"{e22['r2'].mean():>10.4f} {cv['mae'].mean():>8.3f} "
                    f"{cv['rmse'].mean():>8.3f}{marker}\n")

        f.write(f"\nBest Model ({best_name}) Walk-Forward CV:\n")
        f.write(f" Mean RΒ² : {best_cv['r2'].mean():.4f}\n")
        f.write(f" excl.2022 RΒ²: {ex22['r2'].mean():.4f}\n")
        f.write(f" Mean MAE : {best_cv['mae'].mean():.3f} lakh\n")
        f.write(f" Mean RMSE : {best_cv['rmse'].mean():.3f} lakh\n")
        f.write(f" RΒ² gain : {best_cv['r2_gain'].mean():+.4f} vs naive lag-1\n\n")

        f.write(f"Previous (leaked) RΒ²: 0.9963\n")
        f.write(f"Leakage source: works_completed (r=1.0 with target)\n\n")
        f.write(f"2022 anomaly: West Bengal -93 to -98% reporting drop. Excl. RΒ²={ex22['r2'].mean():.4f}\n\n")

        fi = _get_feature_importance(best_name, best_estimator, features)
        if fi:
            f.write("Feature Importances:\n")
            for feat, val in sorted(fi.items(), key=lambda x: -x[1]):
                f.write(f" {feat:<35} {val:.4f}\n")

        f.write(f"\nYear-by-year CV ({best_name}):\n")
        f.write(best_cv.to_string(index=False))
    print(f"[model] Report saved β†’ {path}")
647
+
648
+
649
+ # ── Feature list helper ───────────────────────────────────────────────────────
650
+
651
def _get_features(df: pd.DataFrame) -> list:
    """Return the entries of ``FEATURE_COLS`` that exist as columns of ``df``.

    Prints a warning listing any configured feature missing from ``df``.
    The returned list preserves the order of ``FEATURE_COLS``.
    """
    present, absent = [], []
    for col in FEATURE_COLS:
        bucket = present if col in df.columns else absent
        bucket.append(col)

    if absent:
        print(f"[model] Warning: {len(absent)} features not in df: {absent}")
    return present
src/optimize.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ optimize.py (v2 — proportional rank-based LP)
3
+ -----------------------------------------------
4
+ Fixes the LP bang-bang problem caused by low efficiency variance (~7.7% CV).
5
+
6
+ Root cause: With efficiency ranging only 0.0026–0.0039, pure LP pushes
7
+ every district to either MIN_FRACTION floor or MAX_FRACTION ceiling.
8
+ 462 districts hit -60%, 262 hit +150%, only 1 in-between.
9
+
10
+ Fix: Two-stage allocation
11
+ Stage 1 — Proportional rank allocation
12
+ Compute efficiency percentile rank (0→1) per district.
13
+ Assign multiplier: rank 0 → 0.60×, rank 1 → 1.80×
14
+ Rescale to preserve total budget.
15
+ → Produces a continuous, meaningful spread of -40% to +80%
16
+
17
+ Stage 2 — LP refinement within ±15% of stage1
18
+ Tighter LP bounds around the proportional solution.
19
+ LP fills in genuine optimality within the constrained band.
20
+ → Adds economic rigour without collapsing to bang-bang.
21
+
22
+ Result: 725 unique budget_change_pct values, realistic distribution,
23
+ same total budget, higher total employment.
24
+ """
25
+
26
+ import os
27
+ import numpy as np
28
+ import pandas as pd
29
+ import matplotlib.pyplot as plt
30
+ import matplotlib.patches as mpatches
31
+ from scipy.optimize import linprog
32
+
33
+ FIGURES_DIR = os.path.join("reports", "figures")
34
+ OUTPUT_DIR = os.path.join("data", "processed")
35
+ os.makedirs(FIGURES_DIR, exist_ok=True)
36
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
37
+
38
+ # Stage 1 bounds
39
+ RANK_FLOOR = 0.60 # worst district keeps 60% of budget β†’ -40%
40
+ RANK_CEIL = 1.80 # best district gets 180% of budget β†’ +80%
41
+
42
+ # Stage 2 LP refinement band around stage1
43
+ LP_REFINE_BAND = 0.15 # Β±15% around stage1 solution
44
+
45
+ # Hard absolute limits
46
+ ABS_MIN_FRACTION = 0.40
47
+ ABS_MAX_FRACTION = 2.00
48
+
49
+
50
def run_optimizer(
    predictions_path: str = "data/processed/mnrega_predictions.csv",
    raw_path: str = "data/raw/mnrega_real_data_final_clean.csv",
    scope_state: str = None,
    total_budget_override: float = None,
    target_year: int = 2024,
) -> pd.DataFrame:
    """Run the budget reallocation end to end and return the allocation frame.

    Loads predictions and budgets, optimizes the allocation, prints a summary,
    saves two figures and writes the result CSV.

    Args:
        predictions_path: CSV of model predictions.
        raw_path: CSV holding the per-district budget columns.
        scope_state: Restrict to a single state; ``None`` means all states.
        total_budget_override: Total budget to distribute; when falsy the
            current total is kept.
        target_year: Financial year (start year) to optimize for.

    Returns:
        The per-district allocation DataFrame produced by ``_optimize``.
    """
    print("\n[optimizer-v2] ── Budget Allocation Optimizer (Proportional-LP) ──")

    scope_label = scope_state or "All-India"
    prepared = _prepare_data(predictions_path, raw_path, scope_state, target_year)
    allocation = _optimize(prepared, total_budget_override)

    _print_summary(allocation)
    _plot_allocation_comparison(allocation, scope_label)
    _plot_efficiency_gain(allocation, scope_label)
    _save_results(allocation)

    print("[optimizer-v2] ── Optimization Complete ────────────────────────────\n")
    return allocation
69
+
70
+
71
+ def _prepare_data(predictions_path, raw_path, scope_state, target_year):
72
+ preds = pd.read_csv(predictions_path)
73
+ preds = preds[preds["financial_year"] == target_year].copy()
74
+
75
+ raw = pd.read_csv(raw_path)
76
+ raw["financial_year"] = raw["financial_year"].apply(
77
+ lambda v: int(str(v).split("-")[0])
78
+ )
79
+ budget = raw[raw["financial_year"] == target_year][
80
+ ["state", "district", "budget_allocated_lakhs", "expenditure_lakhs"]
81
+ ].copy()
82
+
83
+ df = preds.merge(budget, on=["state", "district"], how="inner")
84
+ df = df.dropna(subset=["budget_allocated_lakhs", "predicted_persondays"])
85
+ df = df[df["budget_allocated_lakhs"] > 0].reset_index(drop=True)
86
+
87
+ if scope_state:
88
+ df = df[df["state"] == scope_state].reset_index(drop=True)
89
+
90
+ print(f"[optimizer-v2] Scope: {scope_state or 'All-India'} | Districts: {len(df)} | Year: {target_year}")
91
+ df["persondays_per_lakh"] = df["predicted_persondays"] / df["budget_allocated_lakhs"]
92
+ print(f"[optimizer-v2] Efficiency CV: {df['persondays_per_lakh'].std()/df['persondays_per_lakh'].mean()*100:.1f}%")
93
+ print(f"[optimizer-v2] Total budget: β‚Ή{df['budget_allocated_lakhs'].sum():,.0f} lakh")
94
+ return df
95
+
96
+
97
def _optimize(df: pd.DataFrame, total_budget_override: float = None) -> pd.DataFrame:
    """Reallocate district budgets in two stages and return the result.

    Stage 1 scales each district's current budget by a multiplier interpolated
    linearly between ``RANK_FLOOR`` and ``RANK_CEIL`` from its efficiency
    percentile rank, then rescales so the total equals the target budget.
    Stage 2 solves a linear program maximizing efficiency-weighted budget,
    bounded to ``LP_REFINE_BAND`` around stage 1 and to the absolute
    ``ABS_MIN_FRACTION`` / ``ABS_MAX_FRACTION`` limits. If the LP does not
    succeed the stage 1 allocation is used.

    Args:
        df: Frame with ``budget_allocated_lakhs``, ``persondays_per_lakh``
            and ``predicted_persondays``.
        total_budget_override: Total budget to distribute; when falsy the sum
            of current budgets is used.

    Returns:
        Copy of ``df`` with the optimized budget, budget change (absolute and
        percent) and status-quo / optimized person-days columns added.
    """
    baseline = df["budget_allocated_lakhs"].values
    pd_per_lakh = df["persondays_per_lakh"].values
    budget_cap = total_budget_override or baseline.sum()

    # Stage 1: proportional rank allocation
    pct_rank = pd.Series(pd_per_lakh).rank(pct=True).values
    rank_multiplier = RANK_FLOOR + pct_rank * (RANK_CEIL - RANK_FLOOR)
    unscaled = baseline * rank_multiplier

    # Rescale so the stage 1 budgets add up to the target total
    rescale = budget_cap / unscaled.sum()
    stage1 = unscaled * rescale

    stage1_shift_pct = (stage1 - baseline) / baseline * 100
    print(f"[optimizer-v2] Stage 1 (proportional rank) range: "
          f"{stage1_shift_pct.min():.1f}% to "
          f"{stage1_shift_pct.max():.1f}%")

    # Stage 2: LP refinement inside the band around stage 1, clipped to the
    # absolute per-district limits
    lower = np.maximum(stage1 * (1 - LP_REFINE_BAND), baseline * ABS_MIN_FRACTION)
    upper = np.minimum(stage1 * (1 + LP_REFINE_BAND), baseline * ABS_MAX_FRACTION)

    # linprog minimizes, so negate efficiency to maximize person-days
    lp = linprog(
        -pd_per_lakh,
        A_ub=[np.ones(len(df))],
        b_ub=[budget_cap],
        bounds=list(zip(lower, upper)),
        method="highs",
    )

    if not lp.success:
        print(f"[optimizer-v2] LP failed, using stage1 allocation")
        final_budget = stage1
    else:
        final_budget = lp.x
        print(f"[optimizer-v2] Stage 2 LP converged βœ“ | Unique values: {pd.Series(final_budget.round(2)).nunique()}")

    out = df.copy()
    out["optimized_budget"] = final_budget.round(2)
    out["budget_change"] = out["optimized_budget"] - out["budget_allocated_lakhs"]
    out["budget_change_pct"] = (out["budget_change"] / out["budget_allocated_lakhs"] * 100).round(2)
    out["sq_persondays"] = out["predicted_persondays"]
    out["opt_persondays"] = (out["persondays_per_lakh"] * out["optimized_budget"]).round(3)
    out["persondays_gain"] = (out["opt_persondays"] - out["sq_persondays"]).round(3)
    out["persondays_gain_pct"] = (out["persondays_gain"] / out["sq_persondays"] * 100).round(2)
    return out
147
+
148
+
149
+ def _print_summary(df):
150
+ sq = df["sq_persondays"].sum()
151
+ opt = df["opt_persondays"].sum()
152
+ gain = opt - sq
153
+
154
+ print(f"\n[optimizer-v2] ── Results ───────────────────────────────────────")
155
+ print(f" budget_change_pct β€” min: {df['budget_change_pct'].min():.1f}% "
156
+ f"max: {df['budget_change_pct'].max():.1f}% "
157
+ f"std: {df['budget_change_pct'].std():.1f}% "
158
+ f"unique: {df['budget_change_pct'].nunique()}")
159
+ print(f" Status quo : {sq:>10,.2f} lakh PD")
160
+ print(f" Optimized : {opt:>10,.2f} lakh PD")
161
+ print(f" Net gain : {gain:>+10,.2f} lakh PD ({gain/sq*100:+.2f}%)")
162
+ print(f" Budget : β‚Ή{df['budget_allocated_lakhs'].sum():,.0f} lakh (unchanged)")
163
+ print(f"[optimizer-v2] ────────────────────────────────────────────────────")
164
+
165
+ print("\n[optimizer-v2] Top 5 districts to INCREASE:")
166
+ print(df.nlargest(5, "persondays_gain")[
167
+ ["state","district","budget_allocated_lakhs","optimized_budget","budget_change_pct","persondays_gain"]
168
+ ].to_string(index=False))
169
+ print("\n[optimizer-v2] Top 5 districts to REDUCE:")
170
+ print(df.nsmallest(5, "budget_change")[
171
+ ["state","district","budget_allocated_lakhs","optimized_budget","budget_change_pct","persondays_gain"]
172
+ ].to_string(index=False))
173
+
174
+
175
def _plot_allocation_comparison(df, scope):
    """Save a paired horizontal bar chart of status-quo vs optimized budgets.

    Shows the 10 largest increases and the 10 largest decreases (duplicate
    rows removed), ordered by budget change.

    Args:
        df: Allocation frame as returned by ``_optimize``.
        scope: Label used in the chart title.
    """
    biggest_rises = df.nlargest(10, "budget_change")
    biggest_cuts = df.nsmallest(10, "budget_change")
    selected = pd.concat([biggest_rises, biggest_cuts]).drop_duplicates()
    selected = selected.sort_values("budget_change")

    fig, ax = plt.subplots(figsize=(12, max(7, len(selected) * 0.4)))
    positions = np.arange(len(selected))
    bar_h = 0.38
    ax.barh(positions - bar_h / 2, selected["budget_allocated_lakhs"].values,
            height=bar_h, color="#90CAF9", label="Status Quo")
    ax.barh(positions + bar_h / 2, selected["optimized_budget"].values,
            height=bar_h, color="#1565C0", label="Optimized")
    ax.set_yticks(positions)
    ax.set_yticklabels(selected["district"], fontsize=8)
    ax.set_xlabel("Budget (Rs. lakh)")
    ax.set_title(f"Budget Reallocation β€” {scope}")
    ax.legend()
    plt.tight_layout()
    _save_fig("08_budget_allocation_comparison.png")
185
+
186
+
187
def _plot_efficiency_gain(df, scope):
    """Save a scatter plot of district efficiency against budget change (%).

    Points are green where the budget increases and red otherwise.

    Args:
        df: Allocation frame as returned by ``_optimize``.
        scope: Label used in the chart title.
    """
    increase_color = "#2E7D32"
    decrease_color = "#C62828"

    fig, ax = plt.subplots(figsize=(10, 7))
    point_colors = df["budget_change"].apply(
        lambda change: increase_color if change > 0 else decrease_color
    )
    ax.scatter(df["persondays_per_lakh"], df["budget_change_pct"],
               c=point_colors, alpha=0.55, s=40)
    ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
    ax.set_xlabel("Efficiency (PD per β‚Ή lakh)")
    ax.set_ylabel("Budget Change (%)")
    ax.set_title(f"Efficiency vs Budget Change β€” {scope}")

    legend_handles = [
        mpatches.Patch(color=increase_color, label="Increase"),
        mpatches.Patch(color=decrease_color, label="Decrease"),
    ]
    ax.legend(handles=legend_handles)
    plt.tight_layout()
    _save_fig("09_efficiency_gain_by_district.png")
196
+
197
+
198
def _save_results(df):
    """Write the allocation table to ``optimized_budget_allocation.csv``.

    Rows are ordered by person-days gain, largest first, and only the
    reporting columns are kept.
    """
    report_columns = [
        "state", "district",
        "budget_allocated_lakhs", "optimized_budget",
        "budget_change", "budget_change_pct",
        "sq_persondays", "opt_persondays",
        "persondays_gain", "persondays_gain_pct",
        "persondays_per_lakh",
    ]
    csv_path = os.path.join(OUTPUT_DIR, "optimized_budget_allocation.csv")
    ranked = df[report_columns].sort_values("persondays_gain", ascending=False)
    ranked.to_csv(csv_path, index=False)
    print(f"[optimizer-v2] Saved β†’ {csv_path}")
205
+
206
+
207
def _save_fig(filename):
    """Save the current matplotlib figure under ``FIGURES_DIR`` and close it."""
    target = os.path.join(FIGURES_DIR, filename)
    plt.savefig(target, bbox_inches="tight")
    plt.close()
    print(f"[optimizer-v2] Saved: {target}")
src/pipeline.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ pipeline.py
3
+ -----------
4
+ V3 pipeline orchestrator for SchemeImpactNet.
5
+
6
+ Changes from original:
7
+ - RAW_PATH now points to the real Dataful government CSV
8
+ (confirmed 99% match with mnrega_india_unified.csv, <0.005L diff)
9
+ - Feature engineering uses V3 leak-free features (src/features.py)
10
+ - Model uses GBR V3 with walk-forward CV (src/model.py)
11
+ - Model saved to models/mnrega_gbr_v3.pkl
12
+ - Removed generate_synthetic dependency from Stage 1
13
+ - Stage 3 model comparison retained but flags honest metrics
14
+
15
+ Data sources:
16
+ Real: data/raw/20063- Dataful/mnrega-...-persondays-...csv
17
+ β†’ person_days_lakhs, households_availed (real gov data)
18
+ β†’ avg_wage_rate (official wage schedule, exogenous)
19
+ Synthetic: all other columns (rainfall, poverty, pmkisan, pmay)
20
+ β†’ EXCLUDED from V3 model features
21
+ """
22
+
23
+ import os
24
+ import pandas as pd
25
+ import numpy as np
26
+
27
+ from src.clean import clean
28
+ from src.features import build_features
29
+ from src.eda import run_eda
30
+ from src.model import run_model
31
+
32
+ # ── Data paths ────────────────────────────────────────────────────────────────
33
+ DATAFUL_PATH = os.path.join(
34
+ "data", "raw", "20063- Dataful",
35
+ "mnrega-year-month-state-and-district-wise-total-persondays-"
36
+ "and-households-engaged-in-work.csv"
37
+ )
38
+ UNIFIED_PATH = os.path.join("data", "raw", "mnrega_india_unified.csv")
39
+ PROCESSED_PATH = os.path.join("data", "processed", "mnrega_cleaned.csv")
40
+ MODEL_PATH = os.path.join("models", "mnrega_best_model.pkl")
41
+
42
+ SCOPE_LABEL = {
43
+ 1: "Maharashtra",
44
+ 2: "All-India",
45
+ 3: "All-India (V3 leak-free)",
46
+ }
47
+
48
+
49
def run_pipeline(stage: int = 3) -> pd.DataFrame:
    """Run extract -> clean -> features -> EDA -> model for one stage.

    Stage 1 restricts the data to Maharashtra; stages 2 and 3 use all states.

    Args:
        stage: Pipeline stage, one of 1, 2 or 3.

    Returns:
        The predictions returned by ``run_model``.

    Raises:
        ValueError: If ``stage`` is not 1, 2 or 3.
    """
    # Validate with an explicit raise: an assert is removed when Python runs
    # with -O, which would let an invalid stage through.
    if stage not in (1, 2, 3):
        raise ValueError("Stage must be 1, 2, or 3")

    print("\n" + "=" * 60)
    print(f" SchemeImpactNet V3 β€” Stage {stage} Pipeline")
    print(f" Scope : {SCOPE_LABEL[stage]}")
    print("=" * 60)

    # Extract
    print(f"\n[pipeline] Step 1: Extract (real government data)")
    df = _load_real_data(state_filter="Maharashtra" if stage == 1 else None)

    # Clean
    print(f"\n[pipeline] Step 2: Clean")
    df = _clean_real(df)

    # Features
    print(f"\n[pipeline] Step 3: V3 Feature Engineering (leak-free)")
    df = build_features(df)

    # Save processed
    os.makedirs(os.path.dirname(PROCESSED_PATH), exist_ok=True)
    df.to_csv(PROCESSED_PATH, index=False)
    print(f"\n[pipeline] Processed data saved β†’ {PROCESSED_PATH}")

    # EDA
    print(f"\n[pipeline] Step 4: EDA")
    run_eda(df, scope=SCOPE_LABEL[stage])

    # Model
    print(f"\n[pipeline] Step 5: V3 Model (walk-forward CV + pkl save)")
    predictions = run_model(df)

    print("\n" + "=" * 60)
    print(f" Stage {stage} Complete!")
    print(f" Processed : {PROCESSED_PATH}")
    print(f" Model : {MODEL_PATH}")
    print(f" Figures : reports/figures/")
    print(f" Predictions : data/processed/mnrega_predictions.csv")
    print(f" Report : reports/model_report.txt")
    print("=" * 60 + "\n")

    return predictions
92
+
93
+
94
+ # ── Real data loader ──────────────────────────────────────────────────────────
95
+
96
def _load_real_data(state_filter: str = None) -> pd.DataFrame:
    """Load district-year MNREGA data, preferring the Dataful government CSV.

    The Dataful file is in long format (one row per district, month and
    category). It is pivoted to one row per district and fiscal year by
    summing the monthly values, and ``avg_wage_rate`` is joined from the
    unified CSV when that file exists. If the Dataful file is missing, the
    unified CSV is loaded as-is.

    Args:
        state_filter: Keep only rows of this state when given.

    Returns:
        Frame with ``state``, ``district``, ``financial_year`` (start year as
        int) and the measure columns that are available.
    """
    def _fy_start(value) -> int:
        # '2014-15' -> 2014; values without a dash are cast directly.
        text = str(value)
        return int(text.split("-")[0]) if "-" in text else int(value)

    def _normalize_columns(frame: pd.DataFrame) -> None:
        frame.columns = [c.strip().lower().replace(" ", "_") for c in frame.columns]

    if os.path.exists(DATAFUL_PATH):
        print(f"[pipeline] Loading Dataful government CSV: {DATAFUL_PATH}")
        df_raw = pd.read_csv(DATAFUL_PATH)
        _normalize_columns(df_raw)

        df_raw["fy"] = df_raw["fiscal_year"].apply(_fy_start)
        # Exclude incomplete current fiscal year
        df_raw = df_raw[df_raw["fy"] <= 2024]

        # Pivot: sum monthly values to annual per district
        pivot = df_raw.pivot_table(
            index=["fiscal_year", "fy", "state", "district"],
            columns="category",
            values="value",
            aggfunc="sum"
        ).reset_index()
        pivot.columns.name = None

        # Rename to match model schema
        pivot = pivot.rename(columns={
            "Persondays": "person_days",
            "Household": "households_availed",
            "fy": "financial_year",
        })
        pivot["person_days_lakhs"] = (pivot["person_days"] / 1e5).round(3)

        # Bring in avg_wage_rate from unified CSV (official schedule, exogenous)
        if os.path.exists(UNIFIED_PATH):
            df_uni = pd.read_csv(UNIFIED_PATH)
            _normalize_columns(df_uni)
            df_uni["financial_year"] = df_uni["financial_year"].apply(_fy_start)
            # Keep exactly one wage per (state, year). De-duplicating on the
            # wage value as well would keep several rows whenever the unified
            # file holds differing values for one state-year, and the merge
            # below would then multiply the district rows.
            wage_map = (
                df_uni[["state", "financial_year", "avg_wage_rate"]]
                .dropna(subset=["avg_wage_rate"])
                .drop_duplicates(subset=["state", "financial_year"])
            )
            pivot = pivot.merge(wage_map, on=["state", "financial_year"], how="left")

        # Keep only needed columns
        keep = ["state", "district", "financial_year",
                "person_days_lakhs", "households_availed", "avg_wage_rate"]
        df = pivot[[c for c in keep if c in pivot.columns]].copy()

    else:
        print(f"[pipeline] Dataful CSV not found, falling back to unified CSV")
        print(f"[pipeline] NOTE: unified CSV contains synthetic columns β€” "
              f"V3 features ignore them")
        df = pd.read_csv(UNIFIED_PATH)
        _normalize_columns(df)
        df["financial_year"] = df["financial_year"].apply(_fy_start)

    if state_filter:
        before = len(df)
        df = df[df["state"] == state_filter].reset_index(drop=True)
        print(f"[pipeline] Filtered to {state_filter}: {before} β†’ {len(df)} rows")

    print(f"[pipeline] Loaded {len(df):,} rows | "
          f"{df['state'].nunique()} states | "
          f"{df['district'].nunique()} districts | "
          f"{df['financial_year'].nunique()} years "
          f"({df['financial_year'].min()}–{df['financial_year'].max()})")
    return df
169
+
170
+
171
+ def _clean_real(df: pd.DataFrame) -> pd.DataFrame:
172
+ """
173
+ Lightweight clean for the real Dataful data.
174
+ The full clean() from src/clean.py expects synthetic columns β€”
175
+ we do a minimal version here.
176
+ """
177
+ df = df.sort_values(["state", "district", "financial_year"]).reset_index(drop=True)
178
+
179
+ # Strip strings
180
+ for col in df.select_dtypes(include="object").columns:
181
+ df[col] = df[col].str.strip()
182
+
183
+ # Numeric cast
184
+ for col in ["person_days_lakhs", "households_availed", "avg_wage_rate"]:
185
+ if col in df.columns:
186
+ df[col] = pd.to_numeric(df[col], errors="coerce")
187
+
188
+ # Forward-fill wage within state (official schedule rarely changes mid-year)
189
+ if "avg_wage_rate" in df.columns:
190
+ df["avg_wage_rate"] = df.groupby("state")["avg_wage_rate"].transform(
191
+ lambda s: s.ffill().bfill()
192
+ )
193
+
194
+ # Drop rows with no person_days_lakhs
195
+ before = len(df)
196
+ df = df.dropna(subset=["person_days_lakhs"]).reset_index(drop=True)
197
+ if len(df) < before:
198
+ print(f"[pipeline] Dropped {before - len(df)} rows with null person_days_lakhs")
199
+
200
+ print(f"[pipeline] Cleaned. Shape: {df.shape}")
201
+ return df
202
+
203
+
204
def run_optimizer_step(scope_state: str = None) -> None:
    """Run the budget optimizer on the saved predictions for 2024.

    Args:
        scope_state: Restrict the optimization to one state; ``None`` runs it
            for all states.
    """
    # Imported here rather than at module level, as in the original.
    from src.optimize import run_optimizer

    predictions_csv = os.path.join("data", "processed", "mnrega_predictions.csv")
    run_optimizer(
        predictions_path=predictions_csv,
        raw_path=UNIFIED_PATH,
        scope_state=scope_state,
        target_year=2024,
    )
src/streamlit_app.py DELETED
@@ -1,40 +0,0 @@
1
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

# A bare string expression: Streamlit "magic" renders it as Markdown.
"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

# User controls: how many points to draw and how tightly the spiral winds.
point_count = st.slider("Number of points in spiral", 1, 10000, 1100)
turn_count = st.slider("Number of turns in spiral", 1, 300, 31)

# Evenly spaced fractions in [0, 1]; they serve both as the radius and as the
# colour index of every point.
fractions = np.linspace(0, 1, point_count)
angles = 2 * np.pi * turn_count * fractions

spiral = pd.DataFrame({
    "x": fractions * np.cos(angles),
    "y": fractions * np.sin(angles),
    "idx": fractions,
    "rand": np.random.randn(point_count),  # random value driving the point size
})

# Axis-free scatter plot: colour follows position along the spiral, size
# follows the random column.
spiral_chart = (
    alt.Chart(spiral, height=700, width=700)
    .mark_point(filled=True)
    .encode(
        x=alt.X("x", axis=None),
        y=alt.Y("y", axis=None),
        color=alt.Color("idx", legend=None, scale=alt.Scale()),
        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
    )
)
st.altair_chart(spiral_chart)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
start.sh ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# ============================================================
#  SchemeImpactNet — Start Script
#  Usage: ./start.sh [options]
#
#  Options:
#    --skip-pipeline     Skip data generation even if files missing
#    --backend-only      Start only the FastAPI backend
#    --frontend-only     Start only the Streamlit frontend
#    --port-backend N    Backend port (default: 8000)
#    --port-frontend N   Frontend port (default: 8501)
#    --stage N           Pipeline stage to run if needed (1|2|3, default: 3)
# ============================================================

# Abort on command failure, on use of an unset variable, and when any stage
# of a pipeline fails.
set -euo pipefail

# ── Defaults ──────────────────────────────────────────────────────────────────
BACKEND_PORT=8000
FRONTEND_PORT=8501
PIPELINE_STAGE=3
SKIP_PIPELINE=false
BACKEND_ONLY=false
FRONTEND_ONLY=false
# PIDs of the background services; empty until the service has been launched.
BACKEND_PID=""
FRONTEND_PID=""

# ── Always resolve project root (where this script lives) ─────────────────────
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# ── Colours ───────────────────────────────────────────────────────────────────
RED='\033[0;31m'
GREEN='\033[0;32m'
AMBER='\033[0;33m'
BLUE='\033[0;34m'
BOLD='\033[1m'
RESET='\033[0m'

# Log helpers: a coloured status glyph followed by the message.
ok() { echo -e "${GREEN} ✓${RESET} $*"; }
info() { echo -e "${BLUE} →${RESET} $*"; }
warn() { echo -e "${AMBER} ⚠${RESET} $*"; }
# Errors go to stderr so they survive stdout redirection.
err() { echo -e "${RED} ✗${RESET} $*" >&2; }
# Horizontal rule used to separate sections of the output.
hr() { echo -e "${BOLD}──────────────────────────────────────────────────${RESET}"; }
42
+
43
+ # ── Arg parsing ───────────────────────────────────────────────────────────────
44
# Consume the command line one option at a time. Options that take a value
# are validated first: under `set -u`, reading a missing "$2" would abort the
# script with an opaque "unbound variable" message.
while [[ $# -gt 0 ]]; do
  case $1 in
  --skip-pipeline) SKIP_PIPELINE=true ;;
  --backend-only) BACKEND_ONLY=true ;;
  --frontend-only) FRONTEND_ONLY=true ;;
  --port-backend | --port-frontend | --stage)
    if [[ $# -lt 2 ]]; then
      err "Option $1 requires a value."
      exit 1
    fi
    case $1 in
    --port-backend) BACKEND_PORT="$2" ;;
    --port-frontend) FRONTEND_PORT="$2" ;;
    --stage) PIPELINE_STAGE="$2" ;;
    esac
    # Drop the option name here; the shared shift below drops its value.
    shift
    ;;
  *) warn "Unknown option: $1" ;;
  esac
  shift
done
65
+
66
+ # ── Cleanup handler ───────────────────────────────────────────────────────────
67
# Stop whatever services this script started. Safe to call when neither
# service is running (both PID variables are then empty).
cleanup() {
  # Disarm the traps first: otherwise Ctrl+C runs this handler for INT and
  # then a second time for the EXIT that follows.
  trap - EXIT INT TERM
  echo ""
  hr
  info "Shutting down services…"
  if [[ -n "$BACKEND_PID" ]]; then
    # BACKEND_PID is the subshell wrapping the `uvicorn | sed` pipeline, so
    # signal its children first or uvicorn can outlive the script. pkill is
    # best-effort: it may be missing on minimal images.
    pkill -TERM -P "$BACKEND_PID" 2>/dev/null || true
    kill "$BACKEND_PID" 2>/dev/null && ok "Backend stopped"
  fi
  if [[ -n "$FRONTEND_PID" ]]; then
    kill "$FRONTEND_PID" 2>/dev/null && ok "Frontend stopped"
  fi
  hr
}
trap cleanup EXIT
# Exit explicitly after a signal, using the conventional 128+signal status.
trap 'cleanup; exit 130' INT
trap 'cleanup; exit 143' TERM
76
+
77
+ # ── Banner ────────────────────────────────────────────────────────────────────
78
echo ""
echo -e "${BOLD} ◈ SchemeImpactNet — Service Manager${RESET}"
hr
echo ""

# ── Prerequisite checks ───────────────────────────────────────────────────────
info "Checking prerequisites…"

if ! command -v python &>/dev/null && ! command -v python3 &>/dev/null; then
  err "Python not found. Install Python 3.9+."
  exit 1
fi
# Prefer python3 and fall back to python. Later sections of the script reuse
# $PYTHON.
PYTHON=$(command -v python3 2>/dev/null || command -v python)
ok "Python → $("$PYTHON" --version 2>&1)"

# uvicorn and streamlit are installed on demand when they cannot be run as
# modules of the selected interpreter.
if ! "$PYTHON" -m uvicorn --version &>/dev/null; then
  warn "uvicorn not found — attempting install…"
  "$PYTHON" -m pip install "uvicorn[standard]" --quiet || {
    err "uvicorn install failed."
    exit 1
  }
fi
ok "uvicorn ready"

if ! "$PYTHON" -m streamlit --version &>/dev/null; then
  warn "streamlit not found — attempting install…"
  "$PYTHON" -m pip install streamlit --quiet || {
    err "streamlit install failed."
    exit 1
  }
fi
# The version is taken from the third word of `streamlit --version`.
STREAMLIT_VER=$("$PYTHON" -m streamlit --version 2>&1 | awk '{print $3}')
ok "streamlit $STREAMLIT_VER ready"

# Compare only when the version really starts with MAJOR.MINOR: a non-numeric
# operand in an arithmetic test would abort the script under `set -eu`.
if [[ "$STREAMLIT_VER" =~ ^([0-9]+)\.([0-9]+) ]]; then
  STREAMLIT_MAJOR=${BASH_REMATCH[1]}
  STREAMLIT_MINOR=${BASH_REMATCH[2]}
  # 10# forces base 10 so a component with a leading zero is not read as octal.
  if ((10#$STREAMLIT_MAJOR < 1 || (10#$STREAMLIT_MAJOR == 1 && 10#$STREAMLIT_MINOR < 36))); then
    warn "Streamlit $STREAMLIT_VER — upgrade to 1.36+ for st.navigation():"
    warn " pip install --upgrade streamlit"
  fi
else
  warn "Could not parse Streamlit version '$STREAMLIT_VER' — skipping the 1.36+ check"
fi

# Both entry points must exist before anything is started.
if [[ ! -f "$PROJECT_ROOT/frontend/app.py" ]]; then
  err "frontend/app.py not found at $PROJECT_ROOT/frontend/app.py"
  exit 1
fi
ok "frontend/app.py found"

if [[ ! -f "$PROJECT_ROOT/backend/main.py" ]]; then
  err "backend/main.py not found at $PROJECT_ROOT/backend/main.py"
  exit 1
fi
ok "backend/main.py found"

echo ""
132
+
133
+ # ── Data pipeline ─────────��───────────────────────────────────────────────────
134
# Run the data pipeline only when the backend will be started and the user
# did not pass --skip-pipeline.
if [[ "$FRONTEND_ONLY" == false && "$SKIP_PIPELINE" == false ]]; then
  # Outputs that must exist for the pipeline to be skipped.
  PROCESSED_FILES=(
    "$PROJECT_ROOT/data/processed/mnrega_cleaned.csv"
    "$PROJECT_ROOT/data/processed/mnrega_predictions.csv"
    "$PROJECT_ROOT/data/processed/optimized_budget_allocation.csv"
  )

  MISSING=false
  for f in "${PROCESSED_FILES[@]}"; do
    if [[ ! -f "$f" ]]; then
      warn "Missing: $f"
      MISSING=true
    fi
  done

  if [[ "$MISSING" == true ]]; then
    hr
    info "Processed data not found — running Stage $PIPELINE_STAGE pipeline…"
    info "This may take several minutes on first run."
    hr
    echo ""
    # main.py is run from the project root; the working directory stays
    # there afterwards.
    cd "$PROJECT_ROOT" && "$PYTHON" main.py --stage "$PIPELINE_STAGE" || {
      err "Pipeline failed. Check errors above."
      exit 1
    }
    echo ""
    ok "Pipeline complete"
    hr
    echo ""
  else
    ok "Processed data found — skipping pipeline"
    for f in "${PROCESSED_FILES[@]}"; do
      # Quote "$f" so a project path containing spaces is not word-split.
      # The figure is the raw line count reported by wc -l.
      info " $(basename "$f") ($(wc -l <"$f") rows)"
    done
    echo ""
  fi
fi
171
+
172
+ # ── Start backend ─────────────────────────────────────────────────────────────
173
if [[ "$FRONTEND_ONLY" == false ]]; then
  # Only look at processes LISTENING on the port. A plain `lsof -i :PORT`
  # also matches sockets that merely use that port number (for example a
  # client connected to a remote service), and those would be killed too.
  if lsof -t -iTCP:"$BACKEND_PORT" -sTCP:LISTEN &>/dev/null; then
    warn "Port $BACKEND_PORT already in use — stopping existing process…"
    lsof -t -iTCP:"$BACKEND_PORT" -sTCP:LISTEN | xargs kill -9 2>/dev/null || true
    sleep 1
  fi

  info "Starting FastAPI backend on port $BACKEND_PORT…"
  # Backend must run from project root so 'backend.main' import resolves.
  # Its output is prefixed with "[backend]" by sed; BACKEND_PID is therefore
  # the PID of the wrapping subshell, not of uvicorn itself.
  # NOTE(review): --reload is a development feature; confirm it is wanted
  # when this script is used for deployment.
  (cd "$PROJECT_ROOT" && $PYTHON -m uvicorn backend.main:app \
    --host 0.0.0.0 \
    --port "$BACKEND_PORT" \
    --reload \
    --log-level warning \
    2>&1 | sed "s/^/ [backend] /") &
  BACKEND_PID=$!

  info "Waiting for backend health check…"
  # Probe /health once per second for at most MAX_WAIT attempts.
  MAX_WAIT=15
  BACKEND_UP=false
  for ((WAITED = 0; WAITED < MAX_WAIT; WAITED++)); do
    if curl -sf "http://localhost:$BACKEND_PORT/health" &>/dev/null; then
      BACKEND_UP=true
      break
    fi
    sleep 1
  done

  if [[ "$BACKEND_UP" == true ]]; then
    ok "Backend live → http://localhost:$BACKEND_PORT"
  else
    # Deliberately non-fatal: startup continues without a healthy backend.
    warn "Backend health check timed out after ${MAX_WAIT}s — continuing anyway"
  fi
  echo ""
fi
204
+
205
+ # ── Start frontend ────────────────────────────────────────────────────────────
206
if [[ "$BACKEND_ONLY" == false ]]; then
  # Only look at processes LISTENING on the port; a plain `lsof -i :PORT`
  # also matches unrelated sockets that merely use that port number.
  if lsof -t -iTCP:"$FRONTEND_PORT" -sTCP:LISTEN &>/dev/null; then
    warn "Port $FRONTEND_PORT already in use — stopping existing process…"
    lsof -t -iTCP:"$FRONTEND_PORT" -sTCP:LISTEN | xargs kill -9 2>/dev/null || true
    sleep 1
  fi

  info "Starting Streamlit frontend on port $FRONTEND_PORT…"
  # Streamlit is launched from inside frontend/ so app.py is found by its
  # relative name; the script then returns to the project root.
  cd "$PROJECT_ROOT/frontend"
  $PYTHON -m streamlit run app.py --server.port "$FRONTEND_PORT" --server.headless true --browser.gatherUsageStats false &
  FRONTEND_PID=$!
  cd "$PROJECT_ROOT"

  # Give the process a moment, then report success only if it is still
  # alive (kill -0 checks for existence without sending a signal).
  sleep 2
  if kill -0 "$FRONTEND_PID" 2>/dev/null; then
    ok "Frontend live → http://localhost:$FRONTEND_PORT"
  else
    warn "Frontend process exited during startup — see the output above"
  fi
  echo ""
fi
223
+
224
+ # ── Ready banner ──────────────────────────────────────────────────────────────
225
hr
echo ""
echo -e "${BOLD} ◈ SchemeImpactNet is running${RESET}"
echo ""
# List only the endpoints of the services that were requested.
if [[ "$FRONTEND_ONLY" == false ]]; then
  echo -e " ${GREEN}Backend${RESET} http://localhost:$BACKEND_PORT"
  echo -e " ${GREEN}API docs${RESET} http://localhost:$BACKEND_PORT/docs"
fi
if [[ "$BACKEND_ONLY" == false ]]; then
  echo -e " ${GREEN}Dashboard${RESET} http://localhost:$FRONTEND_PORT"
fi
echo ""
echo -e " ${BOLD}Press Ctrl+C to stop all services${RESET}"
echo ""
hr
echo ""

# ── Keep alive ────────────────────────────────────────────────────────────────
# Block until every background job started by this script has exited.
wait