| """ |
| Simulated farmer registry — pre-generates a DPI profile for every entry |
| in SAMPLE_FARMERS the first time `get_registry()` is called. |
| |
| Deterministic: each farmer's profile is seeded by |
| `hashlib.md5(farmer_id)`, so the same 100 profiles appear on every |
| restart and across environments. Phone numbers match the farmer's |
| `FarmerPersona.farmer_id` via the same seed, which means DPI lookups |
| resolve consistently whether keyed by farmer_id or phone. |
| |
| Design rule: **land record acreage must plausibly support the claimed |
| quintal yield** on the FarmerPersona. If Lakshmi grows 25 quintals of |
| rice, her land record needs to be at least 0.6 ha (rice yield ~40-50 |
| q/ha in the Cauvery delta). The simulator enforces this so the credit |
| readiness assessment can trust the land record when cross-checking. |
| |
| Similarly, KCC credit limits are scaled against land holdings using |
| realistic NABARD-style formulas (Rs 50k–150k per hectare for Tamil Nadu |
| crops), so "strong/moderate/not_yet" classifications end up grounded |
| in numbers a real lender would produce. |
| |
| No DB, no network. Entire module loads in ~20 ms. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import hashlib |
| import random |
| from typing import Dict, List, Optional |
|
|
| from config import MANDI_MAP, SAMPLE_FARMERS |
|
|
| from src.dpi.models import ( |
| AadhaarProfile, |
| FarmerProfile, |
| KCCRecord, |
| LandRecord, |
| ) |
| from src.geo import haversine_km |
|
|
|
|
| |
| |
| |
| |
| |
| |
|
|
| _TAMIL_LOCAL_NAMES: Dict[str, str] = { |
| "Lakshmi": "\u0bb2\u0b9f\u0bcd\u0b9a\u0bc1\u0bae\u0bbf", |
| "Kumar": "\u0b95\u0bc1\u0bae\u0bbe\u0bb0\u0bcd", |
| "Meena": "\u0bae\u0bc0\u0ba9\u0bbe", |
| "Arun": "\u0b85\u0bb0\u0bc1\u0ba3\u0bcd", |
| "Bala": "\u0baa\u0bbe\u0bb2\u0bbe", |
| "Divya": "\u0ba4\u0bbf\u0bb5\u0bcd\u0baf\u0bbe", |
| "Ganesh": "\u0b95\u0ba3\u0bc7\u0bb7\u0bcd", |
| "Priya": "\u0baa\u0bbf\u0bb0\u0bbf\u0baf\u0bbe", |
| "Raja": "\u0bb0\u0bbe\u0b9c\u0bbe", |
| "Selvi": "\u0b9a\u0bc6\u0bb2\u0bcd\u0bb5\u0bbf", |
| "Vel": "\u0bb5\u0bc7\u0bb2\u0bcd", |
| "Saroja": "\u0b9a\u0bb0\u0bcb\u0b9c\u0bbe", |
| "Karthik": "\u0b95\u0bbe\u0bb0\u0bcd\u0ba4\u0bcd\u0ba4\u0bbf\u0b95\u0bcd", |
| "Uma": "\u0b89\u0bae\u0bbe", |
| "Murugan": "\u0bae\u0bc1\u0bb0\u0bc1\u0b95\u0ba9\u0bcd", |
| } |
|
|
|
|
| def _local_name(english_name: str) -> str: |
| """Return Tamil-script name if known, otherwise a generic placeholder. |
| |
| We don't try to transliterate programmatically β getting Tamil |
| transliteration right is non-trivial and outside the scope of a |
| demo registry. Unknown names get a deterministic Tamil placeholder |
| so the field is never empty. |
| """ |
| first = english_name.split()[0] |
| if first in _TAMIL_LOCAL_NAMES: |
| return _TAMIL_LOCAL_NAMES[first] |
| |
| return "\u0bb5\u0bbf\u0bb5\u0b9a\u0bbe\u0baf\u0bbf" |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| _YIELD_Q_PER_HA: Dict[str, float] = { |
| "RICE-SAMBA": 40.0, |
| "TUR-FIN": 25.0, |
| "GNUT-POD": 20.0, |
| "COT-MCU": 15.0, |
| "ONI-RED": 200.0, |
| "COP-DRY": 80.0, |
| "MZE-YEL": 50.0, |
| "URD-BLK": 7.0, |
| "MNG-GRN": 8.0, |
| "BAN-ROB": 300.0, |
| } |
|
|
|
|
| |
| |
| |
| _KCC_PER_HA: Dict[str, float] = { |
| "RICE-SAMBA": 70_000, |
| "TUR-FIN": 140_000, |
| "GNUT-POD": 55_000, |
| "COT-MCU": 85_000, |
| "ONI-RED": 95_000, |
| "COP-DRY": 90_000, |
| "MZE-YEL": 50_000, |
| "URD-BLK": 45_000, |
| "MNG-GRN": 45_000, |
| "BAN-ROB": 180_000, |
| } |
|
|
|
|
| def _seed_rng(farmer_id: str) -> random.Random: |
| """Return a deterministic RNG seeded from the farmer_id.""" |
| h = hashlib.md5(farmer_id.encode()).hexdigest() |
| return random.Random(int(h, 16)) |
|
|
|
|
| def _make_masked_aadhaar(rng: random.Random) -> str: |
| return f"XXXX-XXXX-{rng.randint(1000, 9999)}" |
|
|
|
|
| def _make_phone(rng: random.Random) -> str: |
| """Generate a realistic Indian mobile phone number.""" |
| return f"+91{rng.randint(7_000_000_000, 9_999_999_999)}" |
|
|
|
|
| def _make_kcc_number(rng: random.Random) -> str: |
| return f"KCC-TN-{rng.randint(100_000, 999_999)}" |
|
|
|
|
| def _make_survey_number(rng: random.Random) -> str: |
| """Tamil Nadu land survey numbers are like '142/3B'.""" |
| return f"{rng.randint(100, 999)}/{rng.randint(1, 9)}{rng.choice('ABCDE')}" |
|
|
|
|
def _size_land_for_yield(commodity_id: str, claimed_quintals: float, rng: random.Random) -> float:
    """Return a plausible land area (hectares) that supports the claimed quintals.

    The minimum area is ``claimed_quintals`` divided by the commodity's
    per-hectare yield from ``_YIELD_Q_PER_HA`` (20 q/ha when the commodity
    is not listed). A random 20-80% of headroom is added on top, because a
    real farmer does not have all of their land at peak yield.

    The result is rounded to two decimals. For very small areas that
    rounding can land below the minimum (as low as 0.0 ha), which would
    break the rule that the land record must support the claimed yield.
    In that case the area is bumped up by one 0.01 ha step.

    Args:
        commodity_id: Commodity key used to look up the per-hectare yield.
        claimed_quintals: Quantity the farmer claims to grow, in quintals.
        rng: Farmer-specific RNG; exactly one ``uniform`` draw is consumed.

    Returns:
        Land area in hectares, rounded to two decimals.
    """
    yield_per_ha = _YIELD_Q_PER_HA.get(commodity_id, 20.0)
    # Floor the divisor at 1 q/ha so a zero or tiny table entry cannot blow
    # up the division.
    min_area = claimed_quintals / max(yield_per_ha, 1.0)
    headroom_factor = 1.0 + rng.uniform(0.2, 0.8)
    area = round(min_area * headroom_factor, 2)
    if min_area > 0 and area < min_area:
        # Headroom is at least 20% and rounding loses at most 0.005, so one
        # 0.01 step is always enough to get back above the minimum.
        area = round(area + 0.01, 2)
    return area
|
|
|
|
def _make_land_record(
    farmer, commodity_id: str, area_ha: float, rng: random.Random
) -> LandRecord:
    """Build one LandRecord for ``farmer`` growing ``commodity_id``.

    The plot's GPS position is the farmer's latitude/longitude shifted by a
    random offset of up to 0.02 on each axis (presumably degrees; confirm
    against the persona model). The registered crops always include the
    primary commodity. When that commodity has rotation partners, one of
    them is added with 65% probability.

    ``nearest_mandi_id`` is left empty here; the registry fills it in after
    the record is built.

    NOTE: the order of ``rng`` draws in this function determines every
    generated value for a farmer, so the statements must not be reordered.
    """
    gps_lat = round(farmer.latitude + rng.uniform(-0.02, 0.02), 4)
    gps_lon = round(farmer.longitude + rng.uniform(-0.02, 0.02), 4)

    partners_by_crop = {
        "RICE-SAMBA": ["URD-BLK", "MNG-GRN"],
        "GNUT-POD": ["MZE-YEL"],
        "TUR-FIN": ["MZE-YEL"],
        "COT-MCU": ["GNUT-POD"],
        "MZE-YEL": ["URD-BLK"],
    }
    registered_crops = [commodity_id]
    candidates = partners_by_crop.get(commodity_id, [])
    # The probability draw is consumed only when the crop has partners.
    if candidates and rng.random() < 0.65:
        registered_crops.append(rng.choice(candidates))

    # Remaining draws, in order: survey number, soil type, irrigation type.
    survey_number = _make_survey_number(rng)
    soil_type = rng.choice(
        ["alluvial", "red", "black cotton", "laterite", "sandy loam"]
    )
    irrigation_type = rng.choice(["canal", "borewell", "tank", "rainfed"])

    return LandRecord(
        survey_number=survey_number,
        area_hectares=area_ha,
        soil_type=soil_type,
        irrigation_type=irrigation_type,
        gps_lat=gps_lat,
        gps_lon=gps_lon,
        crops_registered=registered_crops,
        nearest_mandi_id="",
    )
|
|
|
|
def _make_kcc(commodity_id: str, area_ha: float, rng: random.Random) -> KCCRecord:
    """Build a KCC record whose credit limit scales with the land area.

    The limit is the commodity's per-hectare figure from ``_KCC_PER_HA``
    (60,000 when the commodity is not listed) times ``area_ha``, snapped to
    the nearest 5,000 and never below 15,000.

    Utilisation of that limit is drawn from one of three bands:
    10% of farmers at 85-98%, 20% at 5-35%, and the remaining 70% at 35-75%.
    The outstanding amount is rounded to the nearest 100.

    Repayment status is drawn independently: 85% "current", 12% "overdue",
    3% "defaulted".

    NOTE: the order of ``rng`` draws determines every generated value, so
    the statements must not be reordered.
    """
    per_hectare = _KCC_PER_HA.get(commodity_id, 60_000)
    credit_limit = max(round(per_hectare * area_ha / 5_000) * 5_000, 15_000)

    # Pick the utilisation band first, then draw within it.
    band_draw = rng.random()
    if band_draw >= 0.3:
        low, high = 0.35, 0.75
    elif band_draw >= 0.1:
        low, high = 0.05, 0.35
    else:
        low, high = 0.85, 0.98
    utilisation = rng.uniform(low, high)
    outstanding = round(credit_limit * utilisation / 100) * 100

    # Walk the cumulative thresholds; anything past the last is "defaulted".
    status_draw = rng.random()
    repayment_status = "defaulted"
    for ceiling, label in ((0.85, "current"), (0.97, "overdue")):
        if status_draw < ceiling:
            repayment_status = label
            break

    # Remaining draws, in order: account number, payment month, payment day.
    kcc_number = _make_kcc_number(rng)
    pay_month = rng.randint(1, 12)
    pay_day = rng.randint(1, 28)

    return KCCRecord(
        kcc_number=kcc_number,
        credit_limit=credit_limit,
        outstanding=outstanding,
        crops_financed=[commodity_id],
        repayment_status=repayment_status,
        last_payment_date=f"2025-{pay_month:02d}-{pay_day:02d}",
    )
|
|
|
|
| |
| |
| |
|
|
|
|
class SimulatedDPIRegistry:
    """In-memory registry holding one FarmerProfile per SAMPLE_FARMERS entry.

    All profiles are built in the constructor and indexed two ways: by
    farmer_id (primary) and by generated phone number (secondary). Each
    farmer's values, including the phone number, come from an RNG seeded
    with the farmer_id, so both indexes are reproducible across runs.
    """

    def __init__(self):
        self._by_farmer_id: Dict[str, FarmerProfile] = {}
        self._by_phone: Dict[str, FarmerProfile] = {}
        self._generate_all()

    @staticmethod
    def _nearest_mandi_id(lat: float, lon: float) -> str:
        """Return the id of the MANDI_MAP entry closest to (lat, lon).

        Ties keep the first mandi encountered; an empty MANDI_MAP yields "".
        """
        nearest_id = ""
        nearest_km = float("inf")
        for candidate in MANDI_MAP.values():
            km = haversine_km(lat, lon, candidate.latitude, candidate.longitude)
            if km < nearest_km:
                nearest_id, nearest_km = candidate.mandi_id, km
        return nearest_id

    def _generate_all(self) -> None:
        """Build a profile for every farmer and register it in both indexes.

        The rng draws happen in a fixed order per farmer (Aadhaar id, phone,
        birth year, land sizing, land record, KCC); changing that order
        would change every generated profile.
        """
        for persona in SAMPLE_FARMERS:
            rng = _seed_rng(persona.farmer_id)

            masked_id = _make_masked_aadhaar(rng)
            phone_number = _make_phone(rng)
            birth_year = rng.randint(1965, 1995)
            identity = AadhaarProfile(
                aadhaar_id=masked_id,
                name=persona.name,
                name_local=_local_name(persona.name),
                phone=phone_number,
                district=persona.location_name,
                state="Tamil Nadu",
                language="ta",
                dob_year=birth_year,
            )

            hectares = _size_land_for_yield(
                persona.primary_commodity, persona.quantity_quintals, rng
            )
            plot = _make_land_record(
                persona, persona.primary_commodity, hectares, rng
            )
            # The land record is created with an empty mandi id; resolve it
            # from the plot's own GPS position.
            plot.nearest_mandi_id = self._nearest_mandi_id(
                plot.gps_lat, plot.gps_lon
            )

            credit = _make_kcc(persona.primary_commodity, hectares, rng)

            profile = FarmerProfile(
                aadhaar=identity,
                land_records=[plot],
                kcc=credit,
            )
            self._by_farmer_id[persona.farmer_id] = profile
            self._by_phone[identity.phone] = profile

    def lookup_by_farmer_id(self, farmer_id: str) -> Optional[FarmerProfile]:
        """Return the profile registered under ``farmer_id``, or None."""
        return self._by_farmer_id.get(farmer_id)

    def lookup_by_phone(self, phone: str) -> Optional[FarmerProfile]:
        """Return the profile registered under ``phone`` (exact match), or None."""
        return self._by_phone.get(phone)

    def list_profiles(self) -> List[FarmerProfile]:
        """Return a new list of every profile, in farmer_id insertion order."""
        return [*self._by_farmer_id.values()]

    @property
    def profile_count(self) -> int:
        """Number of profiles indexed by farmer_id."""
        return len(self._by_farmer_id)
|
|
|
|
# Module-level registry instance; stays None until get_registry() builds it.
_REGISTRY: Optional[SimulatedDPIRegistry] = None


def get_registry() -> SimulatedDPIRegistry:
    """Return the shared SimulatedDPIRegistry, constructing it on first use."""
    global _REGISTRY
    if _REGISTRY is not None:
        return _REGISTRY
    _REGISTRY = SimulatedDPIRegistry()
    return _REGISTRY
|
|