Spaces:
Sleeping
Sleeping
| # ========================================================= | |
| # FLASK API — MULTILINGUAL GRIEVANCE + XPE + GFAS | |
| # INTEGRATED GRADIENTS ONLY (PRODUCTION VERSION) | |
| # Hugging Face Spaces — Production Deployment | |
| # Multimodal: text / audio / image(evidence) support | |
| # ========================================================= | |
| from flask import Flask, request, jsonify | |
| import re | |
| import io | |
| import traceback | |
| import logging | |
| import math | |
| import os | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| from datetime import datetime, timezone | |
# Root logging at INFO plus this module's own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Raise the threshold of the "prophet" and "cmdstanpy" loggers to ERROR so
# that only errors from those libraries are emitted.
for _noisy_logger in ("prophet", "cmdstanpy"):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)
del _noisy_logger
| from prophet import Prophet | |
| import pandas as pd | |
| from PIL import Image | |
| import piexif | |
| from classification.bert_classify import ( | |
| predict as predict_category_en, | |
| get_model_and_tokenizer as get_cat_en, | |
| ) | |
| from classification.indic_bert_classify import ( | |
| predict as predict_category_indic, | |
| get_model_and_tokenizer as get_cat_indic, | |
| ) | |
| from sentiment_analysis.bert_predict import ( | |
| predict_urgency as predict_urgency_en, | |
| get_model_and_tokenizer as get_urg_en, | |
| ) | |
| from sentiment_analysis.indic_bert_predict import ( | |
| predict as predict_urgency_indic, | |
| get_model_and_tokenizer as get_urg_indic, | |
| ) | |
| from multi_modal.audio_to_text import transcribe_audio | |
| from multi_modal.image_to_text import extract_text_from_image | |
| from xpe.priority_engine import compute_priority_score | |
| from xpe.integrated_gradients_explainer import IntegratedGradientsExplainer | |
| from xpe.hybrid_explainer import generate_final_reason | |
| from gfas import audit as gfas_audit | |
# Shared UTC tzinfo alias.
UTC = timezone.utc
# Whole-string pattern: one or more characters, each a digit, a non-word
# character, or an underscore (i.e. no letters at all).
_RE_JUNK = re.compile(r"^[\d\W_]+$")
# Script detectors: a single code point in U+0900-U+097F (Devanagari block)
# or U+0C00-U+0C7F (Telugu block) is enough to match.
_RE_HINDI = re.compile(r"[\u0900-\u097F]")
_RE_TELUGU = re.compile(r"[\u0C00-\u0C7F]")
| # ========================================================= | |
| # GRIEVANCE VALIDATION — TWO-SIGNAL GATE | |
| # | |
| # Signal 1 (_CIVIC_TOPIC): Does text mention a civic issue? | |
| # Expanded to cover easy ("pothole on road") through | |
| # difficult ("residents experiencing waterborne illness | |
| # due to contaminated municipal supply") phrasings. | |
| # | |
| # Signal 2 (_COMPLAINT_INTENT): Does text express complaint | |
| # intent — duration, harm, request, frustration, urgency? | |
| # Civic topic ALONE is now sufficient (Stage 2b); intent | |
| # check remains for borderline observation texts. | |
| # | |
| # Difficulty levels handled: | |
| # EASY — simple nouns: "pothole", "no water", "garbage" | |
| # MEDIUM — descriptive: "road is very bad", "light not | |
| # working", "water not coming since 3 days" | |
| # HARD — indirect/narrative: "fell ill after drinking | |
| # tap water", "vehicle tyres burst due to road | |
| # condition", "children afraid to walk to school | |
| # because of stray dogs" | |
| # | |
| # FIX LOG (v2): | |
| # + path hole / path-hole → now matched (Roads) | |
| # + hole on road / hole in the road / big hole on main road → matched | |
| # + road full of holes / road has many holes → matched | |
| # + deep/big/large hole on road/street/lane/highway → matched | |
| # + current gone / current is gone / power gone → matched (Electricity) | |
| # + lights not working (plural) → matched | |
| # + drain is blocked / drain is choked / drainage is blocked → matched | |
| # + "is" connector tolerance added throughout drain/electricity sections | |
| # ========================================================= | |
| # ── Signal 1: Civic topic ───────────────────────────────────────────────────── | |
# Signal 1 pattern: one large alternation of civic-topic phrases (roads,
# water, electricity, garbage, drainage, stray animals, pollution, transport,
# buildings, general harm, plus Telugu/Hindi keywords).
#
# Compiled with re.VERBOSE, so whitespace and "#" comments inside the pattern
# are ignored, and with re.IGNORECASE.  Because line breaks are ignored, every
# continuation line inside a group MUST begin with "|"; otherwise the last
# alternative of the previous line is silently concatenated with the first
# token of the next line.
#
# FIX: the PUBLIC TRANSPORT "bus (...)" group was missing that "|" before
# "broken", which fused "not\s+coming" and "broken" into the single
# alternative "not\s+comingbroken" (so "bus broken" never matched).
_CIVIC_TOPIC = re.compile(
    r"""
    # ══════════════════════════════════════════════════════
    # ROADS — easy: pothole / medium: road damaged / hard:
    # vehicle damaged, tyre burst, accident due to road
    #
    # FIX v2: Added path hole, hole on/in road variants,
    # road full of holes, big/deep/large hole on road,
    # "is" connector tolerance for drain-style patterns
    # ══════════════════════════════════════════════════════
    \b(pothole|potholes|crater|craters|road\s+hole|road\s+pit
    |path[-\s]?hole
    |hole\s+(on|in|at|along)\s+(the\s+)?(road|street|lane|path|highway|footpath|pavement|sidewalk)
    |holes?\s+(on|in|along)\s+(road|street|lane|highway|footpath)
    |road\s+(full\s+of|filled\s+with|has\s+many|with\s+many)\s+holes?
    |(big|deep|large|huge|dangerous)\s+(hole|pit|crater)\s+(on|in|at|along)\s+(the\s+)?(road|street|lane|highway|main\s+road|footpath)
    |road\s+(damage|damaged|broken|crack|cracked|bad|rough
    |repair|condition|cave|collapse|hazard|blocked
    |obstruction|construction|excavation|digging|dug\s+up
    |not\s+repaired|not\s+fixed|in\s+bad\s+shape|in\s+poor\s+condition
    |is\s+(very\s+)?(bad|terrible|horrible|pathetic|worst|dangerous|unsafe|rough)
    |has\s+(potholes?|cracks?|damage|holes?))
    |damaged\s+road|broken\s+road|bad\s+road|rough\s+road
    |unpaved\s+road|muddy\s+road|dirt\s+road|gravel\s+road
    |road\s+not\s+repaired|road\s+not\s+fixed|road\s+not\s+tarred
    |open\s+trench|road\s+trench|trench\s+not\s+filled|trench\s+open
    |speed\s+breaker\s+(missing|broken|damaged|not\s+marked)
    |road\s+sign\s+(missing|damaged|broken|fallen)
    |footpath\s+(broken|damaged|blocked|missing|encroached|not\s+there)
    |pavement\s+(broken|damaged|cracked|uneven|missing)
    |sidewalk\s+(broken|damaged|blocked|missing)
    |tyre\s+(burst|puncture|damaged)\s+(due\s+to|because\s+of|from)\s+(road|pothole|pit)
    |vehicle\s+(damaged|got\s+damaged|broke\s+down)\s+(due\s+to|because\s+of)\s+(road|pothole)
    |accident\s+(due\s+to|because\s+of|caused\s+by)\s+(road|pothole|road\s+condition)
    |fell\s+(down|off|into)\s+(pothole|pit|trench|open\s+drain)
    |two[-\s]wheeler\s+(fell|skidded|slipped)\s+(due\s+to|because\s+of)
    |bike\s+(fell|skidded|slipped|got\s+stuck)\s+(due\s+to|because\s+of)
    |road\s+marking\s+(faded|missing|not\s+visible|worn\s+out)
    |no\s+road|street\s+(is\s+)?(bad|broken|damaged|not\s+repaired))\b
    # ══════════════════════════════════════════════════════
    # WATER SUPPLY — easy: no water / medium: pipe burst /
    # hard: contaminated water causing illness, muddy
    # water, foul smell from tap, people fell ill
    # ══════════════════════════════════════════════════════
    | \b(water\s+(supply|issue|problem|crisis|shortage|scarcity|not\s+coming
    |not\s+available|disrupted|cut|stopped|irregular|leakage
    |leak|overflow|overflowing|tank|pressure|quality
    |contamination|contaminated|pollution|polluted
    |connection|pipeline|line|board|department|authority)
    |no\s+water|without\s+water|water\s+cut\s+off
    |pipe\s+(burst|leak|leaking|broken|damage|damaged|cracked|old|rusted
    |not\s+repaired|open|exposed|underground\s+pipe)
    |broken\s+pipe|leaking\s+pipe|burst\s+pipe|old\s+pipe|rusted\s+pipe
    |water\s+not\s+(coming|supplied|available|received|restored)
    |no\s+water\s+supply|water\s+supply\s+(stopped|disrupted|cut|not\s+given)
    |drinking\s+water\s+(contaminated|dirty|impure|polluted|not\s+safe|unsafe
    |not\s+potable|not\s+clean|issue|problem|shortage)
    |tap\s+water\s+(dirty|smells?|contaminated|yellow|brown|black|unsafe)
    |water\s+(smells?|stinks?|colour|colored|discoloured|discolored
    |yellow|brown|black|muddy|murky|turbid|unclean|unsafe)
    |contaminated\s+water|dirty\s+water|impure\s+water|polluted\s+water
    |muddy\s+water|murky\s+water|foul\s+water|unclean\s+water
    |unsafe\s+water|undrinkable\s+water|unfit\s+for\s+drinking
    |waterborne\s+(disease|illness|infection)|water.?borne
    |fell\s+ill\s+(after|due\s+to|because\s+of|from)\s+(drinking|water|tap)
    |fallen\s+ill\s+(after|due\s+to|because\s+of|from)\s+(drinking|water|tap)
    |got\s+sick\s+(after|due\s+to|because\s+of|from)\s+(drinking|water|tap)
    |sick\s+(after|due\s+to|because\s+of)\s+(drinking|contaminated|dirty)\s+water
    |ill\s+(after\s+drinking|due\s+to\s+water|from\s+water|because\s+of\s+water)
    |vomiting\s+(after|due\s+to|from)\s+(drinking|water)
    |diarrhea\s+(due\s+to|from|caused\s+by)\s+water
    |stomach\s+(pain|ache|issue|problem)\s+(after\s+drinking|due\s+to|from)\s+water
    |disease\s+(due\s+to|from|caused\s+by|spread\s+by)\s+(water|contaminated)
    |illness\s+(due\s+to|from|caused\s+by)\s+(water|contaminated|dirty)
    |people\s+(fell|fallen|got|are\s+getting)\s+ill\s+(due\s+to|from|after|because\s+of)
    |residents\s+(fell|fallen|got|are\s+getting)\s+ill
    |children\s+(fell|fallen|got|are\s+getting)\s+ill
    |families\s+(fell|fallen|got|are\s+getting)\s+ill
    |water\s+not\s+fit\s+for\s+drinking|water\s+not\s+safe\s+to\s+drink
    |borewell\s+(contaminated|dirty|water|issue|problem|not\s+working)
    |overhead\s+tank\s+(dirty|contaminated|overflow|not\s+cleaned|cracked)
    |sump\s+(dirty|contaminated|overflow|cracked|not\s+cleaned)
    |tanker\s+(water|not\s+coming|not\s+sent|delayed|supply)
    |water\s+tanker\s+(not|delayed|irregular)
    |municipal\s+water\s+(supply|contaminated|dirty|problem|issue)
    |metro\s+water\s+(supply|issue|contaminated)
    |stagnant\s+water|standing\s+water|water\s+accumulation|water\s+stagnation
    |waterlog(ging)?|flooded?\s+(road|street|area|locality|colony|lane)
    |waterlogged\s+(road|street|area)
    |water\s+on\s+the\s+(road|street|lane|path|area|ground|footpath))\b
    # ══════════════════════════════════════════════════════
    # ELECTRICITY — easy: no power / medium: streetlight
    # not working / hard: wire on road causing risk,
    # transformer spark, electric shock risk
    #
    # FIX v2: Added "current gone/is gone", "power gone",
    # "lights" plural, "current is (gone|cut|off|failed)"
    # ══════════════════════════════════════════════════════
    | \b(power\s+(cut|outage|failure|gone|not\s+restored|supply|shortage
    |fluctuation|surge|problem|issue|connection|line|grid)
    |electricity\s+(cut|outage|issue|failure|problem|gone|not\s+restored
    |fluctuation|supply|shortage|connection|bill|board|department)
    |no\s+(electricity|power|current|supply|light)
    |current\s+(gone|cut|off|not\s+coming|not\s+there|failed|out|tripped)
    |current\s+is\s+(gone|cut|off|not\s+coming|not\s+there|failed|out|tripped)
    |power\s+(gone|tripped|out|off|not\s+back|not\s+yet\s+restored)
    |light\s+(gone|not\s+working|not\s+there|out|off|missing|broken|damaged)
    |lights\s+(gone|not\s+working|not\s+there|out|off|missing|broken|damaged)
    |streetlight\s+(broken|not\s+working|damaged|out|dark|missing|gone|off
    |flickering|dim|no\s+light)
    |broken\s+streetlight|dark\s+street|street\s+(dark|in\s+darkness|no\s+light)
    |street\s+light\s+(not\s+working|broken|gone|off|missing|out)
    |no\s+street\s+light|no\s+streetlight
    |electric\s+(wire|pole|shock|spark|short\s+circuit|box|meter|connection
    |supply|board|department|fault|hazard|risk|danger)
    |live\s+wire|dangling\s+wire|fallen\s+wire|loose\s+wire|naked\s+wire
    |exposed\s+(wire|cable|electric)|wire\s+(on\s+road|hanging|low|dangerous
    |sparking|broken|snapped|fallen)
    |electric\s+pole\s+(fallen|broken|leaning|damaged|cracked|tilted)
    |fallen\s+(pole|wire|electric\s+pole|electric\s+wire|cable|tower)
    |transformer\s+(damaged|broken|leaking|fire|sparking|exploded|blast|issue|problem)
    |electric\s+(shock|fire|spark)\s+(risk|hazard|danger|incident|reported|near)
    |risk\s+of\s+electric\s+shock|electric\s+shock\s+risk
    |shock\s+(from|due\s+to|because\s+of)\s+(wire|pole|electric|current)
    |current\s+(leakage|leak|spark|shock|passing|through)
    |power\s+not\s+restored|no\s+power\s+(for|since)\s+\d+
    |electricity\s+not\s+(restored|coming|available)
    |voltage\s+(fluctuation|low|high|problem|issue|drop|surge)
    |meter\s+(box|board)\s+(open|broken|damaged|missing|tampered)
    |substation\s+(damaged|fire|issue|problem|fault)
    |cable\s+(underground|overhead|broken|damaged|exposed|cut)
    |junction\s+box\s+(open|broken|damaged|missing)
    |area\s+(in\s+darkness|no\s+light|no\s+electricity|no\s+power|dark)
    |darkness\s+(in|at|near|around)\s+(colony|area|street|road|ward|locality))\b
    # ══════════════════════════════════════════════════════
    # GARBAGE / WASTE — easy: garbage on road /
    # medium: bin overflowing / hard: burning waste
    # causing health issues, illegal dump attracting pests
    # ══════════════════════════════════════════════════════
    | \b(garbage\s+(pile|dump|not\s+collected|overflowing|bin|heap|on\s+road
    |on\s+street|burning|fire|disposal|collection|vehicle
    |truck|not\s+picked|not\s+cleared|accumulation|issue|problem
    |near\s+house|near\s+school|near\s+hospital|smell|stench)
    |waste\s+(dump|pile|heap|disposal|burning|collection|not\s+collected
    |illegal|dumped|accumulation|management|issue|problem|on\s+road)
    |trash\s+(pile|dump|on\s+road|on\s+street|collection|not\s+collected)
    |litter\s+(on\s+road|on\s+street|pile|problem|issue|everywhere)
    |garbage|litter|debris|rubbish
    |overflowing\s+(garbage|bin|dustbin|dumpster|container)
    |garbage\s+bin\s+(broken|missing|overflowing|full|not\s+emptied)
    |uncollected\s+(garbage|waste|trash|litter)
    |garbage\s+not\s+(collected|cleared|picked\s+up|removed)
    |waste\s+not\s+(collected|cleared|picked\s+up|removed)
    |no\s+garbage\s+collection|no\s+waste\s+collection
    |burning\s+(garbage|waste|plastic|tyres?|rubber|trash|litter)
    |garbage\s+(fire|burning)|waste\s+(fire|burning)
    |illegal\s+(dump|dumping|waste\s+dump|garbage\s+dump)
    |open\s+(garbage|dump|waste|dumping)
    |rotting\s+(waste|garbage|food|material)|decomposing\s+(waste|garbage)
    |foul\s+smell\s+(from|due\s+to)\s+(garbage|waste|dump|bin)
    |smell\s+(from|due\s+to|because\s+of)\s+(garbage|waste|dump|rotting)
    |stench\s+(from|due\s+to|because\s+of)\s+(garbage|waste|dump)
    |mosquito\s+(breeding|menace|problem|due\s+to|from|in)\s*(garbage|waste|dump|stagnant)?
    |rats?\s+(due\s+to|from|because\s+of)\s+(garbage|waste|dump)
    |pests?\s+(due\s+to|from|because\s+of)\s+(garbage|waste|dump)
    |flies\s+(due\s+to|from|because\s+of)\s+(garbage|waste|dump)
    |disease\s+(due\s+to|from|caused\s+by|spread\s+by)\s+(garbage|waste|dump)
    |illness\s+(due\s+to|from|caused\s+by)\s+(garbage|waste|dump)
    |health\s+(hazard|risk|issue|problem)\s+(due\s+to|from|caused\s+by)\s+(garbage|waste)
    |construction\s+(debris|waste|material)\s+(dumped|blocking|on\s+road)
    |animal\s+(carcass|body|dead)\s+(dumped|on\s+road|on\s+street|lying)
    |dead\s+animal\s+(dumped|on\s+road|lying|not\s+removed)
    |carcass\s+(on\s+road|on\s+street|not\s+removed|lying|dumped))\b
    # ══════════════════════════════════════════════════════
    # DRAINAGE / SEWAGE / SANITATION — easy: drain blocked /
    # medium: sewage overflow / hard: disease from open
    # drain, sewage entering homes, manhole accident
    #
    # FIX v2: Added "drain is (blocked|choked|clogged)",
    # "drainage is (blocked|choked)", "is" connector
    # tolerance throughout drain/sewage patterns
    # ══════════════════════════════════════════════════════
    | \b(drain\s+(is\s+)?(blocked|overflow|overflowing|choked|clogged|not\s+cleaned
    |broken|damaged|open|uncovered|full|flooding|stagnant
    |issue|problem|bad\s+smell|stench|foul\s+smell)
    |blocked\s+drain|open\s+drain|choked\s+drain|clogged\s+drain
    |drainage\s+(is\s+)?(blocked|overflow|issue|problem|bad|not\s+working
    |not\s+cleaned|choked|clogged|system|line|pipe)
    |drainage\s+not\s+(working|cleaned|cleared|maintained)
    |drain\s+(has\s+)?(bad\s+smell|stench|foul\s+smell|overflowed|collapsed)
    |sewage\s+(overflow|overflowing|leak|leaking|on\s+road|on\s+street
    |in\s+house|flooding|not\s+cleaned|smell|stench|issue|problem
    |entering|flowing\s+on|running\s+on|water|pipe|line)
    |sewage\s+water\s+(on\s+road|in\s+colony|in\s+area|entering|flowing)
    |sewer\s+(blocked|overflow|issue|problem|line|pipe|not\s+working)
    |manhole\s+(open|uncovered|broken|missing|cover\s+missing|no\s+cover
    |damaged|dangerous|accident|fell|fallen\s+into)
    |open\s+manhole|missing\s+manhole|manhole\s+cover\s+missing
    |fell\s+(into|in)\s+(manhole|open\s+drain|drain|sewer)
    |fallen\s+(into|in)\s+(manhole|open\s+drain|drain|sewer)
    |child\s+(fell|fallen)\s+(into|in)\s+(manhole|drain|open\s+drain)
    |accident\s+(due\s+to|caused\s+by|because\s+of)\s+(manhole|open\s+drain|drain)
    |public\s+toilet\s+(dirty|broken|not\s+working|blocked|overflow|no\s+water
    |locked|missing|damaged|not\s+maintained|stench)
    |toilet\s+(blocked|overflow|not\s+working|broken|dirty|no\s+water)
    |no\s+public\s+toilet|public\s+toilet\s+not\s+there
    |open\s+defecation|urinating\s+in\s+public|defecating\s+in\s+public
    |sewage\s+pit\s+(open|uncovered|broken|overflowing)
    |septic\s+tank\s+(overflow|full|broken|damaged|issue|problem)
    |black\s+water\s+(on\s+road|flooding|overflow|running)
    |foul\s+smell\s+(from|due\s+to)\s+(drain|sewage|sewer|toilet|manhole)
    |stench\s+(from|due\s+to)\s+(drain|sewage|sewer|toilet|manhole)
    |smell\s+(from|due\s+to|because\s+of)\s+(drain|sewage|sewer|gutter)
    |disease\s+(from|due\s+to|caused\s+by|spread\s+by)\s+(sewage|drain|open\s+drain)
    |illness\s+(from|due\s+to|caused\s+by)\s+(sewage|drain|open\s+drain)
    |mosquito\s+(breeding|from|due\s+to)\s+(drain|sewage|stagnant)
    |overflowing\s+(drain|sewage|sewer|manhole)
    |sewage\s+entering\s+(house|home|building|ground\s+floor|colony|area)
    |gutter\s+(blocked|overflow|stench|broken|open|issue))\b
    # ══════════════════════════════════════════════════════
    # STRAY ANIMALS — easy: stray dogs on road / medium:
    # dogs biting people / hard: child bitten, accident
    # due to cattle, people afraid to go out
    # ══════════════════════════════════════════════════════
    | \b(stray\s+(dog|dogs|cat|cats|animal|animals|cattle|cow|cows|bull|buffalo
    |pig|pigs|horse|goat|goats)
    |stray\s+dogs?\s+(on\s+road|in\s+area|in\s+colony|menace|problem|issue
    |biting|attacking|chasing|aggressive|ferocious|pack)
    |dogs?\s+(biting|bitten|attacked|attacking|chasing|bit|menace|nuisance
    |aggressive|ferocious|dangerous|terrorizing|threat)
    |cattle\s+(blocking|on\s+road|on\s+highway|menace|issue|problem
    |dangerous|accident|causing\s+accident)
    |cow\s+(on\s+road|blocking|dangerous|menace|attack|attacked)
    |bull\s+(on\s+road|dangerous|menace|attack|attacked|charging)
    |buffalo\s+(on\s+road|dangerous|menace|attack|attacked)
    |animal\s+(attack|attacked|biting|menace|carcass|dead|body|on\s+road)
    |bitten\s+by\s+(dog|stray|animal|cow|bull)|dog\s+bite|animal\s+bite
    |dog\s+(attacked|bit|chased|chasing|biting)\s+(me|child|children|person|people|resident|woman|man)
    |child\s+(bitten|attacked|chased)\s+by\s+(dog|stray|animal|cow|bull)
    |children\s+(bitten|attacked|chased|scared|afraid)\s+(by|of)\s+(dog|stray|animal)
    |people\s+(bitten|attacked|chased|scared|afraid)\s+(by|of)\s+(dog|stray|animal)
    |residents\s+(bitten|attacked|chased|scared|afraid)\s+(by|of)\s+(dog|stray|animal)
    |afraid\s+(to\s+(go\s+out|walk|come\s+out|step\s+out))\s+(due\s+to|because\s+of)\s+(dog|stray|animal)
    |scared\s+(to\s+(go\s+out|walk|come\s+out|step\s+out))\s+(due\s+to|because\s+of)\s+(dog|stray|animal)
    |cannot\s+(go\s+out|walk|step\s+out|come\s+out)\s+(due\s+to|because\s+of)\s+(dog|stray|animal)
    |accident\s+(due\s+to|caused\s+by|because\s+of)\s+(cattle|cow|stray|animal|dog)
    |vehicle\s+(hit|hits|collided|accident)\s+(cattle|cow|stray|animal|dog)
    |dead\s+animal|animal\s+carcass|carcass\s+on\s+road
    |dog\s+population\s+(increase|out\s+of\s+control|uncontrolled)
    |pack\s+of\s+(stray\s+)?dogs)\b
    # ══════════════════════════════════════════════════════
    # POLLUTION — easy: smoke / medium: burning garbage
    # smoke / hard: factory emissions causing disease,
    # polluted water body, chemical smell
    # ══════════════════════════════════════════════════════
    | \b(pollution|polluted|contamination|contaminated
    |smoke\s+(from|due\s+to|because\s+of|coming\s+from)?
    |black\s+smoke|thick\s+smoke|foul\s+smell|toxic\s+smell|chemical\s+smell
    |burning\s+(garbage|waste|plastic|tyres?|rubber|crop|biomass|trash|wood)
    |garbage\s+(fire|burning)|waste\s+(fire|burning)
    |air\s+(pollution|quality|bad|polluted|hazardous|toxic|smog|smoke)
    |factory\s+(smoke|emission|discharge|waste|effluent|chemical|fumes|noise|pollution)
    |industry\s+(smoke|emission|discharge|waste|effluent|chemical|fumes|pollution)
    |industrial\s+(waste|pollution|discharge|effluent|emission|smoke)
    |chimney\s+(smoke|emission|fumes|black\s+smoke)
    |exhaust\s+(fumes?|smoke|emission|smell)
    |vehicle\s+(pollution|smoke|emission|exhaust)
    |dust\s+(pollution|cloud|storm|on\s+road|due\s+to|from\s+construction)
    |construction\s+(dust|noise|pollution|debris)
    |noise\s+(pollution|from\s+factory|from\s+vehicle|from\s+construction|disturbing|nuisance)
    |chemical\s+(spill|discharge|waste|smell|fumes?|dumped|effluent)
    |oil\s+spill|chemical\s+spill|toxic\s+(waste|discharge|spill|fumes?|smell)
    |polluted\s+(river|lake|pond|water\s+body|canal|nala|stream|groundwater)
    |river\s+(garbage|waste|polluted|dirty|sewage|discharge)
    |lake\s+(garbage|waste|polluted|dirty|sewage|discharge)
    |sewage\s+(discharge|released\s+into|dumped\s+into)\s+(river|lake|pond|water)
    |stench\s+(from|due\s+to|because\s+of|coming\s+from)
    |foul\s+(smell|odour|odor)\s+(from|due\s+to|because\s+of|in\s+area)
    |smell\s+(unbearable|intolerable|very\s+bad|horrible|foul|terrible)
    |breathing\s+(problem|issue|difficulty)\s+(due\s+to|from|caused\s+by)\s+(smoke|pollution|dust|fumes)
    |respiratory\s+(problem|issue|disease)\s+(due\s+to|from|caused\s+by)\s+(smoke|pollution|dust)
    |health\s+(issue|problem|hazard)\s+(due\s+to|from|caused\s+by)\s+(pollution|smoke|fumes|dust)
    |eyes?\s+(burning|irritation|watering)\s+(due\s+to|from|because\s+of)\s+(smoke|pollution|fumes|dust))\b
    # ══════════════════════════════════════════════════════
    # PUBLIC TRANSPORT / INFRASTRUCTURE — easy: bus stop
    # broken / medium: no bus service / hard: accident
    # at bus stop, overcrowding, no shelter
    # ══════════════════════════════════════════════════════
    # FIX: the continuation line below now starts with the alternation bar,
    # so "not coming" and "broken" are two separate alternatives again.
    | \b(bus\s+(stop|stand|shelter|route|service|not\s+running|not\s+coming
    |broken|damaged|missing|no\s+shelter|encroached|dirty)
    |bus\s+stop\s+(broken|damaged|missing|encroached|no\s+shelter|no\s+bench)
    |auto\s+(stand|encroachment|blocking|menace|issue|problem)
    |no\s+bus\s+(service|route|coming|running|available)
    |bus\s+(not\s+running|not\s+coming|not\s+available|cancelled|delayed)
    |public\s+(transport|vehicle|bus|transit)\s+(issue|problem|not\s+running|poor)
    |transport\s+(problem|issue|no\s+service|poor\s+service)
    |overcrowding\s+(in|at|on)\s+(bus|bus\s+stop|transport)
    |road\s+accident\s+(at|near|due\s+to)|vehicle\s+accident|accident\s+on\s+road
    |accident\s+scene|accident\s+spot\s+(not\s+cleared|blocked|dangerous)
    |accident\s+blackspot|accident\s+prone\s+(area|spot|zone)
    |traffic\s+(jam|congestion|signal\s+not\s+working|signal\s+broken
    |light\s+not\s+working|police|issue|problem|blocked|chaos)
    |traffic\s+signal\s+(broken|not\s+working|missing|damaged|off)
    |signal\s+(not\s+working|broken|missing|damaged|off|malfunctioning)
    |no\s+traffic\s+signal|traffic\s+light\s+(not\s+working|broken|missing)
    |encroachment\s+(on\s+road|on\s+footpath|on\s+pavement|blocking)
    |road\s+encroachment|footpath\s+encroachment|pavement\s+encroachment
    |illegal\s+(parking|encroachment|construction|shop|vendor)\s+(blocking|on\s+road|on\s+footpath)
    |parking\s+(problem|issue|blocking|road|footpath|encroachment)
    |no\s+parking\s+space|vehicles\s+parked\s+(on\s+road|blocking|footpath))\b
    # ══════════════════════════════════════════════════════
    # BUILDING / STRUCTURAL — easy: wall collapsed / medium:
    # building unsafe / hard: structural collapse risk,
    # encroachment on public land, illegal construction
    # ══════════════════════════════════════════════════════
    | \b(building\s+(collapse|collapsed|dangerous|unsafe|crumbling|dilapidated
    |crack|cracked|illegal|encroachment|debris|rubble)
    |wall\s+(collapse|collapsed|fallen|crumbling|cracked|broken|dangerous
    |damaged|leaning|about\s+to\s+fall)
    |structure\s+(collapse|collapsed|unsafe|dangerous|crumbling|illegal)
    |illegal\s+(construction|building|structure|encroachment)
    |encroachment\s+(on|of)\s+(public|government|road|footpath|park|land)
    |construction\s+(debris|rubble|material|blocking\s+road|on\s+road|hazard)
    |demolition\s+(debris|rubble|blocking|on\s+road|not\s+cleared)
    |asbestos\s+(roof|sheet|material)\s+(broken|damaged|dangerous|hazard)
    |collapsed\s+(wall|building|structure|roof|ceiling)
    |roof\s+(collapse|fallen|dangerous|leaking|cracked)
    |ceiling\s+(collapse|fallen|dangerous|cracked|leaking)
    |slab\s+(crack|broken|fallen|dangerous)
    |pillar\s+(crack|broken|leaning|dangerous))\b
    # ══════════════════════════════════════════════════════
    # GENERAL HARM / INJURY — catches difficult phrasings
    # where the civic issue is implied through harm:
    # "injured", "accident", "children cannot go to school"
    # ══════════════════════════════════════════════════════
    | \b(injured|injury|injuries|got\s+injured|people\s+injured|someone\s+injured
    |serious\s+injury|minor\s+injury|accident|accidents
    |fell\s+down|slipped\s+and\s+fell|tripped\s+and\s+fell
    |fell\s+(due\s+to|because\s+of|into|in)
    |children\s+(cannot|can.t|unable\s+to)\s+(go\s+to\s+school|walk|play\s+outside)
    |residents\s+(unable|cannot|can.t)\s+(sleep|walk|go\s+out|step\s+out)
    |people\s+(unable|cannot|can.t)\s+(walk|cross|use\s+road|go\s+out)
    |hazard(ous)?|danger(ous)?|risk(y)?|unsafe|life\s+(risk|threatening|danger)
    |life\s+(at\s+)?risk|risk\s+to\s+life|threat\s+to\s+life
    |major\s+(accident|risk|hazard|danger|issue|problem)
    |emergency\s+(situation|condition|issue)|civic\s+(issue|problem|complaint|grievance))\b
    # ══════════════════════════════════════════════════════
    # INDIC SCRIPTS — Telugu & Hindi civic keywords
    # ══════════════════════════════════════════════════════
    | (\u0c38\u0c2e\u0c38\u0c4d\u0c2f|\u0c2b\u0c3f\u0c30\u0c4d\u0c2f\u0c3e\u0c26\u0c41
    |\u0c30\u0c4b\u0c21\u0c4d\u0c21\u0c41\s+\u0c2a\u0c3e\u0c21\u0c48\u0c02\u0c26\u0c3f
    |\u0c28\u0c40\u0c33\u0c4d\u0c33\u0c41\s+\u0c30\u0c3e\u0c35\u0c21\u0c02\s+\u0c32\u0c47\u0c26\u0c41
    |\u0c1a\u0c46\u0c24\u0c4d\u0c24|\u0c2e\u0c41\u0c30\u0c41\u0c17\u0c41|\u0c35\u0c3f\u0c26\u0c4d\u0c2f\u0c41\u0c24\u0c4d
    |\u0c15\u0c30\u0c46\u0c02\u0c1f\u0c4d|\u0c28\u0c40\u0c33\u0c4d\u0c33\u0c41|\u0c26\u0c41\u0c2e\u0c4d\u0c2e\u0c41
    |\u0c2e\u0c41\u0c30\u0c41\u0c17\u0c41\u0c28\u0c40\u0c33\u0c41|\u0c38\u0c4d\u0c1f\u0c4d\u0c30\u0c40\u0c1f\u0c4d\s+\u0c32\u0c48\u0c1f\u0c4d)
    | (\u0938\u092e\u0938\u094d\u092f\u093e|\u0936\u093f\u0915\u093e\u092f\u0924
    |\u0938\u095c\u0915\s+\u0916\u0930\u093e\u092c|\u092a\u093e\u0928\u0940\s+\u0928\u0939\u0940\u0902
    |\u0915\u091a\u0930\u093e|\u0928\u093e\u0932\u0940|\u092c\u093f\u091c\u0932\u0940
    |\u0915\u0930\u0947\u0902\u091f|\u092a\u094d\u0930\u0926\u0942\u0937\u0923
    |\u092c\u093f\u091c\u0932\u0940\s+\u0928\u0939\u0940\u0902|\u0938\u0921\u093c\u0915\s+\u0916\u0930\u093e\u092c)
    # ══════════════════════════════════════════════════════
    # STANDALONE HIGH-SIGNAL NOUNS (easy level)
    # ══════════════════════════════════════════════════════
    | \b(pothole|streetlight|sewage|waterlogging|footpath|manhole|drainage
    |encroachment|carcass|borewell|transformer|substation|tanker)\b
    # ══════════════════════════════════════════════════════
    # ROAD QUALITY DESCRIPTORS (medium level)
    # ══════════════════════════════════════════════════════
    | \b(road|street|highway|lane)\s+(is\s+)?(very\s+)?(bad|damaged|broken|cracked
    |horrible|terrible|pathetic|worst|hazardous|dangerous|unsafe|rough|uneven
    |full\s+of\s+potholes|in\s+bad\s+condition|in\s+poor\s+condition
    |not\s+repaired|not\s+fixed|needs\s+repair|needs\s+fixing)\b
    """,
    re.VERBOSE | re.IGNORECASE,
)
| # ── Signal 2: Complaint intent ──────────────────────────────────────────────── | |
| _COMPLAINT_INTENT = re.compile( | |
| r""" | |
| # ── Not working / not resolved ─────────────────────── | |
| \b(not\s+(working|repaired|fixed|resolved|cleared|collected|done | |
| |responded|addressed|functioning|restored|completed|processed | |
| |cleaned|removed|attended|responding|maintained|inspected | |
| |replaced|upgraded|available|supplied|coming|received | |
| |safe|fit|potable|drinkable|usable)) | |
| | \b(no\s+(action|response|resolution|update|repair|water|electricity | |
| |signal|network|power|supply|gas|light|service|maintenance)) | |
| | \b(still\s+(not|pending|waiting|broken|blocked|overflowing|unresolved | |
| |same|continuing|happening|there|going\s+on)) | |
| | \b(yet\s+to\s+be|never\s+(fixed|repaired|addressed|cleared|resolved | |
| |cleaned|collected|attended|responded)) | |
| # ── Requests / calls to action (easy intent) ───────── | |
| | \b(please\s+(fix|repair|clean|clear|remove|take|help|address|look|attend | |
| |send|deploy|do|act|check|inspect|replace|resolve|respond | |
| |come|visit|arrange|ensure|provide|install|restore|maintain) | |
| |kindly\s+(fix|repair|clean|remove|address|look|attend|send|do|act | |
| |check|inspect|resolve|respond|come|visit|arrange|ensure) | |
| |request\s+(you\s+to|for\s+(immediate|urgent|early|early\s+action)) | |
| |need\s+(immediate|urgent|your|early)\s+(action|help|attention|response|repair) | |
| |take\s+(action|immediate|urgent|necessary|early|swift|quick)(\s+action)? | |
| |must\s+(fix|repair|address|resolve|clear|clean|replace|restore|remove) | |
| |should\s+(fix|repair|address|resolve|clear|clean|replace|restore|remove) | |
| |need\s+to\s+be\s+(fixed|repaired|addressed|resolved|cleared|cleaned | |
| |replaced|restored|removed|attended|inspected) | |
| |do\s+something|do\s+the\s+needful|take\s+notice|look\s+into\s+(this|the\s+matter) | |
| |bring\s+to\s+your\s+(notice|attention)|draw\s+your\s+attention | |
| |requesting\s+(you|the\s+authorities|officials)\s+to | |
| |hope\s+(you\s+will|authorities\s+will|officials\s+will)\s+(act|fix|resolve|address) | |
| |i\s+request|we\s+request|citizens\s+request|residents\s+request)\b | |
| # ── Duration / persistence (medium intent) ─────────── | |
| | \b(since\s+(yesterday|last\s+\w+|\d+\s+(days?|weeks?|months?|years?|hours?) | |
| |morning|night|long\s+time|ages|many\s+(days?|weeks?|months?) | |
| |a\s+(long|very\s+long)\s+time|weeks?|months?|days?|years?|long|ages) | |
| |for\s+(the\s+past\s+)?(\d+\s+)?(days?|weeks?|months?|hours?|years? | |
| |long\s+time|ages|a\s+long\s+time|many\s+(days?|weeks?|months?)) | |
| |for\s+(weeks?|months?|days?|years?|long|ages|a\s+very\s+long\s+time) | |
| |days?\s+ago|weeks?\s+ago|months?\s+ago|years?\s+ago | |
| |\d+\s+(days?|weeks?|months?)\s+(now|already|back|since|passed|gone|over) | |
| |since\s+long|since\s+ages|from\s+past\s+\d+|for\s+long|for\s+ages | |
| |repeatedly|again\s+and\s+again|multiple\s+times|several\s+times|many\s+times | |
| |keeps?\s+(happening|recurring|coming\s+back|repeating) | |
| |ongoing\s+(issue|problem)|persistent\s+(issue|problem|complaint) | |
| |chronic\s+(issue|problem)|long.?standing\s+(issue|problem) | |
| |months?\s+have\s+(passed|gone)|years?\s+have\s+(passed|gone) | |
| |no\s+(improvement|change|action|repair)\s+(for|since|in)\s+\d+ | |
| |not\s+repaired\s+(for|since|in)\s+(the\s+past\s+)?\d+)\b | |
| # ── Harm / impact / consequence (hard intent) ──────── | |
| | \b(affecting|causing\s+(problem|issue|disease|accident|harm|damage|illness | |
| |inconvenience|difficulty|hardship|suffering|injury) | |
| |hazard(ous)?|danger(ous)?|risk(y)?|unsafe | |
| |accident(s)?|injur(y|ies|ed|ing)|hurt | |
| |attack(ed|ing)|attacked|bitten|bit\s+(by|a\s+dog) | |
| |people\s+are\s+(suffering|facing|affected|unable|scared|getting\s+hurt | |
| |being\s+bitten|being\s+attacked|in\s+danger|at\s+risk | |
| |falling\s+ill|getting\s+sick|getting\s+infected) | |
| |residents\s+are\s+(suffering|facing|affected|struggling|scared | |
| |getting\s+hurt|being\s+bitten|falling\s+ill|getting\s+sick | |
| |unable\s+to|cannot) | |
| |children\s+are\s+(being\s+attacked|being\s+bitten|scared|afraid | |
| |falling\s+ill|getting\s+sick|unable\s+to\s+go) | |
| |public\s+(facing|suffering|inconvenienced|at\s+risk|in\s+danger) | |
| |inconvenien(ce|cing)|discomfort|hardship|problem\s+for | |
| |causing\s+inconvenience|creating\s+(problem|issue|hazard|risk) | |
| |life\s+(at\s+risk|threatening|in\s+danger)|risk\s+to\s+life | |
| |health\s+(risk|hazard|danger|issue|problem) | |
| |fell\s+ill|fallen\s+ill|got\s+sick|getting\s+sick|falling\s+ill | |
| |vomiting|diarrhea|stomach\s+(pain|ache|issue)|fever\s+(due\s+to|from|after) | |
| |infection\s+(due\s+to|from|caused\s+by|spread\s+by) | |
| |disease\s+(spreading|spread|due\s+to|from|caused\s+by) | |
| |outbreak\s+(of|due\s+to)|epidemic\s+(due\s+to|from) | |
| |vehicle\s+(damaged|got\s+damaged|broke\s+down|tyres?)\s+(due\s+to|because\s+of) | |
| |tyre\s+(burst|puncture|flat)\s+(due\s+to|because\s+of) | |
| |two.?wheeler\s+(fell|slipped|skidded|accident)\s+(due\s+to|because\s+of))\b | |
| | \b(biting|attacking|chasing|menace|nuisance|threat(ening)?|aggressive|ferocious)\b | |
| # ── Formal complaint / escalation ──────────────────── | |
| | \b(complain(t|ing|ed)?|grievance|report(ing|ed)?|escalat(e|ing|ed) | |
| |filing|lodge[d]?|registered|raising\s+(a\s+)?(complaint|issue|grievance) | |
| |bringing\s+to\s+(notice|attention)|lodging\s+(a\s+)?(complaint|grievance) | |
| |submitting\s+(a\s+)?(complaint|grievance) | |
| |reporting\s+(this|the\s+issue|the\s+problem|the\s+matter) | |
| |writing\s+(to\s+)?(complain|regarding|about|to\s+report) | |
| |informing\s+(you|the\s+authorities|officials)\s+(about|of|regarding) | |
| |notifying\s+(you|the\s+authorities)\s+(about|of|regarding))\b | |
| # ── Urgency / priority ──────────────────────────────── | |
| | \b(urgent(ly)?|immediately|emergency|as\s+soon\s+as\s+possible|asap | |
| |high\s+priority|critical|serious(ly)?|without\s+delay|at\s+the\s+earliest | |
| |earliest\s+possible|top\s+priority|very\s+important|most\s+important | |
| |cannot\s+wait|cannot\s+be\s+delayed|needs\s+immediate\s+attention)\b | |
| # ── Concession / contrast ───────────────────────────── | |
| | \b(despite|although|even\s+(though|after)|in\s+spite\s+of|inspite|notwithstanding | |
| |however|but\s+(no\s+(action|response)|still|nothing\s+done) | |
| |after\s+(many|multiple|several)\s+(complaints?|requests?|visits?) | |
| |after\s+complaining\s+(many\s+times|multiple\s+times|repeatedly) | |
| |despite\s+(complaining|reporting|raising|requesting) | |
| |no\s+(one|official|authority)\s+(came|visited|responded|acted|checked))\b | |
| # ── Rhetorical / questioning (hard intent) ──────────── | |
| | \b(why\s+(is|are|has|have|no|not|isn.t|aren.t|hasn.t|haven.t|was|were) | |
| |when\s+will|how\s+long\s+(will|has|have|does|is|are|do) | |
| |how\s+many\s+(days?|weeks?|months?|times?)\s+(will|has|have|do|does|more) | |
| |till\s+when|until\s+when|for\s+how\s+long | |
| |who\s+is\s+responsible|who\s+will\s+(fix|repair|address|resolve|act|respond) | |
| |what\s+action\s+(has|have|was|were|will)\s+(been\s+)?(taken|done) | |
| |is\s+(anyone|anybody|no\s+one|nobody)\s+(responsible|listening|taking\s+action) | |
| |are\s+(you|they|authorities)\s+(aware|listening|going\s+to\s+fix))\b | |
| # ── Terminal question mark (implied complaint) ──────── | |
| | (\?\s*$) | |
| # ── Indic complaint intent — Telugu ────────────────── | |
| | (\u0c1c\u0c35\u0c3e\u0c2c\u0c41\s*\u0c32\u0c47\u0c26\u0c41|\u0c1a\u0c30\u0c4d\u0c2f\s*\u0c32\u0c47\u0c26\u0c41 | |
| |\u0c06\u0c32\u0c38\u0c4d\u0c2f\u0c02|\u0c26\u0c2f\u0c1a\u0c47\u0c38\u0c3f|\u0c38\u0c30\u0c3f\u0c1a\u0c47\u0c2f\u0c02\u0c21\u0c3f | |
| |\u0c05\u0c2d\u0c4d\u0c2f\u0c30\u0c4d\u0c25\u0c28|\u0c1a\u0c42\u0c38\u0c41\u0c15\u0c4b\u0c02\u0c21\u0c3f | |
| |\u0c35\u0c46\u0c02\u0c1f\u0c28\u0c47|\u0c24\u0c15\u0c4d\u0c37\u0c23\u0c02|\u0c38\u0c2e\u0c38\u0c4d\u0c2f\u0c32\u0c41) | |
| # ── Indic complaint intent — Hindi ─────────────────── | |
| | (\u091c\u0935\u093e\u092c\s*\u0928\u0939\u0940\u0902|\u0915\u093e\u0930\u094d\u0930\u0935\u093e\u0908\s*\u0928\u0939\u0940\u0902 | |
| |\u0926\u0947\u0930\u0940|\u0915\u0943\u092a\u092f\u093e|\u0924\u0941\u0930\u0902\u0924|\u0920\u0940\u0915\s*\u0915\u0930\u0947\u0902 | |
| |\u0928\u093f\u0935\u0947\u0926\u0928|\u0927\u094d\u092f\u093e\u0928\s*\u0926\u0947\u0902|\u0905\u0928\u0941\u0930\u094b\u0927 | |
| |\u0936\u093f\u0915\u093e\u092f\u0924|\u0938\u092e\u0938\u094d\u092f\u093e\s*\u0939\u0948) | |
| """, | |
| re.VERBOSE | re.IGNORECASE, | |
| ) | |
| # ── Special case: animal harm without explicit civic noun ───────────────────── | |
# Self-contained detector for "an animal is harming people" complaints, so that
# such text can be accepted even when it names no civic noun.
# The pattern has three alternatives:
#   1. animal ... harm word ... victim   (each gap is at most 80 characters)
#   2. victim ... harm word ... animal   (each gap is at most 80 characters)
#   3. fixed phrases such as "dog bite" or "bitten by a stray"
# NOTE(review): the pattern is compiled without re.DOTALL, so the ".{0,80}"
# gaps cannot cross a newline — confirm multi-line text is flattened upstream.
_ANIMAL_HARM_PATTERN = re.compile(
    r"""
    \b(dogs?|cats?|cattle|cow|bull|buffalo|animal|stray)\b
    .{0,80}
    \b(biting|bitten|attacked|attacking|chasing|bit|injured|hurt|menace|nuisance
    |aggressive|ferocious|dangerous|terrorizing|chased)\b
    .{0,80}
    \b(people|residents|children|child|person|public|woman|man|commuters?
    |pedestrians?|passers?-?by|student|students|elderly|senior)\b
    |
    \b(people|residents|children|child|person|public|student|students)\b
    .{0,80}
    \b(bitten|attacked|chased|injured|hurt|scared|harassed|afraid|terrorized)\b
    .{0,80}
    \b(dogs?|cats?|cattle|cow|bull|buffalo|animal|stray)\b
    |
    \b(dog\s+bite|animal\s+bite|bitten\s+by\s+(a\s+)?(dog|stray|animal|cow|bull)
    |attacked\s+by\s+(a\s+)?(dog|stray|animal|cow|bull)
    |chased\s+by\s+(a\s+)?(dog|stray|animal|cow|bull))\b
    """,
    re.VERBOSE | re.IGNORECASE,
)
| # ── Conversational / non-grievance rejection ────────────────────────────────── | |
# Matches input that consists ENTIRELY of conversational filler: a greeting,
# small talk, thanks, an acknowledgement, a farewell, "test"/"testing", or any
# one- or two-letter string.  The pattern is anchored with ^ ... $ and only
# tolerates surrounding whitespace and trailing "!", "?" or "." characters, so
# a greeting followed by a real complaint does not match.
_NON_GRIEVANCE_PATTERNS = re.compile(
    r"""
    ^[\s]*(
    good\s*(morning|afternoon|evening|night|day|nite)
    | (hello+|hi+|hey+|howdy|greetings|namaste|namaskar|vanakkam|
    salam|salaam|kem\s+cho|hii+|heyy+|helloo+)
    | how\s+are\s+you(\s+doing)?|how\s+r\s+u|how\s+do\s+you\s+do
    | how\s+is\s+it\s+going|how.?s\s+(it|everything|life|things)
    | what.?s\s+(up|going\s+on|new|happening|cooking|the\s+matter)
    | what\s+are\s+you\s+doing|are\s+you\s+there|you\s+there\??
    | thank\s+you(\s+so\s+much)?|thanks(\s+a\s+(lot|ton|bunch))?|thank\s+u|thx|ty
    | ok(ay)?\.?|sure\.?|fine\.?|yep\.?|nope\.?|yes\.?|no\.?
    | alright\.?|alrite\.?|hmm+\.?|huh\.?|oh\.?|ah\.?|uh\.?
    | bye+\.?|goodbye\.?|good\s+bye\.?|see\s+you(\s+later)?
    | take\s+care\.?|talk\s+(to\s+you\s+)?later\.?|ttyl|brb|gtg
    | test(ing)?[\s\d!.]*|[a-zA-Z]{1,2}
    )[\s!?.]*$
    """,
    re.VERBOSE | re.IGNORECASE,
)
# Minimum stripped length (in characters) for the grievance-intent check:
# _is_grievance() rejects anything shorter, and validate_text() only runs the
# intent check on text at least this long.
_MIN_GRIEVANCE_CHECK_LEN = 8

# User-facing messages keyed by the error codes that validate_text() returns.
_VALIDATION_MESSAGES = {
    "too_short": "Text is too short. Please provide at least 5 characters.",
    "junk_input": "Input contains only numbers or special characters.",
    "not_a_grievance": (
        "Your message does not appear to be a grievance or civic complaint. "
        "Please describe the issue you are facing — for example: pothole on "
        "the road, water supply disruption, electricity outage, garbage not "
        "collected, stray dogs biting residents, or any other civic problem."
    ),
}
def _is_grievance(text: str) -> bool:
    """
    Decide whether ``text`` reads as a civic grievance.

    The check is designed around three difficulty levels:
      EASY   — direct nouns: "pothole", "no water", "garbage on road"
      MEDIUM — descriptive: "road is very bad", "light not working since 3 days"
      HARD   — indirect/narrative: "fell ill after drinking tap water",
               "tyre burst due to road condition", "children afraid to walk
               to school because of stray dogs"

    Decision order:
      1. Reject when the whole input is only a greeting/filler phrase, or
         when it is shorter than ``_MIN_GRIEVANCE_CHECK_LEN`` characters.
      2. Accept when the animal-harm pattern matches anywhere.
      3. Accept when any civic-topic pattern matches anywhere.
      4. Otherwise reject.

    Returns True for a grievance, False otherwise.
    """
    candidate = text.strip()

    # Rejections come first: pure small talk, then input too short to judge.
    is_pure_filler = _NON_GRIEVANCE_PATTERNS.match(candidate) is not None
    if is_pure_filler or len(candidate) < _MIN_GRIEVANCE_CHECK_LEN:
        return False

    # Either detector alone is sufficient; the animal-harm check runs first.
    detectors = (_ANIMAL_HARM_PATTERN, _CIVIC_TOPIC)
    return any(detector.search(candidate) for detector in detectors)
def validate_text(text) -> tuple:
    """
    Validate grievance text typed by a user.

    Checks run in this order and the first failure wins:
      * non-string input or fewer than 5 stripped characters -> "too_short"
      * the lower-cased text fully matches ``_RE_JUNK``       -> "junk_input"
      * text of at least ``_MIN_GRIEVANCE_CHECK_LEN`` characters that
        ``_is_grievance`` rejects                              -> "not_a_grievance"

    Returns:
        ``(True, None)`` when the text is accepted, otherwise
        ``(False, error_code)``.
    """
    if not isinstance(text, str):
        return False, "too_short"

    cleaned = text.strip()
    n_chars = len(cleaned)

    error_code = None
    if n_chars < 5:
        error_code = "too_short"
    elif _RE_JUNK.fullmatch(cleaned.lower()):
        error_code = "junk_input"
    elif n_chars >= _MIN_GRIEVANCE_CHECK_LEN and not _is_grievance(cleaned):
        # Text of 5-7 characters never reaches the intent check.
        error_code = "not_a_grievance"

    return error_code is None, error_code
| def _validate_machine_text(text: str, source: str) -> tuple: | |
| """ | |
| Lightweight validation for machine-generated text. | |
| Skips grievance intent — only checks length and junk. | |
| """ | |
| if not text or len(text.strip()) < 5: | |
| code = "image_unreadable" if source == "image" else "audio_unreadable" | |
| msg = ( | |
| "Could not extract meaningful content from the image. " | |
| "Please upload a clearer photo of the civic issue." | |
| if source == "image" else | |
| "Could not transcribe audio. Please try again with a clearer recording." | |
| ) | |
| return False, {"status": "failed", "code": code, "message": msg} | |
| if _RE_JUNK.fullmatch(text.strip().lower()): | |
| return False, {"status": "failed", "code": "junk_input", | |
| "message": _VALIDATION_MESSAGES["junk_input"]} | |
| return True, None | |
| # ========================================================= | |
| # ADVANCED CIVIC IMAGE RELEVANCE SCORER | |
| # ========================================================= | |
# Keyword lexicon used by score_civic_relevance() to score an image caption.
# Each category maps to two term lists:
#   "primary"   — specific phrases; each one found in the caption adds 2 points
#   "secondary" — generic words;    each one found in the caption adds 1 point
# Terms are tested as plain substrings of the lower-cased caption, so every
# entry must be lower case.  Some terms appear under more than one category
# (for example "drain overflow", "sewage", "stench") and then score in each.
_CIVIC_LEXICON = {
    "roads": {
        "primary": [
            "pothole", "crater", "road damage", "road broken", "road crack",
            "broken road", "damaged road", "road cave", "road collapse",
            "unpaved road", "road debris", "road blocked", "road obstruction",
            "road construction", "barricade", "road divider", "speed breaker",
            "road marking faded", "road sign missing", "footpath broken",
            "pavement crack", "sidewalk damage", "footpath blocked",
            "road waterlogging", "road pothole", "tarmac damage",
            "road pit", "open trench", "road excavation",
        ],
        "secondary": [
            "road", "street", "highway", "lane", "path", "pavement",
            "footpath", "sidewalk", "tar", "asphalt", "concrete slab",
            "gravel", "mud road", "dirt road", "traffic", "pedestrian",
            "junction", "intersection", "bridge", "culvert", "divider",
            "median", "overpass", "underpass", "flyover",
        ],
    },
    "water": {
        "primary": [
            "water leak", "pipe burst", "pipe leakage", "water overflow",
            "broken pipe", "waterlogged road", "waterlogged street",
            "flooded road", "flooded street", "open drain overflowing",
            "drain overflow", "sewage overflow", "no water supply",
            "water supply disrupted", "water stagnation", "water contamination",
            "dirty water supply", "muddy water pipe", "water tank overflow",
            "exposed water pipe", "water seeping road",
        ],
        "secondary": [
            "water", "pipe", "drain", "drainage", "sewage", "sewer",
            "tank", "pump", "valve", "tap", "leak", "flood", "puddle",
            "waterlogging", "stagnant", "overflow", "canal", "borewell",
            "water main", "supply line", "gutter", "nala",
        ],
    },
    "electricity": {
        "primary": [
            "broken streetlight", "streetlight not working", "dark street at night",
            "fallen electric pole", "fallen wire on road", "dangling wire",
            "exposed electric wire", "naked wire", "live wire on ground",
            "electric sparks", "transformer damage", "transformer fire",
            "electric box open", "power line down", "electric pole broken",
            "meter box damaged", "electric pole leaning", "snapped cable",
            "wire hanging low", "substation damage",
        ],
        "secondary": [
            "electric", "electricity", "wire", "cable", "pole", "transformer",
            "streetlight", "lamp post", "street lamp", "power", "voltage",
            "meter", "fuse box", "tower", "pylon", "grid", "line",
            "conductor", "insulator", "junction box",
        ],
    },
    "garbage": {
        "primary": [
            "garbage pile", "waste pile", "trash pile", "litter pile",
            "garbage dump", "illegal dump site", "open garbage",
            "overflowing garbage bin", "uncollected garbage", "garbage on road",
            "waste on street", "burning garbage", "garbage fire",
            "plastic waste heap", "construction debris dumped illegally",
            "garbage bin broken", "garbage bin missing", "rotting waste",
            "animal carcass dumped", "household waste on road",
        ],
        "secondary": [
            "garbage", "waste", "trash", "litter", "debris", "rubbish",
            "bin", "dustbin", "dumpster", "dump", "refuse", "plastic",
            "polythene", "bag", "bottle", "can", "heap", "pile",
            "filth", "dirty", "unclean", "stench", "decompose",
        ],
    },
    "sanitation": {
        "primary": [
            "broken public toilet", "public toilet dirty", "toilet blocked",
            "toilet overflow", "urinating in public", "open defecation spot",
            "sewage pit open", "manhole open", "manhole cover missing",
            "open manhole on road", "uncovered drain", "drain blocked",
            "drain choked", "drain overflow", "mosquito breeding site",
            "stagnant sewage pool", "open sewer", "foul drain",
        ],
        "secondary": [
            "toilet", "latrine", "bathroom", "manhole", "sewer", "sewage",
            "drain", "smell", "stench", "odour", "hygiene", "sanitation",
            "cleanliness", "mosquito", "rat", "pest", "flies", "filth",
        ],
    },
    "pollution": {
        "primary": [
            "black smoke from chimney", "factory smoke emission",
            "smoke from burning garbage", "burning tyres smoke",
            "burning plastic smoke", "air pollution haze",
            "dust pollution on road", "chemical spill on road",
            "oil spill on road", "toxic waste dumped in water",
            "polluted river", "polluted lake", "river garbage",
        ],
        "secondary": [
            "smoke", "smog", "dust", "pollution", "emission", "toxic",
            "chemical", "factory", "industry", "exhaust", "fume",
            "haze", "contamination", "spill", "river", "lake",
            "pond", "burning", "ash", "soot",
        ],
    },
    "public transport": {
        "primary": [
            "bus stop broken", "bus shelter damaged", "bus stop sign missing",
            "auto stand encroachment", "bus stand blocked",
            "broken down bus on road", "public vehicle accident",
            "road accident scene", "vehicle overturned on road",
            "bus stop bench broken",
        ],
        "secondary": [
            "bus", "auto", "rickshaw", "taxi", "stop", "shelter",
            "stand", "route", "transport", "vehicle", "commute",
            "passenger", "queue",
        ],
    },
    "stray animals": {
        "primary": [
            "stray dogs on road", "pack of stray dogs",
            "cattle blocking road", "stray cattle on highway",
            "animal carcass on road", "dead animal on street",
            "dog bite victim", "injured stray animal on road",
        ],
        "secondary": [
            "stray", "dog", "cow", "cattle", "animal", "carcass",
            "dead animal", "bite", "attack", "menace", "herd",
            "buffalo", "goat", "pig",
        ],
    },
    "building": {
        "primary": [
            "building collapse", "wall collapse", "collapsed structure",
            "crumbling wall on road", "dangerous building", "unsafe structure",
            "major building crack", "illegal construction blocking road",
            "encroachment on footpath", "dilapidated building",
            "building rubble on road", "demolition debris on road",
        ],
        "secondary": [
            "building", "wall", "structure", "crack", "collapse",
            "construction", "demolition", "rubble", "bricks",
            "pillar", "beam", "slab", "roof", "cement", "scaffolding",
        ],
    },
}
# Minimum total lexicon score (summed over all categories) for a caption to
# count as civic-relevant in score_civic_relevance().
CIVIC_RELEVANCE_THRESHOLD = 2

# Hard veto list: if any of these phrases occurs anywhere in a caption,
# score_civic_relevance() reports the image as not relevant with score 0 and
# skips lexicon scoring.  Groups, in order: portraits/selfies, food,
# scenery/nature, family or festive events, pets and wildlife,
# screenshots/adverts, indoor scenes.
_NON_CIVIC_OVERRIDE = re.compile(
    r"""
    \b(selfie|portrait\s+photo|person\s+posing|man\s+smiling|woman\s+smiling
    |people\s+laughing|group\s+photo|family\s+photo|couple\s+photo
    |person\s+standing\s+in\s+front)\b
    | \b(food|meal|dish|plate\s+of|cooking|restaurant|cafe|eating|drinking
    |biryani|curry|pizza|burger|snack\s+food|fruit\s+bowl
    |vegetable\s+market\s+stall)\b
    | \b(flower\s+garden|blooming\s+flower|garden\s+path|scenic\s+nature
    |beautiful\s+sunset|sunrise\s+sky|rainbow\s+sky|mountain\s+view
    |beach\s+scenery|forest\s+trail|green\s+landscape|paddy\s+field
    |agricultural\s+field)\b
    | \b(baby\s+playing|infant|child\s+playing\s+in\s+park|wedding\s+ceremony
    |birthday\s+party|celebration\s+event|festival\s+decoration
    |religious\s+ceremony)\b
    | \b(cat\s+sitting|dog\s+playing\s+in\s+yard|pet\s+dog|pet\s+cat
    |bird\s+perched|butterfly\s+on\s+flower|insect\s+on\s+flower)\b
    | \b(screenshot\s+of|meme|advertisement\s+poster|promotional\s+banner
    |movie\s+poster|product\s+photo)\b
    | \b(bedroom|living\s+room\s+interior|kitchen\s+interior|office\s+desk
    |laptop\s+on\s+desk|indoor\s+plant)\b
    """,
    re.VERBOSE | re.IGNORECASE,
)
# Maps a lower-cased grievance category label to its key in _CIVIC_LEXICON.
# check_evidence_relevance() skips the image/text category comparison when the
# lookup yields None, which is the case for "other" and for any unknown label.
# Note that the "building" lexicon category has no entry here.
_CATEGORY_TO_LEXICON = {
    "electricity": "electricity",
    "garbage": "garbage",
    "pollution": "pollution",
    "public transport": "public transport",
    "roads": "roads",
    "sanitation": "sanitation",
    "stray animals": "stray animals",
    "water": "water",
    "other": None,
}
| _OVERLAPPING_CATEGORIES = [ | |
| {"garbage", "sanitation"}, | |
| {"garbage", "pollution"}, | |
| {"sanitation", "water"}, | |
| {"water", "roads"}, | |
| {"electricity", "roads"}, | |
| {"pollution", "sanitation"}, | |
| {"stray animals", "roads"}, | |
| {"stray animals", "sanitation"}, | |
| ] | |
| def _categories_overlap(cat_a: str, cat_b: str) -> bool: | |
| return {cat_a, cat_b} in _OVERLAPPING_CATEGORIES | |
def score_civic_relevance(caption: str) -> dict:
    """
    Score how strongly an image caption points at a civic issue.

    The caption is lower-cased and first tested against
    ``_NON_CIVIC_OVERRIDE``; any hit vetoes the image outright.  Otherwise
    every term of ``_CIVIC_LEXICON`` found in the caption adds to its
    category's score (2 per primary term, 1 per secondary term).  The image is
    relevant when the sum over all categories reaches
    ``CIVIC_RELEVANCE_THRESHOLD``.

    Args:
        caption: Caption text.  ``None`` is treated like an empty caption.

    Returns:
        A dict with the keys:
          ``is_relevant``      — bool verdict
          ``score``            — total score across all categories
          ``matched_category`` — highest-scoring category, or None
          ``matched_terms``    — sorted list of distinct matched terms
          ``override_reason``  — veto explanation, or None
    """
    # A missing caption must not crash the scorer; it simply scores zero.
    text = caption or ""
    caption_lower = text.lower()

    override_match = _NON_CIVIC_OVERRIDE.search(caption_lower)
    if override_match:
        return {
            "is_relevant": False,
            "score": 0,
            "matched_category": None,
            "matched_terms": [],
            "override_reason": f"Non-civic content detected: '{override_match.group()}'",
        }

    # NOTE(review): terms are matched as plain substrings, so short terms also
    # hit inside longer words ("bag" in "garbage", "ash" in "trash").  The
    # threshold may depend on these extra hits — confirm before tightening
    # the match to word boundaries.
    category_scores: dict = {}
    matched_terms: set = set()
    for cat, terms in _CIVIC_LEXICON.items():
        primary_hits = [term for term in terms["primary"] if term in caption_lower]
        secondary_hits = [term for term in terms["secondary"] if term in caption_lower]
        score = 2 * len(primary_hits) + len(secondary_hits)
        if score > 0:
            category_scores[cat] = score
            matched_terms.update(primary_hits, secondary_hits)

    total_score = sum(category_scores.values())
    top_category = max(category_scores, key=category_scores.get) if category_scores else None
    is_relevant = total_score >= CIVIC_RELEVANCE_THRESHOLD
    logger.info("[civic-score] caption='%s...' score=%d top_cat='%s' relevant=%s",
                text[:80], total_score, top_category, is_relevant)
    return {
        "is_relevant": is_relevant,
        "score": total_score,
        "matched_category": top_category,
        # Sorted so the response is identical from run to run; iteration order
        # of a set of strings is not stable across interpreter processes.
        "matched_terms": sorted(matched_terms),
        "override_reason": None,
    }
def check_evidence_relevance(caption: str, grievance_category: str | None = None) -> dict:
    """
    Judge whether an image caption supports the submitted grievance.

    The caption is scored with ``score_civic_relevance``.  A caption that is
    not civic-relevant is rejected.  When a grievance category is supplied
    and both it and the caption map to lexicon categories, differing
    categories are tolerated only if ``_categories_overlap`` accepts the pair.

    Args:
        caption: Caption text describing the image.
        grievance_category: Category assigned to the complaint text, if any.

    Returns:
        A dict with ``evidence_relevant`` (bool), ``evidence_note`` (str) and
        ``civic_score`` (the caption's total relevance score).
    """
    relevance = score_civic_relevance(caption)
    civic_score = relevance["score"]

    # Case 1: the image itself does not look like a civic issue.
    if not relevance["is_relevant"]:
        explanation = relevance["override_reason"] or (
            f"Image does not appear to show a civic issue "
            f"(caption: '{caption[:60]}', score: {civic_score})."
        )
        return {"evidence_relevant": False, "evidence_note": explanation,
                "civic_score": civic_score}

    # Case 2: the image is civic, but about a clearly different category.
    image_category = relevance["matched_category"]
    expected_category = (
        _CATEGORY_TO_LEXICON.get(grievance_category.lower())
        if grievance_category
        else None
    )
    comparable = bool(expected_category and image_category)
    if comparable and image_category != expected_category:
        if not _categories_overlap(image_category, expected_category):
            mismatch_note = (
                f"Image appears to show a '{image_category}' issue but the grievance "
                f"is classified as '{grievance_category}'. "
                f"The image may not directly support this complaint — "
                f"consider retaking a more relevant photo."
            )
            return {"evidence_relevant": False, "evidence_note": mismatch_note,
                    "civic_score": civic_score}
        logger.info("[civic-score] Overlapping categories img='%s' bert='%s' — valid",
                    image_category, expected_category)

    # Case 3: the image is accepted as supporting evidence.
    category_label = image_category or "general civic"
    accepted_note = (
        f"Image contains civic content related to '{category_label}' "
        f"(visual relevance score: {civic_score}). "
        f"Note: GIT scores the image visually; BERT classifies the complaint text — "
        f"they may show different but related categories."
    )
    return {
        "evidence_relevant": True,
        "evidence_note": accepted_note,
        "civic_score": civic_score,
    }
| # ========================================================= | |
| # WARD BOUNDING BOXES — Kakinada Municipal Corporation | |
| # ========================================================= | |
# Bounding box per ward, keyed by the lower-cased ward name that
# validate_area_vs_coords() derives from the submitted area.
# Each value is (lat_min, lat_max, lon_min, lon_max) and is compared directly
# against GPS coordinates; many boxes overlap one another.
WARD_BOUNDS = {
    "suryaraopeta": (16.980, 17.010, 82.230, 82.260),
    "jagannaickpur": (16.970, 17.000, 82.240, 82.270),
    "raja rao peta": (16.975, 17.005, 82.245, 82.275),
    "bhanugudi": (16.960, 16.990, 82.250, 82.280),
    "old town": (16.990, 17.020, 82.220, 82.250),
    "rajah street": (16.985, 17.015, 82.225, 82.255),
    "main road": (16.980, 17.010, 82.235, 82.265),
    "gandhi nagar": (16.975, 17.005, 82.240, 82.270),
    "ashok nagar": (16.970, 17.000, 82.245, 82.275),
    "nethaji nagar": (16.965, 16.995, 82.240, 82.270),
    "srinivasa nagar": (16.960, 16.990, 82.245, 82.275),
    "tngo colony": (16.955, 16.985, 82.250, 82.280),
    "shankar vilas": (16.975, 17.005, 82.235, 82.265),
    "collector's colony": (16.980, 17.010, 82.240, 82.270),
    "new town": (16.990, 17.020, 82.235, 82.265),
    "bank colony": (16.985, 17.015, 82.230, 82.260),
    "drivers colony": (16.970, 17.000, 82.250, 82.280),
    "fci colony": (16.965, 16.995, 82.255, 82.285),
    "burma colony": (16.960, 16.990, 82.255, 82.285),
    "dwaraka nagar": (16.975, 17.005, 82.245, 82.275),
    "ayodhya nagar": (16.970, 17.000, 82.240, 82.270),
    "kakinada port area": (16.940, 16.970, 82.260, 82.300),
    "kakinada industrial area": (16.930, 16.960, 82.255, 82.295),
    "fishing harbour": (16.935, 16.965, 82.265, 82.305),
    "dairy farm": (16.950, 16.980, 82.250, 82.280),
    "auto nagar": (16.945, 16.975, 82.255, 82.285),
    "kaleswara rao nagar": (16.980, 17.010, 82.245, 82.275),
    "ramanayyapeta": (16.975, 17.005, 82.250, 82.280),
    "rama rao peta": (16.970, 17.000, 82.245, 82.275),
    "kondayya palem": (16.965, 16.995, 82.245, 82.275),
    "ganganapalle": (16.960, 16.990, 82.240, 82.270),
    "gudari gunta": (16.955, 16.985, 82.245, 82.275),
    "indrapalem": (16.950, 16.980, 82.245, 82.275),
    "sarpavaram": (16.945, 16.975, 82.245, 82.275),
    "uppada": (16.960, 16.990, 82.290, 82.330),
    "kaikavolu": (17.020, 17.060, 82.250, 82.290),
    "kothuru": (17.010, 17.050, 82.255, 82.295),
    "thammavaram": (17.000, 17.040, 82.255, 82.295),
    "thimmapuram": (16.995, 17.035, 82.250, 82.290),
    "vivekananda street": (16.985, 17.015, 82.240, 82.270),
    "jr ntr road": (16.980, 17.010, 82.235, 82.265),
    "jntu kakinada area": (16.950, 16.980, 82.260, 82.300),
    "govt general hospital area": (16.975, 17.005, 82.235, 82.265),
    "apsp camp": (16.960, 16.990, 82.260, 82.300),
    "kakinada beach road": (16.950, 16.980, 82.270, 82.310),
    "kakinada bazar": (16.985, 17.015, 82.230, 82.260),
    "anjaneya nagar": (16.970, 17.000, 82.255, 82.285),
    "kothapalli": (17.070, 17.110, 82.295, 82.340),
    "surampalem": (17.075, 17.105, 82.050, 82.085),
}

# Margin, in degrees, by which validate_area_vs_coords() widens every side of
# a ward box before testing a coordinate against it.
WARD_TOLERANCE_DEG = 0.015
| # ========================================================= | |
| # GEO HELPERS | |
| # ========================================================= | |
| def _dms_to_decimal(dms, ref: str) -> float: | |
| degrees = dms[0][0] / dms[0][1] | |
| minutes = dms[1][0] / dms[1][1] | |
| seconds = dms[2][0] / dms[2][1] | |
| decimal = degrees + minutes / 60 + seconds / 3600 | |
| if ref in ("S", "W"): | |
| decimal = -decimal | |
| return decimal | |
def extract_gps_from_image(image_bytes: bytes) -> tuple | None:
    """
    Read the GPS position stored in an image's EXIF metadata.

    Args:
        image_bytes: Raw bytes of the uploaded image file.

    Returns:
        ``(latitude, longitude)`` in signed decimal degrees, or ``None`` when
        the image has no EXIF block, no GPS section, an incomplete GPS
        record, or cannot be decoded at all.

    This is a best-effort reader: it never raises.  Failures are logged so
    that a corrupt upload can be told apart from an image without GPS data.
    """
    try:
        img = Image.open(io.BytesIO(image_bytes))
        exif_bytes = img.info.get("exif")
        logger.info("EXIF present: %s", exif_bytes is not None)
        if not exif_bytes:
            return None

        exif_data = piexif.load(exif_bytes)
        gps_data = exif_data.get("GPS", {})
        if not gps_data:
            return None

        lat_dms = gps_data.get(piexif.GPSIFD.GPSLatitude)
        lat_ref = gps_data.get(piexif.GPSIFD.GPSLatitudeRef)
        lon_dms = gps_data.get(piexif.GPSIFD.GPSLongitude)
        lon_ref = gps_data.get(piexif.GPSIFD.GPSLongitudeRef)
        # All four tags are needed to build a signed coordinate pair.
        if not (lat_dms and lat_ref and lon_dms and lon_ref):
            return None

        # The hemisphere reference may arrive as bytes; normalise it to str.
        lat = _dms_to_decimal(lat_dms, lat_ref.decode() if isinstance(lat_ref, bytes) else lat_ref)
        lon = _dms_to_decimal(lon_dms, lon_ref.decode() if isinstance(lon_ref, bytes) else lon_ref)
        return lat, lon
    except Exception as exc:
        # Deliberately broad: any decode or parse failure means "no usable
        # GPS".  Log the cause instead of discarding it silently.
        logger.warning("[location] Could not read EXIF GPS from image: %s", exc)
        return None
def is_kakinada(lat: float, lon: float) -> bool:
    """
    Report whether a coordinate lies inside the Kakinada acceptance box.

    The box spans latitude 16.85-17.10 and longitude 82.00-82.35, both ends
    inclusive.  Inputs are converted with ``float()``; a value that cannot be
    converted yields False rather than an exception.
    """
    try:
        if not 16.85 <= float(lat) <= 17.10:
            return False
        # Longitude is only converted once the latitude has passed.
        return 82.00 <= float(lon) <= 82.35
    except (TypeError, ValueError):
        return False
def check_image_location(image_bytes: bytes) -> str:
    """
    Classify an image by the GPS position found in its EXIF data.

    Returns:
        ``"no_gps"`` when no position can be extracted, ``"valid"`` when the
        position passes ``is_kakinada``, and ``"invalid"`` otherwise.
    """
    position = extract_gps_from_image(image_bytes)
    if position is None:
        return "no_gps"

    latitude, longitude = position
    if is_kakinada(latitude, longitude):
        return "valid"
    return "invalid"
def validate_area_vs_coords(area: str, lat: float, lon: float) -> tuple:
    """
    Check that a coordinate falls inside the bounding box of the chosen ward.

    The ward is looked up in ``WARD_BOUNDS`` by the stripped, lower-cased
    area name, and its box is widened by ``WARD_TOLERANCE_DEG`` on every side.

    Returns:
        ``(True, "unknown_area")`` when the ward has no bounding box (the
        check is skipped), ``(True, "valid")`` when the coordinate is inside
        the widened box, and ``(False, reason)`` with a user-facing
        explanation otherwise.
    """
    ward_box = WARD_BOUNDS.get(area.strip().lower())
    if ward_box is None:
        logger.warning("[ward-check] Area '%s' not in WARD_BOUNDS — skipping", area)
        return True, "unknown_area"

    south, north, west, east = ward_box
    margin = WARD_TOLERANCE_DEG
    inside = (
        (south - margin) <= lat <= (north + margin)
        and (west - margin) <= lon <= (east + margin)
    )

    if not inside:
        reason = (
            f"Image GPS ({lat:.6f}, {lon:.6f}) does not match the selected ward "
            f"'{area}'. Please select the correct ward or retake the photo from "
            f"within the reported area."
        )
        logger.warning("[ward-check] FAILED — %s", reason)
        return False, reason

    logger.info("[ward-check] PASSED — area='%s' lat=%.6f lon=%.6f", area, lat, lon)
    return True, "valid"
def resolve_location_status(image_bytes: bytes, area: str = "") -> tuple:
    """
    Determine whether an uploaded image was taken at an acceptable location.

    Coordinates come from the image's EXIF GPS data when present.  Otherwise
    they are read from the current request's form fields ``latitude``/``lat``
    and ``longitude``/``lng``.  The position must pass ``is_kakinada`` and,
    when ``area`` is given, the ward check of ``validate_area_vs_coords``.

    Returns:
        ``(status, message)`` where ``status`` is ``"valid"``, ``"invalid"``
        or ``"no_gps"``; ``message`` is ``"ok"`` on success and a
        user-facing explanation otherwise.
    """
    exif_coords = extract_gps_from_image(image_bytes)

    if exif_coords:
        lat, lon = exif_coords
        kakinada_ok = is_kakinada(lat, lon)
        status = "valid" if kakinada_ok else "invalid"
        logger.info("[location] EXIF lat=%.6f lon=%.6f → kakinada=%s", lat, lon, kakinada_ok)
    else:
        # No EXIF position: fall back to coordinates posted with the form.
        form = request.form
        raw_lat = form.get("latitude") or form.get("lat")
        raw_lon = form.get("longitude") or form.get("lng")
        if not (raw_lat and raw_lon):
            return (
                "no_gps",
                "No GPS data found in image and no coordinates supplied. "
                "Please allow location access and retake the photo.",
            )
        try:
            lat = float(raw_lat)
            lon = float(raw_lon)
            status = "valid" if is_kakinada(lat, lon) else "invalid"
            logger.info("[location] Form coords lat=%.6f lon=%.6f → %s", lat, lon, status)
        except ValueError:
            return "no_gps", "Invalid GPS coordinates supplied."

    if status != "valid":
        return (
            "invalid",
            "Image location is outside Kakinada Municipal Corporation limits. "
            "Only grievances within Kakinada jurisdiction are accepted.",
        )

    # The ward-level check only runs when a ward was actually selected.
    if area and lat is not None and lon is not None:
        ward_ok, ward_reason = validate_area_vs_coords(area, lat, lon)
        if not ward_ok:
            return "invalid", ward_reason

    return "valid", "ok"
| # ========================================================= | |
| # LANGUAGE DETECTION | |
| # ========================================================= | |
def detect_language(text: str) -> str:
    """
    Guess the language of ``text`` from the scripts it contains.

    Returns ``"hindi"`` if ``_RE_HINDI`` matches anywhere, else ``"telugu"``
    if ``_RE_TELUGU`` matches anywhere, else ``"english"``.  Hindi wins for
    text that contains both scripts.
    """
    script_checks = (
        (_RE_HINDI, "hindi"),
        (_RE_TELUGU, "telugu"),
    )
    for script_pattern, language in script_checks:
        if script_pattern.search(text):
            return language
    return "english"
| # ========================================================= | |
| # APP INIT | |
| # ========================================================= | |
# Flask application object.  The request-size limit is taken from the
# MAX_UPLOAD_MB environment variable (default 32) and converted to bytes.
app = Flask(__name__)
app.config["MAX_CONTENT_LENGTH"] = int(os.environ.get("MAX_UPLOAD_MB", "32")) * 1024 * 1024

# All four model/tokenizer pairs are loaded once, at import time, so any
# loading failure surfaces when the module is imported.
logger.info("🔄 Loading models...")
cat_model_en, cat_tok_en = get_cat_en()
cat_model_indic, cat_tok_indic = get_cat_indic()
urg_model_en, urg_tok_en = get_urg_en()
urg_model_indic, urg_tok_indic = get_urg_indic()
logger.info("✅ Models loaded.")

# One Integrated Gradients explainer per model, built from the model and its
# tokenizer.
logger.info("🔄 Initializing Integrated Gradients explainers...")
category_explainer_en = IntegratedGradientsExplainer(cat_model_en, cat_tok_en)
category_explainer_indic = IntegratedGradientsExplainer(cat_model_indic, cat_tok_indic)
urgency_explainer_en = IntegratedGradientsExplainer(urg_model_en, urg_tok_en)
urgency_explainer_indic = IntegratedGradientsExplainer(urg_model_indic, urg_tok_indic)
logger.info("✅ Integrated Gradients ready.")

# Per-language bundle of predictors and explainers, keyed by language name.
# Only "english" is listed; _get_resources() serves every other language
# from _RESOURCES_INDIC.
_RESOURCES = {
    "english": {
        "cat_fn": predict_category_en,
        "urg_fn": predict_urgency_en,
        "cat_exp": category_explainer_en,
        "urg_exp": urgency_explainer_en,
    }
}

# Fallback bundle backed by the Indic models; same keys as a _RESOURCES entry.
_RESOURCES_INDIC = {
    "cat_fn": predict_category_indic,
    "urg_fn": predict_urgency_indic,
    "cat_exp": category_explainer_indic,
    "urg_exp": urgency_explainer_indic,
}
def _get_resources(language: str) -> dict:
    """Return the predictor/explainer bundle for ``language``.

    Languages without their own entry in ``_RESOURCES`` are served by the
    Indic bundle.
    """
    if language in _RESOURCES:
        return _RESOURCES[language]
    return _RESOURCES_INDIC
| # ========================================================= | |
| # HOTSPOT FORECAST | |
| # ========================================================= | |
# Category labels recognised by the hotspot forecast section.
# NOTE(review): the code that consumes this list is outside this part of the
# file — confirm how unknown labels are handled.
VALID_LABELS = [
    "electricity", "garbage", "pollution", "public transport",
    "roads", "sanitation", "stray animals", "water",
]

# Worker count read from the PROPHET_MAX_WORKERS environment variable
# (default 4); presumably the thread-pool size for parallel forecasts —
# verify against the caller.
_PROPHET_MAX_WORKERS = int(os.environ.get("PROPHET_MAX_WORKERS", "4"))
| RISK_LEVEL_THRESHOLDS = [(75, "Critical"), (50, "High"), (25, "Medium"), (0, "Low")] | |
| def _risk_to_level(score: float) -> str: | |
| for threshold, label in RISK_LEVEL_THRESHOLDS: | |
| if score >= threshold: | |
| return label | |
| return "Low" | |
| def _fit_and_forecast(area, category, group_df, horizon) -> dict | None: | |
| if group_df["ds"].nunique() < 2: | |
| return None | |
| ts = group_df[["ds", "y"]].sort_values("ds") | |
| model = Prophet(weekly_seasonality=False, daily_seasonality=False) | |
| model.fit(ts) | |
| future = model.make_future_dataframe(periods=horizon) | |
| forecast = model.predict(future) | |
| recent_avg = ts.tail(3)["y"].mean() | |
| fc_avg = forecast.tail(horizon)["yhat"].mean() | |
| growth = 0.0 if recent_avg == 0 else max( | |
| -500.0, min(500.0, ((fc_avg - recent_avg) / recent_avg) * 100) | |
| ) | |
| avg_pri = float(group_df["priorityScore"].mean()) | |
| raw_risk = 0.5 * (growth / 100) + 0.3 * avg_pri + 0.2 * (recent_avg / 5) | |
| risk_100 = round(100 / (1 + math.exp(-raw_risk)), 2) | |
| hfc = forecast.tail(horizon) | |
| yhat_range = (hfc["yhat_upper"] - hfc["yhat_lower"]).mean() | |
| yhat_mean = hfc["yhat"].abs().mean() | |
| confidence = round(1.0 - min(1.0, yhat_range / (yhat_mean + 1e-9)), 4) | |
| return { | |
| "area": area, "category": category, | |
| "riskScore": risk_100, "level": _risk_to_level(risk_100), | |
| "growthPercent": round(float(growth), 2), | |
| "forecastHorizonDays": horizon, "confidenceScore": confidence, | |
| "_recentAvg": round(float(recent_avg), 2), | |
| "_forecastAvg": round(float(fc_avg), 2), | |
| } | |
| # ========================================================= | |
| # HEALTH | |
| # ========================================================= | |
# NOTE(review): no route decorator was visible on this view in the reviewed
# source, so it was never registered with `app`. "/" is an assumed path —
# confirm the intended URL.
@app.route("/")
def health():
    """Return service status, version and the list of POST endpoints as JSON.

    The version comes from the APP_VERSION environment variable and falls
    back to "1.0.0".
    """
    endpoints = {
        "POST /predict": "Classify grievance — text/audio/image.",
        "POST /fairness-audit": "GFAS fairness audit.",
        "POST /hotspot-forecast": "Prophet hotspot forecasting.",
    }
    return jsonify({
        "status": "ok",
        "version": os.environ.get("APP_VERSION", "1.0.0"),
        "message": "Multilingual Grievance API (EN/HI/TE) with IG + GFAS — running",
        "endpoints": endpoints,
    })
# NOTE(review): no route decorator was visible on this view in the reviewed
# source, so it was never registered with `app`. "/health" is an assumed
# path — confirm the intended URL.
@app.route("/health")
def health_check():
    """Return a minimal ``{"status": "ok"}`` payload with HTTP 200."""
    return jsonify({"status": "ok"}), 200
| # ========================================================= | |
| # POST /predict | |
| # ========================================================= | |
def _coerce_explain_flag(value) -> bool:
    """Interpret the ``explain`` request value (JSON or form) as a boolean."""
    if isinstance(value, str):
        return value.strip().lower() in ("true", "1", "yes")
    return bool(value)


def _gather_image_evidence(image_bytes, area_field, mode_label):
    """Location-check, caption and relevance-score an evidence image (Modes D/E).

    Returns a ``(location_field, caption, relevance)`` tuple. A location that
    is not "valid" is reported as "invalid" but does not reject the request.
    """
    loc_status, loc_reason = resolve_location_status(image_bytes, area=area_field)
    if loc_status == "invalid":
        logger.warning(
            "[predict] Mode %s location issue — %s", mode_label, loc_reason
        )
    location_field = "valid" if loc_status == "valid" else "invalid"
    caption = extract_text_from_image(image_bytes)
    return location_field, caption, check_evidence_relevance(caption)


@app.route("/predict", methods=["POST"])
def predict_grievance():
    """Classify a grievance supplied as text, audio and/or image.

    A JSON body may carry ``text`` and ``explain``. Any other content type is
    read as a form with ``text``, ``explain`` and ``area`` fields plus the
    optional files ``audio`` and ``image``. Supported input modes:

    * A - image only: the image must pass the location check (403 otherwise);
      its extracted text becomes the grievance text.
    * B - audio only: the transcript becomes the grievance text.
    * C - text only.
    * D - text + image: the image is treated as evidence.
    * E - audio + image: the image is treated as evidence.

    Returns:
        A JSON response. 400 when no input or an unsupported combination is
        sent, 403 when a Mode A image fails the location check, 422 when the
        text fails validation, 500 on unexpected errors, otherwise the
        classification result.
    """
    try:
        content_type = request.content_type or ""
        if "application/json" in content_type:
            data = request.get_json(silent=True)
            # A JSON array/scalar has no fields to read; treat it as empty so
            # the request fails as missing input instead of an AttributeError.
            if not isinstance(data, dict):
                data = {}
            raw_text = data.get("text")
            text_input = raw_text.strip() if isinstance(raw_text, str) else ""
            # Strings are parsed like the form field so that "false" is False.
            explain_flag = _coerce_explain_flag(data.get("explain", False))
            has_audio = False
            has_image = False
            image_bytes = None
            audio_file = None
        else:
            text_input = request.form.get("text", "").strip()
            explain_flag = _coerce_explain_flag(request.form.get("explain", "false"))
            has_audio = "audio" in request.files
            has_image = "image" in request.files
            image_bytes = request.files["image"].read() if has_image else None
            audio_file = request.files["audio"] if has_audio else None
        has_text = bool(text_input)

        logger.info(
            "[predict] content_type=%s has_text=%s has_audio=%s has_image=%s",
            content_type[:40], has_text, has_audio, has_image,
        )

        if not (has_text or has_audio or has_image):
            return jsonify({
                "status": "failed", "code": "missing_input",
                "message": "Please provide at least one of: 'text', 'audio', or 'image'.",
            }), 400

        area_field = request.form.get("area", "").strip()
        evidence_relevant = None
        evidence_note = None
        civic_score = None
        evidence_caption = None
        location_field = None
        relevance = None

        # ── Mode A — IMAGE ONLY ──────────────────────────────────────────
        if has_image and not has_text and not has_audio:
            location_status, location_reason = resolve_location_status(
                image_bytes, area=area_field
            )
            if location_status in ("invalid", "no_gps"):
                logger.warning("[predict] Mode A rejected — %s", location_reason)
                return jsonify({
                    "status": "failed", "code": "location_invalid",
                    "message": location_reason, "location": "invalid",
                }), 403
            grievance_text = extract_text_from_image(image_bytes)
            input_mode = "image"
            evidence_caption = grievance_text
            ok, err = _validate_machine_text(grievance_text, source="image")
            if not ok:
                return jsonify(err), 422
            relevance = check_evidence_relevance(evidence_caption)
            logger.info(
                "[predict] Mode A civic relevance: relevant=%s score=%s",
                relevance["evidence_relevant"], relevance["civic_score"],
            )

        # ── Mode B — AUDIO ONLY ──────────────────────────────────────────
        elif has_audio and not has_text and not has_image:
            grievance_text = transcribe_audio(audio_file)
            input_mode = "audio"
            ok, err = _validate_machine_text(grievance_text, source="audio")
            if not ok:
                return jsonify(err), 422

        # ── Mode C — TEXT ONLY ───────────────────────────────────────────
        elif has_text and not has_image and not has_audio:
            grievance_text = text_input
            input_mode = "text"
            is_valid, error_code = validate_text(grievance_text)
            if not is_valid:
                return jsonify({
                    "status": "failed",
                    "code": error_code,
                    "message": _VALIDATION_MESSAGES[error_code],
                }), 422

        # ── Mode D — TEXT + IMAGE (evidence) ─────────────────────────────
        elif has_text and has_image and not has_audio:
            is_valid, error_code = validate_text(text_input)
            if not is_valid:
                return jsonify({
                    "status": "failed",
                    "code": error_code,
                    "message": _VALIDATION_MESSAGES[error_code],
                }), 422
            grievance_text = text_input
            input_mode = "text+image"
            location_field, evidence_caption, relevance = _gather_image_evidence(
                image_bytes, area_field, "D"
            )

        # ── Mode E — AUDIO + IMAGE (evidence) ────────────────────────────
        elif has_audio and has_image and not has_text:
            grievance_text = transcribe_audio(audio_file)
            input_mode = "audio+image"
            ok, err = _validate_machine_text(grievance_text, source="audio")
            if not ok:
                return jsonify(err), 422
            location_field, evidence_caption, relevance = _gather_image_evidence(
                image_bytes, area_field, "E"
            )

        else:
            # Only text together with audio reaches this branch: input WAS
            # supplied, so report the combination rather than "missing input".
            return jsonify({
                "status": "failed", "code": "unsupported_input_combination",
                "message": (
                    "'text' and 'audio' cannot be sent together. Send text, "
                    "audio or image alone, or text+image / audio+image."
                ),
            }), 400

        if relevance is not None:
            evidence_relevant = relevance["evidence_relevant"]
            evidence_note = relevance["evidence_note"]
            civic_score = relevance["civic_score"]

        # ── Language & model ─────────────────────────────────────────────
        language = detect_language(grievance_text)
        res = _get_resources(language)

        # ── Classification ───────────────────────────────────────────────
        category_result = res["cat_fn"](grievance_text)
        category = category_result["category"]
        category_conf = category_result["confidence"]
        category_index = category_result.get("class_index", 0)

        urgency_result = res["urg_fn"](grievance_text)
        urgency = urgency_result["urgency"]
        urgency_conf = urgency_result["confidence"]
        urgency_index = urgency_result.get("class_index", 0)

        priority_result = compute_priority_score(category, urgency, urgency_conf)
        priority_score = priority_result["score"]
        priority_band = priority_result["band"]

        # ── Re-check evidence relevance with the predicted category (D & E) ─
        if input_mode in ("text+image", "audio+image") and evidence_caption:
            refined = check_evidence_relevance(evidence_caption, category)
            evidence_relevant = refined["evidence_relevant"]
            evidence_note = refined["evidence_note"]
            civic_score = refined["civic_score"]
            logger.info(
                "[predict] %s refined evidence (cat='%s'): relevant=%s score=%s",
                input_mode, category, evidence_relevant, civic_score,
            )

        # ── Explainability ───────────────────────────────────────────────
        # Token attributions are computed only on request; the textual
        # reasons are always generated (with empty token lists otherwise).
        category_tokens: list = []
        urgency_tokens: list = []
        if explain_flag:
            category_tokens = res["cat_exp"].explain(grievance_text, category_index)
            urgency_tokens = res["urg_exp"].explain(grievance_text, urgency_index)
        explanation = generate_final_reason(
            grievance_text, category, urgency, priority_score,
            category_tokens, urgency_tokens,
        )

        # ── Response ─────────────────────────────────────────────────────
        response_body = {
            "status": "success",
            "input_mode": input_mode,
            "text": grievance_text,
            "language": language,
            "category": category,
            "category_confidence": category_conf,
            "urgency": urgency,
            "urgency_confidence": urgency_conf,
            "priority_score": priority_score,
            "priority_band": priority_band,
            "explanation": {
                "category_tokens": category_tokens,
                "urgency_tokens": urgency_tokens,
                "category_decision": explanation["category_decision"],
                "urgency_decision": explanation["urgency_decision"],
                "priority_summary": explanation["priority_summary"],
                "final_reason": explanation["final_reason"],
            },
        }
        if location_field is not None:
            response_body["location"] = location_field
        if evidence_relevant is not None:
            response_body["evidence_relevant"] = evidence_relevant
            response_body["evidence_note"] = evidence_note
            response_body["civic_score"] = civic_score
        return jsonify(response_body)
    except Exception as e:
        logger.exception("[predict] Unhandled exception")
        error_body = {
            "status": "failed", "code": "internal_error",
            "message": str(e),
        }
        # The full traceback is always logged above; it is only echoed to the
        # client in debug mode so production responses do not leak internals.
        if app.debug:
            error_body["trace"] = traceback.format_exc()
        return jsonify(error_body), 500
| # ========================================================= | |
| # POST /fairness-audit | |
| # ========================================================= | |
@app.route("/fairness-audit", methods=["POST"])
def fairness_audit():
    """Run the GFAS fairness audit on the ``grievances`` field of a JSON body.

    Returns:
        400 when the body is missing, empty, or not a JSON object; otherwise
        the result or error payload produced by ``gfas_audit`` with the
        status code it supplies; 500 on unexpected errors.
    """
    try:
        data = request.get_json(silent=True)
        # A JSON array or scalar has no "grievances" field to read, so reject
        # it here rather than failing on .get() with a 500.
        if not data or not isinstance(data, dict):
            return jsonify({"status": "failed", "message": "Invalid JSON body."}), 400
        result, error, status_code = gfas_audit(data.get("grievances"))
        if error:
            return jsonify(error), status_code
        return jsonify(result), status_code
    except Exception as e:
        logger.exception("[fairness-audit] Unhandled exception")
        error_body = {"status": "failed", "error": str(e)}
        # The traceback is always logged above; it is only echoed to the
        # client in debug mode so production responses do not leak internals.
        if app.debug:
            error_body["trace"] = traceback.format_exc()
        return jsonify(error_body), 500
| # ========================================================= | |
| # POST /hotspot-forecast | |
| # ========================================================= | |
@app.route("/hotspot-forecast", methods=["POST"])
def hotspot_forecast():
    """Forecast grievance hotspots per (area, category) and rank them by risk.

    Expects a JSON object with:
        grievances: list of records providing ``area``, ``category``,
            ``createdAt`` and ``priorityScore``.
        horizon_days: forecast length, integer >= 1 (default 1).
        top_n: number of hotspots to return, integer >= 0 (default 10).
        source_window_days: echoed back in the response (default 45).

    Returns:
        400 for a malformed body or parameters, 422 when no grievances are
        supplied or required fields are missing, 500 on unexpected errors,
        otherwise the ranked hotspots plus run metadata.
    """
    try:
        # silent=True turns malformed JSON into None so it can be answered
        # with a 400 instead of being caught below and reported as a 500.
        data = request.get_json(force=True, silent=True)
        if not isinstance(data, dict):
            return jsonify({"status": "failed", "message": "Invalid JSON body."}), 400

        try:
            horizon = int(data.get("horizon_days", 1))
            top_n = int(data.get("top_n", 10))
            source_window = int(data.get("source_window_days", 45))
        except (TypeError, ValueError):
            return jsonify({
                "status": "failed",
                "message": "horizon_days, top_n and source_window_days must be integers.",
            }), 400
        if horizon < 1 or top_n < 0:
            return jsonify({
                "status": "failed",
                "message": "horizon_days must be >= 1 and top_n must be >= 0.",
            }), 400

        # `timezone` is what this module imports from datetime.
        generated_at = datetime.now(timezone.utc).isoformat()

        grievances = data.get("grievances", [])
        if not grievances:
            return jsonify({"status": "failed", "message": "No grievances supplied"}), 422
        if not isinstance(grievances, list):
            return jsonify({
                "status": "failed",
                "message": "'grievances' must be a list of objects.",
            }), 400

        df = pd.DataFrame(grievances)
        if df.empty:
            return jsonify({"status": "success", "generated_at": generated_at,
                            "top_hotspots": []})

        required = ("area", "category", "createdAt", "priorityScore")
        missing = [column for column in required if column not in df.columns]
        if missing:
            return jsonify({
                "status": "failed",
                "message": "Missing required grievance fields: " + ", ".join(missing),
            }), 422

        df["area"] = df["area"].astype(str).str.lower().str.strip()
        df["category"] = df["category"].astype(str).str.lower().str.strip()
        # Unparseable timestamps become NaT and are dropped; the rest are
        # converted to naive UTC.
        df["ds"] = pd.to_datetime(df["createdAt"], errors="coerce",
                                  utc=True).dt.tz_convert(None)
        df = df.dropna(subset=["ds"])
        df["y"] = 1
        df = df[df["category"].isin(VALID_LABELS)]
        if df.empty:
            return jsonify({"status": "success", "generated_at": generated_at,
                            "top_hotspots": []})

        # NOTE(review): rows are grouped on the exact `ds` timestamp. If
        # callers send raw creation times rather than day-bucketed ones, every
        # record stays its own point with y == 1 — confirm whether a daily
        # floor is intended here.
        df = df.groupby(["area", "category", "ds"]).agg(
            {"y": "sum", "priorityScore": "mean"}
        ).reset_index()
        groups = list(df.groupby(["area", "category"]))

        hotspots = []
        errors = []
        with ThreadPoolExecutor(max_workers=_PROPHET_MAX_WORKERS) as executor:
            futures = {
                executor.submit(_fit_and_forecast, area, cat, gdf, horizon): (area, cat)
                for (area, cat), gdf in groups
            }
            for future in as_completed(futures):
                area, category = futures[future]
                try:
                    result = future.result()
                    if result is None:
                        # Too little history for this pair; skip it.
                        continue
                    result["flaskSnapshot"] = {
                        "recentAvg": result.pop("_recentAvg"),
                        "forecastAvg": result.pop("_forecastAvg"),
                        "sourceWindowDays": source_window,
                        "forecastHorizonDays": horizon,
                        "generatedAt": generated_at,
                    }
                    result["sourceWindowDays"] = source_window
                    hotspots.append(result)
                except Exception as e:
                    # One failing series must not abort the whole batch.
                    errors.append({"area": area, "category": category, "error": str(e)})
                    logger.warning("[hotspot] Prophet failed %s/%s: %s", area, category, e)

        ranked = sorted(hotspots, key=lambda x: x["riskScore"], reverse=True)
        return jsonify({
            "status": "success",
            "generated_at": generated_at,
            "top_hotspots": ranked[:top_n],
            "meta": {
                "groups_evaluated": len(groups),
                "forecasts_computed": len(hotspots),
                "error_count": len(errors),
                "errors": errors,
                "source_window_days": source_window,
                "horizon_days": horizon,
            },
        })
    except Exception as e:
        logger.exception("[hotspot-forecast] Unhandled exception")
        return jsonify({"status": "failed", "message": str(e)}), 500
| # ========================================================= | |
| # ERROR HANDLERS | |
| # ========================================================= | |
# Registered for 413: without the decorator this handler is never attached to
# `app`, and oversized uploads get Flask's default error page instead.
@app.errorhandler(413)
def request_entity_too_large(e):
    """Return a JSON 413 body that states the configured upload limit in MB."""
    limit_mb = app.config["MAX_CONTENT_LENGTH"] // (1024 * 1024)
    return jsonify({
        "status": "failed", "code": "payload_too_large",
        "message": f"Upload exceeds {limit_mb} MB limit.",
    }), 413
# Registered for 404: without the decorator this handler is never attached to
# `app`, and unknown URLs get Flask's default error page instead.
@app.errorhandler(404)
def not_found(e):
    """Return a JSON 404 body for unknown endpoints."""
    return jsonify({"status": "failed", "code": "not_found",
                    "message": "Endpoint not found."}), 404
# Registered for 405: without the decorator this handler is never attached to
# `app`, and wrong-method requests get Flask's default error page instead.
@app.errorhandler(405)
def method_not_allowed(e):
    """Return a JSON 405 body for requests using an unsupported HTTP method."""
    return jsonify({"status": "failed", "code": "method_not_allowed",
                    "message": "HTTP method not allowed."}), 405
| # ========================================================= | |
| # RUN — Hugging Face Spaces uses port 7860 | |
| # ========================================================= | |
if __name__ == "__main__":
    # Direct-execution entry point: PORT (default 7860) selects the port and
    # FLASK_DEBUG must be exactly "true" (case-insensitive) to enable debug.
    debug = os.environ.get("FLASK_DEBUG", "false").lower() == "true"
    port = int(os.environ.get("PORT", 7860))
    logger.info("🚀 Starting API on port %d (debug=%s)", port, debug)
    run_options = dict(host="0.0.0.0", port=port, debug=debug, threaded=True)
    app.run(**run_options)