# MOHAN799S
# Update app.py
# ef6f897
# =========================================================
# FLASK API — MULTILINGUAL GRIEVANCE + XPE + GFAS
# INTEGRATED GRADIENTS ONLY (PRODUCTION VERSION)
# Hugging Face Spaces — Production Deployment
# Multimodal: text / audio / image(evidence) support
# =========================================================
from flask import Flask, request, jsonify
import re
import io
import traceback
import logging
import math
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone
# Raise the "prophet" and "cmdstanpy" loggers to ERROR so only errors from
# them are emitted. This is done before the `from prophet import Prophet`
# statement that follows (presumably to also mute import-time output —
# TODO confirm).
logging.getLogger("prophet").setLevel(logging.ERROR)
logging.getLogger("cmdstanpy").setLevel(logging.ERROR)
# Configure the root logger at INFO level.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
from prophet import Prophet
import pandas as pd
from PIL import Image
import piexif
from classification.bert_classify import (
predict as predict_category_en,
get_model_and_tokenizer as get_cat_en,
)
from classification.indic_bert_classify import (
predict as predict_category_indic,
get_model_and_tokenizer as get_cat_indic,
)
from sentiment_analysis.bert_predict import (
predict_urgency as predict_urgency_en,
get_model_and_tokenizer as get_urg_en,
)
from sentiment_analysis.indic_bert_predict import (
predict as predict_urgency_indic,
get_model_and_tokenizer as get_urg_indic,
)
from multi_modal.audio_to_text import transcribe_audio
from multi_modal.image_to_text import extract_text_from_image
from xpe.priority_engine import compute_priority_score
from xpe.integrated_gradients_explainer import IntegratedGradientsExplainer
from xpe.hybrid_explainer import generate_final_reason
from gfas import audit as gfas_audit
# Short alias for the UTC tzinfo singleton.
UTC = timezone.utc
# Matches strings made up solely of digits, non-word characters and
# underscores (at least one character).
_RE_JUNK = re.compile(r"^[\d\W_]+$")
# Script detectors: any code point in the Devanagari block (U+0900–U+097F)
# or in the Telugu block (U+0C00–U+0C7F).
_RE_HINDI = re.compile(r"[\u0900-\u097F]")
_RE_TELUGU = re.compile(r"[\u0C00-\u0C7F]")
# =========================================================
# GRIEVANCE VALIDATION — TWO-SIGNAL GATE
#
# Signal 1 (_CIVIC_TOPIC): Does text mention a civic issue?
# Expanded to cover easy ("pothole on road") through
# difficult ("residents experiencing waterborne illness
# due to contaminated municipal supply") phrasings.
#
# Signal 2 (_COMPLAINT_INTENT): Does text express complaint
# intent — duration, harm, request, frustration, urgency?
# Civic topic ALONE is now sufficient (Stage 2b); intent
# check remains for borderline observation texts.
#
# Difficulty levels handled:
# EASY — simple nouns: "pothole", "no water", "garbage"
# MEDIUM — descriptive: "road is very bad", "light not
# working", "water not coming since 3 days"
# HARD — indirect/narrative: "fell ill after drinking
# tap water", "vehicle tyres burst due to road
# condition", "children afraid to walk to school
# because of stray dogs"
#
# FIX LOG (v2):
# + path hole / path-hole → now matched (Roads)
# + hole on road / hole in the road / big hole on main road → matched
# + road full of holes / road has many holes → matched
# + deep/big/large hole on road/street/lane/highway → matched
# + current gone / current is gone / power gone → matched (Electricity)
# + lights not working (plural) → matched
# + drain is blocked / drain is choked / drainage is blocked → matched
# + "is" connector tolerance added throughout drain/electricity sections
# =========================================================
# ── Signal 1: Civic topic ─────────────────────────────────────────────────────
# Compiled with re.VERBOSE | re.IGNORECASE: one large alternation whose
# branches are grouped by topic (roads, water, electricity, garbage,
# drainage, stray animals, pollution, transport, buildings, general harm,
# Telugu/Hindi keywords, standalone nouns, road-quality descriptors).
#
# MAINTENANCE NOTE: in VERBOSE mode newlines and indentation are discarded,
# so every continuation line inside a group MUST begin with "|". A missing
# "|" silently glues the previous alternative to the next one. This was the
# case in the PUBLIC TRANSPORT "bus ..." group ("not coming" + "broken"),
# which made "bus broken" unmatchable; the separator is now restored.
_CIVIC_TOPIC = re.compile(
r"""
# ══════════════════════════════════════════════════════
# ROADS — easy: pothole / medium: road damaged / hard:
# vehicle damaged, tyre burst, accident due to road
#
# FIX v2: Added path hole, hole on/in road variants,
# road full of holes, big/deep/large hole on road,
# "is" connector tolerance for drain-style patterns
# ══════════════════════════════════════════════════════
\b(pothole|potholes|crater|craters|road\s+hole|road\s+pit
|path[-\s]?hole
|hole\s+(on|in|at|along)\s+(the\s+)?(road|street|lane|path|highway|footpath|pavement|sidewalk)
|holes?\s+(on|in|along)\s+(road|street|lane|highway|footpath)
|road\s+(full\s+of|filled\s+with|has\s+many|with\s+many)\s+holes?
|(big|deep|large|huge|dangerous)\s+(hole|pit|crater)\s+(on|in|at|along)\s+(the\s+)?(road|street|lane|highway|main\s+road|footpath)
|road\s+(damage|damaged|broken|crack|cracked|bad|rough
|repair|condition|cave|collapse|hazard|blocked
|obstruction|construction|excavation|digging|dug\s+up
|not\s+repaired|not\s+fixed|in\s+bad\s+shape|in\s+poor\s+condition
|is\s+(very\s+)?(bad|terrible|horrible|pathetic|worst|dangerous|unsafe|rough)
|has\s+(potholes?|cracks?|damage|holes?))
|damaged\s+road|broken\s+road|bad\s+road|rough\s+road
|unpaved\s+road|muddy\s+road|dirt\s+road|gravel\s+road
|road\s+not\s+repaired|road\s+not\s+fixed|road\s+not\s+tarred
|open\s+trench|road\s+trench|trench\s+not\s+filled|trench\s+open
|speed\s+breaker\s+(missing|broken|damaged|not\s+marked)
|road\s+sign\s+(missing|damaged|broken|fallen)
|footpath\s+(broken|damaged|blocked|missing|encroached|not\s+there)
|pavement\s+(broken|damaged|cracked|uneven|missing)
|sidewalk\s+(broken|damaged|blocked|missing)
|tyre\s+(burst|puncture|damaged)\s+(due\s+to|because\s+of|from)\s+(road|pothole|pit)
|vehicle\s+(damaged|got\s+damaged|broke\s+down)\s+(due\s+to|because\s+of)\s+(road|pothole)
|accident\s+(due\s+to|because\s+of|caused\s+by)\s+(road|pothole|road\s+condition)
|fell\s+(down|off|into)\s+(pothole|pit|trench|open\s+drain)
|two[-\s]wheeler\s+(fell|skidded|slipped)\s+(due\s+to|because\s+of)
|bike\s+(fell|skidded|slipped|got\s+stuck)\s+(due\s+to|because\s+of)
|road\s+marking\s+(faded|missing|not\s+visible|worn\s+out)
|no\s+road|street\s+(is\s+)?(bad|broken|damaged|not\s+repaired))\b
# ══════════════════════════════════════════════════════
# WATER SUPPLY — easy: no water / medium: pipe burst /
# hard: contaminated water causing illness, muddy
# water, foul smell from tap, people fell ill
# ══════════════════════════════════════════════════════
| \b(water\s+(supply|issue|problem|crisis|shortage|scarcity|not\s+coming
|not\s+available|disrupted|cut|stopped|irregular|leakage
|leak|overflow|overflowing|tank|pressure|quality
|contamination|contaminated|pollution|polluted
|connection|pipeline|line|board|department|authority)
|no\s+water|without\s+water|water\s+cut\s+off
|pipe\s+(burst|leak|leaking|broken|damage|damaged|cracked|old|rusted
|not\s+repaired|open|exposed|underground\s+pipe)
|broken\s+pipe|leaking\s+pipe|burst\s+pipe|old\s+pipe|rusted\s+pipe
|water\s+not\s+(coming|supplied|available|received|restored)
|no\s+water\s+supply|water\s+supply\s+(stopped|disrupted|cut|not\s+given)
|drinking\s+water\s+(contaminated|dirty|impure|polluted|not\s+safe|unsafe
|not\s+potable|not\s+clean|issue|problem|shortage)
|tap\s+water\s+(dirty|smells?|contaminated|yellow|brown|black|unsafe)
|water\s+(smells?|stinks?|colour|colored|discoloured|discolored
|yellow|brown|black|muddy|murky|turbid|unclean|unsafe)
|contaminated\s+water|dirty\s+water|impure\s+water|polluted\s+water
|muddy\s+water|murky\s+water|foul\s+water|unclean\s+water
|unsafe\s+water|undrinkable\s+water|unfit\s+for\s+drinking
|waterborne\s+(disease|illness|infection)|water.?borne
|fell\s+ill\s+(after|due\s+to|because\s+of|from)\s+(drinking|water|tap)
|fallen\s+ill\s+(after|due\s+to|because\s+of|from)\s+(drinking|water|tap)
|got\s+sick\s+(after|due\s+to|because\s+of|from)\s+(drinking|water|tap)
|sick\s+(after|due\s+to|because\s+of)\s+(drinking|contaminated|dirty)\s+water
|ill\s+(after\s+drinking|due\s+to\s+water|from\s+water|because\s+of\s+water)
|vomiting\s+(after|due\s+to|from)\s+(drinking|water)
|diarrhea\s+(due\s+to|from|caused\s+by)\s+water
|stomach\s+(pain|ache|issue|problem)\s+(after\s+drinking|due\s+to|from)\s+water
|disease\s+(due\s+to|from|caused\s+by|spread\s+by)\s+(water|contaminated)
|illness\s+(due\s+to|from|caused\s+by)\s+(water|contaminated|dirty)
|people\s+(fell|fallen|got|are\s+getting)\s+ill\s+(due\s+to|from|after|because\s+of)
|residents\s+(fell|fallen|got|are\s+getting)\s+ill
|children\s+(fell|fallen|got|are\s+getting)\s+ill
|families\s+(fell|fallen|got|are\s+getting)\s+ill
|water\s+not\s+fit\s+for\s+drinking|water\s+not\s+safe\s+to\s+drink
|borewell\s+(contaminated|dirty|water|issue|problem|not\s+working)
|overhead\s+tank\s+(dirty|contaminated|overflow|not\s+cleaned|cracked)
|sump\s+(dirty|contaminated|overflow|cracked|not\s+cleaned)
|tanker\s+(water|not\s+coming|not\s+sent|delayed|supply)
|water\s+tanker\s+(not|delayed|irregular)
|municipal\s+water\s+(supply|contaminated|dirty|problem|issue)
|metro\s+water\s+(supply|issue|contaminated)
|stagnant\s+water|standing\s+water|water\s+accumulation|water\s+stagnation
|waterlog(ging)?|flooded?\s+(road|street|area|locality|colony|lane)
|waterlogged\s+(road|street|area)
|water\s+on\s+the\s+(road|street|lane|path|area|ground|footpath))\b
# ══════════════════════════════════════════════════════
# ELECTRICITY — easy: no power / medium: streetlight
# not working / hard: wire on road causing risk,
# transformer spark, electric shock risk
#
# FIX v2: Added "current gone/is gone", "power gone",
# "lights" plural, "current is (gone|cut|off|failed)"
# ══════════════════════════════════════════════════════
| \b(power\s+(cut|outage|failure|gone|not\s+restored|supply|shortage
|fluctuation|surge|problem|issue|connection|line|grid)
|electricity\s+(cut|outage|issue|failure|problem|gone|not\s+restored
|fluctuation|supply|shortage|connection|bill|board|department)
|no\s+(electricity|power|current|supply|light)
|current\s+(gone|cut|off|not\s+coming|not\s+there|failed|out|tripped)
|current\s+is\s+(gone|cut|off|not\s+coming|not\s+there|failed|out|tripped)
|power\s+(gone|tripped|out|off|not\s+back|not\s+yet\s+restored)
|light\s+(gone|not\s+working|not\s+there|out|off|missing|broken|damaged)
|lights\s+(gone|not\s+working|not\s+there|out|off|missing|broken|damaged)
|streetlight\s+(broken|not\s+working|damaged|out|dark|missing|gone|off
|flickering|dim|no\s+light)
|broken\s+streetlight|dark\s+street|street\s+(dark|in\s+darkness|no\s+light)
|street\s+light\s+(not\s+working|broken|gone|off|missing|out)
|no\s+street\s+light|no\s+streetlight
|electric\s+(wire|pole|shock|spark|short\s+circuit|box|meter|connection
|supply|board|department|fault|hazard|risk|danger)
|live\s+wire|dangling\s+wire|fallen\s+wire|loose\s+wire|naked\s+wire
|exposed\s+(wire|cable|electric)|wire\s+(on\s+road|hanging|low|dangerous
|sparking|broken|snapped|fallen)
|electric\s+pole\s+(fallen|broken|leaning|damaged|cracked|tilted)
|fallen\s+(pole|wire|electric\s+pole|electric\s+wire|cable|tower)
|transformer\s+(damaged|broken|leaking|fire|sparking|exploded|blast|issue|problem)
|electric\s+(shock|fire|spark)\s+(risk|hazard|danger|incident|reported|near)
|risk\s+of\s+electric\s+shock|electric\s+shock\s+risk
|shock\s+(from|due\s+to|because\s+of)\s+(wire|pole|electric|current)
|current\s+(leakage|leak|spark|shock|passing|through)
|power\s+not\s+restored|no\s+power\s+(for|since)\s+\d+
|electricity\s+not\s+(restored|coming|available)
|voltage\s+(fluctuation|low|high|problem|issue|drop|surge)
|meter\s+(box|board)\s+(open|broken|damaged|missing|tampered)
|substation\s+(damaged|fire|issue|problem|fault)
|cable\s+(underground|overhead|broken|damaged|exposed|cut)
|junction\s+box\s+(open|broken|damaged|missing)
|area\s+(in\s+darkness|no\s+light|no\s+electricity|no\s+power|dark)
|darkness\s+(in|at|near|around)\s+(colony|area|street|road|ward|locality))\b
# ══════════════════════════════════════════════════════
# GARBAGE / WASTE — easy: garbage on road /
# medium: bin overflowing / hard: burning waste
# causing health issues, illegal dump attracting pests
# ══════════════════════════════════════════════════════
| \b(garbage\s+(pile|dump|not\s+collected|overflowing|bin|heap|on\s+road
|on\s+street|burning|fire|disposal|collection|vehicle
|truck|not\s+picked|not\s+cleared|accumulation|issue|problem
|near\s+house|near\s+school|near\s+hospital|smell|stench)
|waste\s+(dump|pile|heap|disposal|burning|collection|not\s+collected
|illegal|dumped|accumulation|management|issue|problem|on\s+road)
|trash\s+(pile|dump|on\s+road|on\s+street|collection|not\s+collected)
|litter\s+(on\s+road|on\s+street|pile|problem|issue|everywhere)
|garbage|litter|debris|rubbish
|overflowing\s+(garbage|bin|dustbin|dumpster|container)
|garbage\s+bin\s+(broken|missing|overflowing|full|not\s+emptied)
|uncollected\s+(garbage|waste|trash|litter)
|garbage\s+not\s+(collected|cleared|picked\s+up|removed)
|waste\s+not\s+(collected|cleared|picked\s+up|removed)
|no\s+garbage\s+collection|no\s+waste\s+collection
|burning\s+(garbage|waste|plastic|tyres?|rubber|trash|litter)
|garbage\s+(fire|burning)|waste\s+(fire|burning)
|illegal\s+(dump|dumping|waste\s+dump|garbage\s+dump)
|open\s+(garbage|dump|waste|dumping)
|rotting\s+(waste|garbage|food|material)|decomposing\s+(waste|garbage)
|foul\s+smell\s+(from|due\s+to)\s+(garbage|waste|dump|bin)
|smell\s+(from|due\s+to|because\s+of)\s+(garbage|waste|dump|rotting)
|stench\s+(from|due\s+to|because\s+of)\s+(garbage|waste|dump)
|mosquito\s+(breeding|menace|problem|due\s+to|from|in)\s*(garbage|waste|dump|stagnant)?
|rats?\s+(due\s+to|from|because\s+of)\s+(garbage|waste|dump)
|pests?\s+(due\s+to|from|because\s+of)\s+(garbage|waste|dump)
|flies\s+(due\s+to|from|because\s+of)\s+(garbage|waste|dump)
|disease\s+(due\s+to|from|caused\s+by|spread\s+by)\s+(garbage|waste|dump)
|illness\s+(due\s+to|from|caused\s+by)\s+(garbage|waste|dump)
|health\s+(hazard|risk|issue|problem)\s+(due\s+to|from|caused\s+by)\s+(garbage|waste)
|construction\s+(debris|waste|material)\s+(dumped|blocking|on\s+road)
|animal\s+(carcass|body|dead)\s+(dumped|on\s+road|on\s+street|lying)
|dead\s+animal\s+(dumped|on\s+road|lying|not\s+removed)
|carcass\s+(on\s+road|on\s+street|not\s+removed|lying|dumped))\b
# ══════════════════════════════════════════════════════
# DRAINAGE / SEWAGE / SANITATION — easy: drain blocked /
# medium: sewage overflow / hard: disease from open
# drain, sewage entering homes, manhole accident
#
# FIX v2: Added "drain is (blocked|choked|clogged)",
# "drainage is (blocked|choked)", "is" connector
# tolerance throughout drain/sewage patterns
# ══════════════════════════════════════════════════════
| \b(drain\s+(is\s+)?(blocked|overflow|overflowing|choked|clogged|not\s+cleaned
|broken|damaged|open|uncovered|full|flooding|stagnant
|issue|problem|bad\s+smell|stench|foul\s+smell)
|blocked\s+drain|open\s+drain|choked\s+drain|clogged\s+drain
|drainage\s+(is\s+)?(blocked|overflow|issue|problem|bad|not\s+working
|not\s+cleaned|choked|clogged|system|line|pipe)
|drainage\s+not\s+(working|cleaned|cleared|maintained)
|drain\s+(has\s+)?(bad\s+smell|stench|foul\s+smell|overflowed|collapsed)
|sewage\s+(overflow|overflowing|leak|leaking|on\s+road|on\s+street
|in\s+house|flooding|not\s+cleaned|smell|stench|issue|problem
|entering|flowing\s+on|running\s+on|water|pipe|line)
|sewage\s+water\s+(on\s+road|in\s+colony|in\s+area|entering|flowing)
|sewer\s+(blocked|overflow|issue|problem|line|pipe|not\s+working)
|manhole\s+(open|uncovered|broken|missing|cover\s+missing|no\s+cover
|damaged|dangerous|accident|fell|fallen\s+into)
|open\s+manhole|missing\s+manhole|manhole\s+cover\s+missing
|fell\s+(into|in)\s+(manhole|open\s+drain|drain|sewer)
|fallen\s+(into|in)\s+(manhole|open\s+drain|drain|sewer)
|child\s+(fell|fallen)\s+(into|in)\s+(manhole|drain|open\s+drain)
|accident\s+(due\s+to|caused\s+by|because\s+of)\s+(manhole|open\s+drain|drain)
|public\s+toilet\s+(dirty|broken|not\s+working|blocked|overflow|no\s+water
|locked|missing|damaged|not\s+maintained|stench)
|toilet\s+(blocked|overflow|not\s+working|broken|dirty|no\s+water)
|no\s+public\s+toilet|public\s+toilet\s+not\s+there
|open\s+defecation|urinating\s+in\s+public|defecating\s+in\s+public
|sewage\s+pit\s+(open|uncovered|broken|overflowing)
|septic\s+tank\s+(overflow|full|broken|damaged|issue|problem)
|black\s+water\s+(on\s+road|flooding|overflow|running)
|foul\s+smell\s+(from|due\s+to)\s+(drain|sewage|sewer|toilet|manhole)
|stench\s+(from|due\s+to)\s+(drain|sewage|sewer|toilet|manhole)
|smell\s+(from|due\s+to|because\s+of)\s+(drain|sewage|sewer|gutter)
|disease\s+(from|due\s+to|caused\s+by|spread\s+by)\s+(sewage|drain|open\s+drain)
|illness\s+(from|due\s+to|caused\s+by)\s+(sewage|drain|open\s+drain)
|mosquito\s+(breeding|from|due\s+to)\s+(drain|sewage|stagnant)
|overflowing\s+(drain|sewage|sewer|manhole)
|sewage\s+entering\s+(house|home|building|ground\s+floor|colony|area)
|gutter\s+(blocked|overflow|stench|broken|open|issue))\b
# ══════════════════════════════════════════════════════
# STRAY ANIMALS — easy: stray dogs on road / medium:
# dogs biting people / hard: child bitten, accident
# due to cattle, people afraid to go out
# ══════════════════════════════════════════════════════
| \b(stray\s+(dog|dogs|cat|cats|animal|animals|cattle|cow|cows|bull|buffalo
|pig|pigs|horse|goat|goats)
|stray\s+dogs?\s+(on\s+road|in\s+area|in\s+colony|menace|problem|issue
|biting|attacking|chasing|aggressive|ferocious|pack)
|dogs?\s+(biting|bitten|attacked|attacking|chasing|bit|menace|nuisance
|aggressive|ferocious|dangerous|terrorizing|threat)
|cattle\s+(blocking|on\s+road|on\s+highway|menace|issue|problem
|dangerous|accident|causing\s+accident)
|cow\s+(on\s+road|blocking|dangerous|menace|attack|attacked)
|bull\s+(on\s+road|dangerous|menace|attack|attacked|charging)
|buffalo\s+(on\s+road|dangerous|menace|attack|attacked)
|animal\s+(attack|attacked|biting|menace|carcass|dead|body|on\s+road)
|bitten\s+by\s+(dog|stray|animal|cow|bull)|dog\s+bite|animal\s+bite
|dog\s+(attacked|bit|chased|chasing|biting)\s+(me|child|children|person|people|resident|woman|man)
|child\s+(bitten|attacked|chased)\s+by\s+(dog|stray|animal|cow|bull)
|children\s+(bitten|attacked|chased|scared|afraid)\s+(by|of)\s+(dog|stray|animal)
|people\s+(bitten|attacked|chased|scared|afraid)\s+(by|of)\s+(dog|stray|animal)
|residents\s+(bitten|attacked|chased|scared|afraid)\s+(by|of)\s+(dog|stray|animal)
|afraid\s+(to\s+(go\s+out|walk|come\s+out|step\s+out))\s+(due\s+to|because\s+of)\s+(dog|stray|animal)
|scared\s+(to\s+(go\s+out|walk|come\s+out|step\s+out))\s+(due\s+to|because\s+of)\s+(dog|stray|animal)
|cannot\s+(go\s+out|walk|step\s+out|come\s+out)\s+(due\s+to|because\s+of)\s+(dog|stray|animal)
|accident\s+(due\s+to|caused\s+by|because\s+of)\s+(cattle|cow|stray|animal|dog)
|vehicle\s+(hit|hits|collided|accident)\s+(cattle|cow|stray|animal|dog)
|dead\s+animal|animal\s+carcass|carcass\s+on\s+road
|dog\s+population\s+(increase|out\s+of\s+control|uncontrolled)
|pack\s+of\s+(stray\s+)?dogs)\b
# ══════════════════════════════════════════════════════
# POLLUTION — easy: smoke / medium: burning garbage
# smoke / hard: factory emissions causing disease,
# polluted water body, chemical smell
# ══════════════════════════════════════════════════════
| \b(pollution|polluted|contamination|contaminated
|smoke\s+(from|due\s+to|because\s+of|coming\s+from)?
|black\s+smoke|thick\s+smoke|foul\s+smell|toxic\s+smell|chemical\s+smell
|burning\s+(garbage|waste|plastic|tyres?|rubber|crop|biomass|trash|wood)
|garbage\s+(fire|burning)|waste\s+(fire|burning)
|air\s+(pollution|quality|bad|polluted|hazardous|toxic|smog|smoke)
|factory\s+(smoke|emission|discharge|waste|effluent|chemical|fumes|noise|pollution)
|industry\s+(smoke|emission|discharge|waste|effluent|chemical|fumes|pollution)
|industrial\s+(waste|pollution|discharge|effluent|emission|smoke)
|chimney\s+(smoke|emission|fumes|black\s+smoke)
|exhaust\s+(fumes?|smoke|emission|smell)
|vehicle\s+(pollution|smoke|emission|exhaust)
|dust\s+(pollution|cloud|storm|on\s+road|due\s+to|from\s+construction)
|construction\s+(dust|noise|pollution|debris)
|noise\s+(pollution|from\s+factory|from\s+vehicle|from\s+construction|disturbing|nuisance)
|chemical\s+(spill|discharge|waste|smell|fumes?|dumped|effluent)
|oil\s+spill|chemical\s+spill|toxic\s+(waste|discharge|spill|fumes?|smell)
|polluted\s+(river|lake|pond|water\s+body|canal|nala|stream|groundwater)
|river\s+(garbage|waste|polluted|dirty|sewage|discharge)
|lake\s+(garbage|waste|polluted|dirty|sewage|discharge)
|sewage\s+(discharge|released\s+into|dumped\s+into)\s+(river|lake|pond|water)
|stench\s+(from|due\s+to|because\s+of|coming\s+from)
|foul\s+(smell|odour|odor)\s+(from|due\s+to|because\s+of|in\s+area)
|smell\s+(unbearable|intolerable|very\s+bad|horrible|foul|terrible)
|breathing\s+(problem|issue|difficulty)\s+(due\s+to|from|caused\s+by)\s+(smoke|pollution|dust|fumes)
|respiratory\s+(problem|issue|disease)\s+(due\s+to|from|caused\s+by)\s+(smoke|pollution|dust)
|health\s+(issue|problem|hazard)\s+(due\s+to|from|caused\s+by)\s+(pollution|smoke|fumes|dust)
|eyes?\s+(burning|irritation|watering)\s+(due\s+to|from|because\s+of)\s+(smoke|pollution|fumes|dust))\b
# ══════════════════════════════════════════════════════
# PUBLIC TRANSPORT / INFRASTRUCTURE — easy: bus stop
# broken / medium: no bus service / hard: accident
# at bus stop, overcrowding, no shelter
# ══════════════════════════════════════════════════════
| \b(bus\s+(stop|stand|shelter|route|service|not\s+running|not\s+coming
|broken|damaged|missing|no\s+shelter|encroached|dirty)
|bus\s+stop\s+(broken|damaged|missing|encroached|no\s+shelter|no\s+bench)
|auto\s+(stand|encroachment|blocking|menace|issue|problem)
|no\s+bus\s+(service|route|coming|running|available)
|bus\s+(not\s+running|not\s+coming|not\s+available|cancelled|delayed)
|public\s+(transport|vehicle|bus|transit)\s+(issue|problem|not\s+running|poor)
|transport\s+(problem|issue|no\s+service|poor\s+service)
|overcrowding\s+(in|at|on)\s+(bus|bus\s+stop|transport)
|road\s+accident\s+(at|near|due\s+to)|vehicle\s+accident|accident\s+on\s+road
|accident\s+scene|accident\s+spot\s+(not\s+cleared|blocked|dangerous)
|accident\s+blackspot|accident\s+prone\s+(area|spot|zone)
|traffic\s+(jam|congestion|signal\s+not\s+working|signal\s+broken
|light\s+not\s+working|police|issue|problem|blocked|chaos)
|traffic\s+signal\s+(broken|not\s+working|missing|damaged|off)
|signal\s+(not\s+working|broken|missing|damaged|off|malfunctioning)
|no\s+traffic\s+signal|traffic\s+light\s+(not\s+working|broken|missing)
|encroachment\s+(on\s+road|on\s+footpath|on\s+pavement|blocking)
|road\s+encroachment|footpath\s+encroachment|pavement\s+encroachment
|illegal\s+(parking|encroachment|construction|shop|vendor)\s+(blocking|on\s+road|on\s+footpath)
|parking\s+(problem|issue|blocking|road|footpath|encroachment)
|no\s+parking\s+space|vehicles\s+parked\s+(on\s+road|blocking|footpath))\b
# ══════════════════════════════════════════════════════
# BUILDING / STRUCTURAL — easy: wall collapsed / medium:
# building unsafe / hard: structural collapse risk,
# encroachment on public land, illegal construction
# ══════════════════════════════════════════════════════
| \b(building\s+(collapse|collapsed|dangerous|unsafe|crumbling|dilapidated
|crack|cracked|illegal|encroachment|debris|rubble)
|wall\s+(collapse|collapsed|fallen|crumbling|cracked|broken|dangerous
|damaged|leaning|about\s+to\s+fall)
|structure\s+(collapse|collapsed|unsafe|dangerous|crumbling|illegal)
|illegal\s+(construction|building|structure|encroachment)
|encroachment\s+(on|of)\s+(public|government|road|footpath|park|land)
|construction\s+(debris|rubble|material|blocking\s+road|on\s+road|hazard)
|demolition\s+(debris|rubble|blocking|on\s+road|not\s+cleared)
|asbestos\s+(roof|sheet|material)\s+(broken|damaged|dangerous|hazard)
|collapsed\s+(wall|building|structure|roof|ceiling)
|roof\s+(collapse|fallen|dangerous|leaking|cracked)
|ceiling\s+(collapse|fallen|dangerous|cracked|leaking)
|slab\s+(crack|broken|fallen|dangerous)
|pillar\s+(crack|broken|leaning|dangerous))\b
# ══════════════════════════════════════════════════════
# GENERAL HARM / INJURY — catches difficult phrasings
# where the civic issue is implied through harm:
# "injured", "accident", "children cannot go to school"
# ══════════════════════════════════════════════════════
| \b(injured|injury|injuries|got\s+injured|people\s+injured|someone\s+injured
|serious\s+injury|minor\s+injury|accident|accidents
|fell\s+down|slipped\s+and\s+fell|tripped\s+and\s+fell
|fell\s+(due\s+to|because\s+of|into|in)
|children\s+(cannot|can.t|unable\s+to)\s+(go\s+to\s+school|walk|play\s+outside)
|residents\s+(unable|cannot|can.t)\s+(sleep|walk|go\s+out|step\s+out)
|people\s+(unable|cannot|can.t)\s+(walk|cross|use\s+road|go\s+out)
|hazard(ous)?|danger(ous)?|risk(y)?|unsafe|life\s+(risk|threatening|danger)
|life\s+(at\s+)?risk|risk\s+to\s+life|threat\s+to\s+life
|major\s+(accident|risk|hazard|danger|issue|problem)
|emergency\s+(situation|condition|issue)|civic\s+(issue|problem|complaint|grievance))\b
# ══════════════════════════════════════════════════════
# INDIC SCRIPTS — Telugu & Hindi civic keywords
# ══════════════════════════════════════════════════════
| (\u0c38\u0c2e\u0c38\u0c4d\u0c2f|\u0c2b\u0c3f\u0c30\u0c4d\u0c2f\u0c3e\u0c26\u0c41
|\u0c30\u0c4b\u0c21\u0c4d\u0c21\u0c41\s+\u0c2a\u0c3e\u0c21\u0c48\u0c02\u0c26\u0c3f
|\u0c28\u0c40\u0c33\u0c4d\u0c33\u0c41\s+\u0c30\u0c3e\u0c35\u0c21\u0c02\s+\u0c32\u0c47\u0c26\u0c41
|\u0c1a\u0c46\u0c24\u0c4d\u0c24|\u0c2e\u0c41\u0c30\u0c41\u0c17\u0c41|\u0c35\u0c3f\u0c26\u0c4d\u0c2f\u0c41\u0c24\u0c4d
|\u0c15\u0c30\u0c46\u0c02\u0c1f\u0c4d|\u0c28\u0c40\u0c33\u0c4d\u0c33\u0c41|\u0c26\u0c41\u0c2e\u0c4d\u0c2e\u0c41
|\u0c2e\u0c41\u0c30\u0c41\u0c17\u0c41\u0c28\u0c40\u0c33\u0c41|\u0c38\u0c4d\u0c1f\u0c4d\u0c30\u0c40\u0c1f\u0c4d\s+\u0c32\u0c48\u0c1f\u0c4d)
| (\u0938\u092e\u0938\u094d\u092f\u093e|\u0936\u093f\u0915\u093e\u092f\u0924
|\u0938\u095c\u0915\s+\u0916\u0930\u093e\u092c|\u092a\u093e\u0928\u0940\s+\u0928\u0939\u0940\u0902
|\u0915\u091a\u0930\u093e|\u0928\u093e\u0932\u0940|\u092c\u093f\u091c\u0932\u0940
|\u0915\u0930\u0947\u0902\u091f|\u092a\u094d\u0930\u0926\u0942\u0937\u0923
|\u092c\u093f\u091c\u0932\u0940\s+\u0928\u0939\u0940\u0902|\u0938\u0921\u093c\u0915\s+\u0916\u0930\u093e\u092c)
# ══════════════════════════════════════════════════════
# STANDALONE HIGH-SIGNAL NOUNS (easy level)
# ══════════════════════════════════════════════════════
| \b(pothole|streetlight|sewage|waterlogging|footpath|manhole|drainage
|encroachment|carcass|borewell|transformer|substation|tanker)\b
# ══════════════════════════════════════════════════════
# ROAD QUALITY DESCRIPTORS (medium level)
# ══════════════════════════════════════════════════════
| \b(road|street|highway|lane)\s+(is\s+)?(very\s+)?(bad|damaged|broken|cracked
|horrible|terrible|pathetic|worst|hazardous|dangerous|unsafe|rough|uneven
|full\s+of\s+potholes|in\s+bad\s+condition|in\s+poor\s+condition
|not\s+repaired|not\s+fixed|needs\s+repair|needs\s+fixing)\b
""",
re.VERBOSE | re.IGNORECASE,
)
# ── Signal 2: Complaint intent ────────────────────────────────────────────────
_COMPLAINT_INTENT = re.compile(
r"""
# ── Not working / not resolved ───────────────────────
\b(not\s+(working|repaired|fixed|resolved|cleared|collected|done
|responded|addressed|functioning|restored|completed|processed
|cleaned|removed|attended|responding|maintained|inspected
|replaced|upgraded|available|supplied|coming|received
|safe|fit|potable|drinkable|usable))
| \b(no\s+(action|response|resolution|update|repair|water|electricity
|signal|network|power|supply|gas|light|service|maintenance))
| \b(still\s+(not|pending|waiting|broken|blocked|overflowing|unresolved
|same|continuing|happening|there|going\s+on))
| \b(yet\s+to\s+be|never\s+(fixed|repaired|addressed|cleared|resolved
|cleaned|collected|attended|responded))
# ── Requests / calls to action (easy intent) ─────────
| \b(please\s+(fix|repair|clean|clear|remove|take|help|address|look|attend
|send|deploy|do|act|check|inspect|replace|resolve|respond
|come|visit|arrange|ensure|provide|install|restore|maintain)
|kindly\s+(fix|repair|clean|remove|address|look|attend|send|do|act
|check|inspect|resolve|respond|come|visit|arrange|ensure)
|request\s+(you\s+to|for\s+(immediate|urgent|early|early\s+action))
|need\s+(immediate|urgent|your|early)\s+(action|help|attention|response|repair)
|take\s+(action|immediate|urgent|necessary|early|swift|quick)(\s+action)?
|must\s+(fix|repair|address|resolve|clear|clean|replace|restore|remove)
|should\s+(fix|repair|address|resolve|clear|clean|replace|restore|remove)
|need\s+to\s+be\s+(fixed|repaired|addressed|resolved|cleared|cleaned
|replaced|restored|removed|attended|inspected)
|do\s+something|do\s+the\s+needful|take\s+notice|look\s+into\s+(this|the\s+matter)
|bring\s+to\s+your\s+(notice|attention)|draw\s+your\s+attention
|requesting\s+(you|the\s+authorities|officials)\s+to
|hope\s+(you\s+will|authorities\s+will|officials\s+will)\s+(act|fix|resolve|address)
|i\s+request|we\s+request|citizens\s+request|residents\s+request)\b
# ── Duration / persistence (medium intent) ───────────
| \b(since\s+(yesterday|last\s+\w+|\d+\s+(days?|weeks?|months?|years?|hours?)
|morning|night|long\s+time|ages|many\s+(days?|weeks?|months?)
|a\s+(long|very\s+long)\s+time|weeks?|months?|days?|years?|long|ages)
|for\s+(the\s+past\s+)?(\d+\s+)?(days?|weeks?|months?|hours?|years?
|long\s+time|ages|a\s+long\s+time|many\s+(days?|weeks?|months?))
|for\s+(weeks?|months?|days?|years?|long|ages|a\s+very\s+long\s+time)
|days?\s+ago|weeks?\s+ago|months?\s+ago|years?\s+ago
|\d+\s+(days?|weeks?|months?)\s+(now|already|back|since|passed|gone|over)
|since\s+long|since\s+ages|from\s+past\s+\d+|for\s+long|for\s+ages
|repeatedly|again\s+and\s+again|multiple\s+times|several\s+times|many\s+times
|keeps?\s+(happening|recurring|coming\s+back|repeating)
|ongoing\s+(issue|problem)|persistent\s+(issue|problem|complaint)
|chronic\s+(issue|problem)|long.?standing\s+(issue|problem)
|months?\s+have\s+(passed|gone)|years?\s+have\s+(passed|gone)
|no\s+(improvement|change|action|repair)\s+(for|since|in)\s+\d+
|not\s+repaired\s+(for|since|in)\s+(the\s+past\s+)?\d+)\b
# ── Harm / impact / consequence (hard intent) ────────
| \b(affecting|causing\s+(problem|issue|disease|accident|harm|damage|illness
|inconvenience|difficulty|hardship|suffering|injury)
|hazard(ous)?|danger(ous)?|risk(y)?|unsafe
|accident(s)?|injur(y|ies|ed|ing)|hurt
|attack(ed|ing)|attacked|bitten|bit\s+(by|a\s+dog)
|people\s+are\s+(suffering|facing|affected|unable|scared|getting\s+hurt
|being\s+bitten|being\s+attacked|in\s+danger|at\s+risk
|falling\s+ill|getting\s+sick|getting\s+infected)
|residents\s+are\s+(suffering|facing|affected|struggling|scared
|getting\s+hurt|being\s+bitten|falling\s+ill|getting\s+sick
|unable\s+to|cannot)
|children\s+are\s+(being\s+attacked|being\s+bitten|scared|afraid
|falling\s+ill|getting\s+sick|unable\s+to\s+go)
|public\s+(facing|suffering|inconvenienced|at\s+risk|in\s+danger)
|inconvenien(ce|cing)|discomfort|hardship|problem\s+for
|causing\s+inconvenience|creating\s+(problem|issue|hazard|risk)
|life\s+(at\s+risk|threatening|in\s+danger)|risk\s+to\s+life
|health\s+(risk|hazard|danger|issue|problem)
|fell\s+ill|fallen\s+ill|got\s+sick|getting\s+sick|falling\s+ill
|vomiting|diarrhea|stomach\s+(pain|ache|issue)|fever\s+(due\s+to|from|after)
|infection\s+(due\s+to|from|caused\s+by|spread\s+by)
|disease\s+(spreading|spread|due\s+to|from|caused\s+by)
|outbreak\s+(of|due\s+to)|epidemic\s+(due\s+to|from)
|vehicle\s+(damaged|got\s+damaged|broke\s+down|tyres?)\s+(due\s+to|because\s+of)
|tyre\s+(burst|puncture|flat)\s+(due\s+to|because\s+of)
|two.?wheeler\s+(fell|slipped|skidded|accident)\s+(due\s+to|because\s+of))\b
| \b(biting|attacking|chasing|menace|nuisance|threat(ening)?|aggressive|ferocious)\b
# ── Formal complaint / escalation ────────────────────
| \b(complain(t|ing|ed)?|grievance|report(ing|ed)?|escalat(e|ing|ed)
|filing|lodge[d]?|registered|raising\s+(a\s+)?(complaint|issue|grievance)
|bringing\s+to\s+(notice|attention)|lodging\s+(a\s+)?(complaint|grievance)
|submitting\s+(a\s+)?(complaint|grievance)
|reporting\s+(this|the\s+issue|the\s+problem|the\s+matter)
|writing\s+(to\s+)?(complain|regarding|about|to\s+report)
|informing\s+(you|the\s+authorities|officials)\s+(about|of|regarding)
|notifying\s+(you|the\s+authorities)\s+(about|of|regarding))\b
# ── Urgency / priority ────────────────────────────────
| \b(urgent(ly)?|immediately|emergency|as\s+soon\s+as\s+possible|asap
|high\s+priority|critical|serious(ly)?|without\s+delay|at\s+the\s+earliest
|earliest\s+possible|top\s+priority|very\s+important|most\s+important
|cannot\s+wait|cannot\s+be\s+delayed|needs\s+immediate\s+attention)\b
# ── Concession / contrast ─────────────────────────────
| \b(despite|although|even\s+(though|after)|in\s+spite\s+of|inspite|notwithstanding
|however|but\s+(no\s+(action|response)|still|nothing\s+done)
|after\s+(many|multiple|several)\s+(complaints?|requests?|visits?)
|after\s+complaining\s+(many\s+times|multiple\s+times|repeatedly)
|despite\s+(complaining|reporting|raising|requesting)
|no\s+(one|official|authority)\s+(came|visited|responded|acted|checked))\b
# ── Rhetorical / questioning (hard intent) ────────────
| \b(why\s+(is|are|has|have|no|not|isn.t|aren.t|hasn.t|haven.t|was|were)
|when\s+will|how\s+long\s+(will|has|have|does|is|are|do)
|how\s+many\s+(days?|weeks?|months?|times?)\s+(will|has|have|do|does|more)
|till\s+when|until\s+when|for\s+how\s+long
|who\s+is\s+responsible|who\s+will\s+(fix|repair|address|resolve|act|respond)
|what\s+action\s+(has|have|was|were|will)\s+(been\s+)?(taken|done)
|is\s+(anyone|anybody|no\s+one|nobody)\s+(responsible|listening|taking\s+action)
|are\s+(you|they|authorities)\s+(aware|listening|going\s+to\s+fix))\b
# ── Terminal question mark (implied complaint) ────────
| (\?\s*$)
# ── Indic complaint intent — Telugu ──────────────────
| (\u0c1c\u0c35\u0c3e\u0c2c\u0c41\s*\u0c32\u0c47\u0c26\u0c41|\u0c1a\u0c30\u0c4d\u0c2f\s*\u0c32\u0c47\u0c26\u0c41
|\u0c06\u0c32\u0c38\u0c4d\u0c2f\u0c02|\u0c26\u0c2f\u0c1a\u0c47\u0c38\u0c3f|\u0c38\u0c30\u0c3f\u0c1a\u0c47\u0c2f\u0c02\u0c21\u0c3f
|\u0c05\u0c2d\u0c4d\u0c2f\u0c30\u0c4d\u0c25\u0c28|\u0c1a\u0c42\u0c38\u0c41\u0c15\u0c4b\u0c02\u0c21\u0c3f
|\u0c35\u0c46\u0c02\u0c1f\u0c28\u0c47|\u0c24\u0c15\u0c4d\u0c37\u0c23\u0c02|\u0c38\u0c2e\u0c38\u0c4d\u0c2f\u0c32\u0c41)
# ── Indic complaint intent — Hindi ───────────────────
| (\u091c\u0935\u093e\u092c\s*\u0928\u0939\u0940\u0902|\u0915\u093e\u0930\u094d\u0930\u0935\u093e\u0908\s*\u0928\u0939\u0940\u0902
|\u0926\u0947\u0930\u0940|\u0915\u0943\u092a\u092f\u093e|\u0924\u0941\u0930\u0902\u0924|\u0920\u0940\u0915\s*\u0915\u0930\u0947\u0902
|\u0928\u093f\u0935\u0947\u0926\u0928|\u0927\u094d\u092f\u093e\u0928\s*\u0926\u0947\u0902|\u0905\u0928\u0941\u0930\u094b\u0927
|\u0936\u093f\u0915\u093e\u092f\u0924|\u0938\u092e\u0938\u094d\u092f\u093e\s*\u0939\u0948)
""",
re.VERBOSE | re.IGNORECASE,
)
# ── Special case: animal harm without explicit civic noun ─────────────────────
# Matches any one of three shapes (case-insensitive, searched anywhere in text):
#   1. animal word … harm word … victim word
#   2. victim word … harm word … animal word
#   3. a fixed phrase such as "dog bite" or "bitten by a stray"
# In shapes 1 and 2 each "…" gap is at most 80 characters (`.{0,80}`), and
# because DOTALL is not set the gap cannot span a line break.
# _is_grievance() treats a match as sufficient on its own.
_ANIMAL_HARM_PATTERN = re.compile(
r"""
\b(dogs?|cats?|cattle|cow|bull|buffalo|animal|stray)\b
.{0,80}
\b(biting|bitten|attacked|attacking|chasing|bit|injured|hurt|menace|nuisance
|aggressive|ferocious|dangerous|terrorizing|chased)\b
.{0,80}
\b(people|residents|children|child|person|public|woman|man|commuters?
|pedestrians?|passers?-?by|student|students|elderly|senior)\b
|
\b(people|residents|children|child|person|public|student|students)\b
.{0,80}
\b(bitten|attacked|chased|injured|hurt|scared|harassed|afraid|terrorized)\b
.{0,80}
\b(dogs?|cats?|cattle|cow|bull|buffalo|animal|stray)\b
|
\b(dog\s+bite|animal\s+bite|bitten\s+by\s+(a\s+)?(dog|stray|animal|cow|bull)
|attacked\s+by\s+(a\s+)?(dog|stray|animal|cow|bull)
|chased\s+by\s+(a\s+)?(dog|stray|animal|cow|bull))\b
""",
re.VERBOSE | re.IGNORECASE,
)
# ── Conversational / non-grievance rejection ──────────────────────────────────
# Anchored at both ends (`^ … $`), so it only matches when the WHOLE input is a
# greeting, acknowledgement, farewell, "test"/"testing" (optionally followed by
# digits or punctuation) or a 1–2 letter ASCII string, with optional trailing
# whitespace or "!", "?", ".". A greeting followed by real content does not
# match. Applied with .match() in _is_grievance().
_NON_GRIEVANCE_PATTERNS = re.compile(
r"""
^[\s]*(
good\s*(morning|afternoon|evening|night|day|nite)
| (hello+|hi+|hey+|howdy|greetings|namaste|namaskar|vanakkam|
salam|salaam|kem\s+cho|hii+|heyy+|helloo+)
| how\s+are\s+you(\s+doing)?|how\s+r\s+u|how\s+do\s+you\s+do
| how\s+is\s+it\s+going|how.?s\s+(it|everything|life|things)
| what.?s\s+(up|going\s+on|new|happening|cooking|the\s+matter)
| what\s+are\s+you\s+doing|are\s+you\s+there|you\s+there\??
| thank\s+you(\s+so\s+much)?|thanks(\s+a\s+(lot|ton|bunch))?|thank\s+u|thx|ty
| ok(ay)?\.?|sure\.?|fine\.?|yep\.?|nope\.?|yes\.?|no\.?
| alright\.?|alrite\.?|hmm+\.?|huh\.?|oh\.?|ah\.?|uh\.?
| bye+\.?|goodbye\.?|good\s+bye\.?|see\s+you(\s+later)?
| take\s+care\.?|talk\s+(to\s+you\s+)?later\.?|ttyl|brb|gtg
| test(ing)?[\s\d!.]*|[a-zA-Z]{1,2}
)[\s!?.]*$
""",
re.VERBOSE | re.IGNORECASE,
)
# Minimum stripped length at which the grievance-intent check is applied:
# _is_grievance() returns False below it, and validate_text() only calls
# _is_grievance() for inputs at least this long.
_MIN_GRIEVANCE_CHECK_LEN = 8
# User-facing messages keyed by the error codes that validate_text() returns.
# The "5 characters" wording mirrors the literal length check in validate_text().
_VALIDATION_MESSAGES = {
"too_short": "Text is too short. Please provide at least 5 characters.",
"junk_input": "Input contains only numbers or special characters.",
"not_a_grievance": (
"Your message does not appear to be a grievance or civic complaint. "
"Please describe the issue you are facing — for example: pothole on "
"the road, water supply disruption, electricity outage, garbage not "
"collected, stray dogs biting residents, or any other civic problem."
),
}
def _is_grievance(text: str) -> bool:
    """
    Decide whether *text* reads as a civic grievance.

    The input is stripped, then evaluated in this order:

    1. Rejected when the whole string is a greeting/filler
       (``_NON_GRIEVANCE_PATTERNS``).
    2. Rejected when shorter than ``_MIN_GRIEVANCE_CHECK_LEN`` characters.
    3. Accepted when it describes animal harm (``_ANIMAL_HARM_PATTERN``).
    4. Accepted when it mentions any civic topic (``_CIVIC_TOPIC``).

    Anything that reaches the end without an acceptance is rejected.
    """
    candidate = text.strip()

    # Rejections first: pure filler, or too little text to judge.
    is_filler = _NON_GRIEVANCE_PATTERNS.match(candidate) is not None
    if is_filler or len(candidate) < _MIN_GRIEVANCE_CHECK_LEN:
        return False

    # Either signal on its own is enough to accept.
    return bool(
        _ANIMAL_HARM_PATTERN.search(candidate)
        or _CIVIC_TOPIC.search(candidate)
    )
def validate_text(text) -> tuple:
    """
    Validate user-typed grievance text.

    Checks run in this order on the stripped input:

    1. Must be a ``str`` of at least 5 characters, else ``"too_short"``.
    2. Must not consist only of digits/punctuation, else ``"junk_input"``.
    3. Must not be a pure greeting/filler, else ``"not_a_grievance"``.
    4. Inputs of at least ``_MIN_GRIEVANCE_CHECK_LEN`` characters must also
       pass ``_is_grievance``, else ``"not_a_grievance"``.

    Returns (is_valid: bool, error_code: str | None). The error code is a key
    of ``_VALIDATION_MESSAGES``.
    """
    if not isinstance(text, str):
        return False, "too_short"
    stripped = text.strip()
    if len(stripped) < 5:
        return False, "too_short"
    if _RE_JUNK.fullmatch(stripped.lower()):
        return False, "junk_input"
    if len(stripped) < _MIN_GRIEVANCE_CHECK_LEN:
        # Short inputs skip the full intent check, because _is_grievance()
        # rejects everything below the minimum length. A bare greeting such
        # as "hello" or "thanks" must still be refused here; previously it
        # slipped through and was sent to the classifiers.
        if _NON_GRIEVANCE_PATTERNS.match(stripped):
            return False, "not_a_grievance"
    elif not _is_grievance(stripped):
        return False, "not_a_grievance"
    return True, None
def _validate_machine_text(text: str, source: str) -> tuple:
"""
Lightweight validation for machine-generated text.
Skips grievance intent — only checks length and junk.
"""
if not text or len(text.strip()) < 5:
code = "image_unreadable" if source == "image" else "audio_unreadable"
msg = (
"Could not extract meaningful content from the image. "
"Please upload a clearer photo of the civic issue."
if source == "image" else
"Could not transcribe audio. Please try again with a clearer recording."
)
return False, {"status": "failed", "code": code, "message": msg}
if _RE_JUNK.fullmatch(text.strip().lower()):
return False, {"status": "failed", "code": "junk_input",
"message": _VALIDATION_MESSAGES["junk_input"]}
return True, None
# =========================================================
# ADVANCED CIVIC IMAGE RELEVANCE SCORER
# =========================================================
# Keyword lexicon used to score image captions, keyed by civic category.
# Each category carries two term lists consumed by score_civic_relevance():
#   "primary"   — specific phrases; every one found in the caption adds 2.
#   "secondary" — generic words; every one found in the caption adds 1.
# Terms are tested as plain substrings of the lower-cased caption, so short
# entries (e.g. "tar", "rat", "can", "ash") also hit inside longer words.
# Some terms are listed under more than one category (e.g. "drain overflow",
# "sewage", "stench") and then contribute to each of those categories.
# NOTE(review): "building" has no entry in _CATEGORY_TO_LEXICON — confirm
# whether that is intentional.
_CIVIC_LEXICON = {
"roads": {
"primary": [
"pothole", "crater", "road damage", "road broken", "road crack",
"broken road", "damaged road", "road cave", "road collapse",
"unpaved road", "road debris", "road blocked", "road obstruction",
"road construction", "barricade", "road divider", "speed breaker",
"road marking faded", "road sign missing", "footpath broken",
"pavement crack", "sidewalk damage", "footpath blocked",
"road waterlogging", "road pothole", "tarmac damage",
"road pit", "open trench", "road excavation",
],
"secondary": [
"road", "street", "highway", "lane", "path", "pavement",
"footpath", "sidewalk", "tar", "asphalt", "concrete slab",
"gravel", "mud road", "dirt road", "traffic", "pedestrian",
"junction", "intersection", "bridge", "culvert", "divider",
"median", "overpass", "underpass", "flyover",
],
},
"water": {
"primary": [
"water leak", "pipe burst", "pipe leakage", "water overflow",
"broken pipe", "waterlogged road", "waterlogged street",
"flooded road", "flooded street", "open drain overflowing",
"drain overflow", "sewage overflow", "no water supply",
"water supply disrupted", "water stagnation", "water contamination",
"dirty water supply", "muddy water pipe", "water tank overflow",
"exposed water pipe", "water seeping road",
],
"secondary": [
"water", "pipe", "drain", "drainage", "sewage", "sewer",
"tank", "pump", "valve", "tap", "leak", "flood", "puddle",
"waterlogging", "stagnant", "overflow", "canal", "borewell",
"water main", "supply line", "gutter", "nala",
],
},
"electricity": {
"primary": [
"broken streetlight", "streetlight not working", "dark street at night",
"fallen electric pole", "fallen wire on road", "dangling wire",
"exposed electric wire", "naked wire", "live wire on ground",
"electric sparks", "transformer damage", "transformer fire",
"electric box open", "power line down", "electric pole broken",
"meter box damaged", "electric pole leaning", "snapped cable",
"wire hanging low", "substation damage",
],
"secondary": [
"electric", "electricity", "wire", "cable", "pole", "transformer",
"streetlight", "lamp post", "street lamp", "power", "voltage",
"meter", "fuse box", "tower", "pylon", "grid", "line",
"conductor", "insulator", "junction box",
],
},
"garbage": {
"primary": [
"garbage pile", "waste pile", "trash pile", "litter pile",
"garbage dump", "illegal dump site", "open garbage",
"overflowing garbage bin", "uncollected garbage", "garbage on road",
"waste on street", "burning garbage", "garbage fire",
"plastic waste heap", "construction debris dumped illegally",
"garbage bin broken", "garbage bin missing", "rotting waste",
"animal carcass dumped", "household waste on road",
],
"secondary": [
"garbage", "waste", "trash", "litter", "debris", "rubbish",
"bin", "dustbin", "dumpster", "dump", "refuse", "plastic",
"polythene", "bag", "bottle", "can", "heap", "pile",
"filth", "dirty", "unclean", "stench", "decompose",
],
},
"sanitation": {
"primary": [
"broken public toilet", "public toilet dirty", "toilet blocked",
"toilet overflow", "urinating in public", "open defecation spot",
"sewage pit open", "manhole open", "manhole cover missing",
"open manhole on road", "uncovered drain", "drain blocked",
"drain choked", "drain overflow", "mosquito breeding site",
"stagnant sewage pool", "open sewer", "foul drain",
],
"secondary": [
"toilet", "latrine", "bathroom", "manhole", "sewer", "sewage",
"drain", "smell", "stench", "odour", "hygiene", "sanitation",
"cleanliness", "mosquito", "rat", "pest", "flies", "filth",
],
},
"pollution": {
"primary": [
"black smoke from chimney", "factory smoke emission",
"smoke from burning garbage", "burning tyres smoke",
"burning plastic smoke", "air pollution haze",
"dust pollution on road", "chemical spill on road",
"oil spill on road", "toxic waste dumped in water",
"polluted river", "polluted lake", "river garbage",
],
"secondary": [
"smoke", "smog", "dust", "pollution", "emission", "toxic",
"chemical", "factory", "industry", "exhaust", "fume",
"haze", "contamination", "spill", "river", "lake",
"pond", "burning", "ash", "soot",
],
},
"public transport": {
"primary": [
"bus stop broken", "bus shelter damaged", "bus stop sign missing",
"auto stand encroachment", "bus stand blocked",
"broken down bus on road", "public vehicle accident",
"road accident scene", "vehicle overturned on road",
"bus stop bench broken",
],
"secondary": [
"bus", "auto", "rickshaw", "taxi", "stop", "shelter",
"stand", "route", "transport", "vehicle", "commute",
"passenger", "queue",
],
},
"stray animals": {
"primary": [
"stray dogs on road", "pack of stray dogs",
"cattle blocking road", "stray cattle on highway",
"animal carcass on road", "dead animal on street",
"dog bite victim", "injured stray animal on road",
],
"secondary": [
"stray", "dog", "cow", "cattle", "animal", "carcass",
"dead animal", "bite", "attack", "menace", "herd",
"buffalo", "goat", "pig",
],
},
"building": {
"primary": [
"building collapse", "wall collapse", "collapsed structure",
"crumbling wall on road", "dangerous building", "unsafe structure",
"major building crack", "illegal construction blocking road",
"encroachment on footpath", "dilapidated building",
"building rubble on road", "demolition debris on road",
],
"secondary": [
"building", "wall", "structure", "crack", "collapse",
"construction", "demolition", "rubble", "bricks",
"pillar", "beam", "slab", "roof", "cement", "scaffolding",
],
},
}
# Minimum total lexicon score (summed over all categories) for a caption to be
# treated as civic content by score_civic_relevance().
CIVIC_RELEVANCE_THRESHOLD = 2
# Caption phrases that mark an image as non-civic (selfies, food, scenery,
# celebrations, pets, screenshots/posters, indoor scenes). score_civic_relevance()
# searches for these first; any match yields score 0 / not relevant without
# consulting the lexicon.
_NON_CIVIC_OVERRIDE = re.compile(
r"""
\b(selfie|portrait\s+photo|person\s+posing|man\s+smiling|woman\s+smiling
|people\s+laughing|group\s+photo|family\s+photo|couple\s+photo
|person\s+standing\s+in\s+front)\b
| \b(food|meal|dish|plate\s+of|cooking|restaurant|cafe|eating|drinking
|biryani|curry|pizza|burger|snack\s+food|fruit\s+bowl
|vegetable\s+market\s+stall)\b
| \b(flower\s+garden|blooming\s+flower|garden\s+path|scenic\s+nature
|beautiful\s+sunset|sunrise\s+sky|rainbow\s+sky|mountain\s+view
|beach\s+scenery|forest\s+trail|green\s+landscape|paddy\s+field
|agricultural\s+field)\b
| \b(baby\s+playing|infant|child\s+playing\s+in\s+park|wedding\s+ceremony
|birthday\s+party|celebration\s+event|festival\s+decoration
|religious\s+ceremony)\b
| \b(cat\s+sitting|dog\s+playing\s+in\s+yard|pet\s+dog|pet\s+cat
|bird\s+perched|butterfly\s+on\s+flower|insect\s+on\s+flower)\b
| \b(screenshot\s+of|meme|advertisement\s+poster|promotional\s+banner
|movie\s+poster|product\s+photo)\b
| \b(bedroom|living\s+room\s+interior|kitchen\s+interior|office\s+desk
|laptop\s+on\s+desk|indoor\s+plant)\b
""",
re.VERBOSE | re.IGNORECASE,
)
_CATEGORY_TO_LEXICON = {
"electricity": "electricity",
"garbage": "garbage",
"pollution": "pollution",
"public transport": "public transport",
"roads": "roads",
"sanitation": "sanitation",
"stray animals": "stray animals",
"water": "water",
"other": None,
}
_OVERLAPPING_CATEGORIES = [
{"garbage", "sanitation"},
{"garbage", "pollution"},
{"sanitation", "water"},
{"water", "roads"},
{"electricity", "roads"},
{"pollution", "sanitation"},
{"stray animals", "roads"},
{"stray animals", "sanitation"},
]
def _categories_overlap(cat_a: str, cat_b: str) -> bool:
return {cat_a, cat_b} in _OVERLAPPING_CATEGORIES
def score_civic_relevance(caption: str) -> dict:
    """
    Score how strongly an image caption points at a civic issue.

    The caption is lower-cased and first checked against
    ``_NON_CIVIC_OVERRIDE``; a hit short-circuits to "not relevant".
    Otherwise every ``_CIVIC_LEXICON`` term found as a substring adds to its
    category's score (primary term = 2, secondary term = 1).

    Args:
        caption: caption text; ``None`` or empty is treated as an empty
            caption and scores 0.

    Returns:
        dict with keys:
            is_relevant      — total score >= ``CIVIC_RELEVANCE_THRESHOLD``
            score            — sum of all category scores
            matched_category — highest-scoring category, or None
            matched_terms    — sorted list of distinct matched terms
            override_reason  — explanation when the override fired, else None
    """
    # A captioner that produced nothing must score 0, not raise.
    caption = caption or ""
    caption_lower = caption.lower()

    override_match = _NON_CIVIC_OVERRIDE.search(caption_lower)
    if override_match:
        return {
            "is_relevant": False,
            "score": 0,
            "matched_category": None,
            "matched_terms": [],
            "override_reason": f"Non-civic content detected: '{override_match.group()}'",
        }

    category_scores: dict = {}
    matched_terms: set = set()
    for cat, terms in _CIVIC_LEXICON.items():
        # NOTE(review): substring matching means short terms such as "tar" or
        # "rat" also hit inside unrelated words ("guitar", "separate").
        primary_hits = [t for t in terms["primary"] if t in caption_lower]
        secondary_hits = [t for t in terms["secondary"] if t in caption_lower]
        score = 2 * len(primary_hits) + len(secondary_hits)
        if score > 0:
            category_scores[cat] = score
            matched_terms.update(primary_hits, secondary_hits)

    total_score = sum(category_scores.values())
    top_category = max(category_scores, key=category_scores.get) if category_scores else None
    is_relevant = total_score >= CIVIC_RELEVANCE_THRESHOLD
    logger.info("[civic-score] caption='%s...' score=%d top_cat='%s' relevant=%s",
                caption[:80], total_score, top_category, is_relevant)
    return {
        "is_relevant": is_relevant,
        "score": total_score,
        "matched_category": top_category,
        # Sorted so the same caption always yields the same list order.
        "matched_terms": sorted(matched_terms),
        "override_reason": None,
    }
def check_evidence_relevance(caption: str, grievance_category: str | None = None) -> dict:
    """
    Decide whether an image caption supports a grievance.

    Step 1: the caption must score as civic content
    (``score_civic_relevance``). Step 2, only when *grievance_category* is
    given and maps to a lexicon category: the image's top category must equal
    it or form a listed overlapping pair with it.

    Args:
        caption: image caption; ``None`` or empty is treated as an empty
            caption and reported as not relevant.
        grievance_category: classifier label for the complaint text, or None
            to skip the category cross-check.

    Returns:
        dict with "evidence_relevant" (bool), "evidence_note" (str) and
        "civic_score" (int).
    """
    # The captioner output is passed in unchecked by some callers; an absent
    # caption must yield "not relevant" instead of an exception.
    caption = caption or ""
    result = score_civic_relevance(caption)
    score = result["score"]

    if not result["is_relevant"]:
        reason = result["override_reason"] or (
            f"Image does not appear to show a civic issue "
            f"(caption: '{caption[:60]}', score: {score})."
        )
        return {"evidence_relevant": False, "evidence_note": reason,
                "civic_score": score}

    img_cat = result["matched_category"]
    bert_cat = (
        _CATEGORY_TO_LEXICON.get(grievance_category.lower())
        if grievance_category else None
    )
    if bert_cat and img_cat and img_cat != bert_cat:
        if _categories_overlap(img_cat, bert_cat):
            logger.info("[civic-score] Overlapping categories img='%s' bert='%s' — valid",
                        img_cat, bert_cat)
        else:
            note = (
                f"Image appears to show a '{img_cat}' issue but the grievance "
                f"is classified as '{grievance_category}'. "
                f"The image may not directly support this complaint — "
                f"consider retaking a more relevant photo."
            )
            return {"evidence_relevant": False, "evidence_note": note,
                    "civic_score": score}

    img_cat_label = img_cat or "general civic"
    return {
        "evidence_relevant": True,
        "evidence_note": (
            f"Image contains civic content related to '{img_cat_label}' "
            f"(visual relevance score: {score}). "
            f"Note: GIT scores the image visually; BERT classifies the complaint text — "
            f"they may show different but related categories."
        ),
        "civic_score": score,
    }
# =========================================================
# WARD BOUNDING BOXES — Kakinada Municipal Corporation
# =========================================================
# Ward name (lower-case) -> bounding box as (lat_min, lat_max, lon_min, lon_max)
# in decimal degrees; this is the order validate_area_vs_coords() unpacks.
# Lookups lower-case and strip the submitted area name before matching.
# NOTE(review): "kothapalli" and "surampalem" extend above latitude 17.10, the
# upper limit used by is_kakinada() — confirm which bound is intended.
WARD_BOUNDS = {
"suryaraopeta": (16.980, 17.010, 82.230, 82.260),
"jagannaickpur": (16.970, 17.000, 82.240, 82.270),
"raja rao peta": (16.975, 17.005, 82.245, 82.275),
"bhanugudi": (16.960, 16.990, 82.250, 82.280),
"old town": (16.990, 17.020, 82.220, 82.250),
"rajah street": (16.985, 17.015, 82.225, 82.255),
"main road": (16.980, 17.010, 82.235, 82.265),
"gandhi nagar": (16.975, 17.005, 82.240, 82.270),
"ashok nagar": (16.970, 17.000, 82.245, 82.275),
"nethaji nagar": (16.965, 16.995, 82.240, 82.270),
"srinivasa nagar": (16.960, 16.990, 82.245, 82.275),
"tngo colony": (16.955, 16.985, 82.250, 82.280),
"shankar vilas": (16.975, 17.005, 82.235, 82.265),
"collector's colony": (16.980, 17.010, 82.240, 82.270),
"new town": (16.990, 17.020, 82.235, 82.265),
"bank colony": (16.985, 17.015, 82.230, 82.260),
"drivers colony": (16.970, 17.000, 82.250, 82.280),
"fci colony": (16.965, 16.995, 82.255, 82.285),
"burma colony": (16.960, 16.990, 82.255, 82.285),
"dwaraka nagar": (16.975, 17.005, 82.245, 82.275),
"ayodhya nagar": (16.970, 17.000, 82.240, 82.270),
"kakinada port area": (16.940, 16.970, 82.260, 82.300),
"kakinada industrial area": (16.930, 16.960, 82.255, 82.295),
"fishing harbour": (16.935, 16.965, 82.265, 82.305),
"dairy farm": (16.950, 16.980, 82.250, 82.280),
"auto nagar": (16.945, 16.975, 82.255, 82.285),
"kaleswara rao nagar": (16.980, 17.010, 82.245, 82.275),
"ramanayyapeta": (16.975, 17.005, 82.250, 82.280),
"rama rao peta": (16.970, 17.000, 82.245, 82.275),
"kondayya palem": (16.965, 16.995, 82.245, 82.275),
"ganganapalle": (16.960, 16.990, 82.240, 82.270),
"gudari gunta": (16.955, 16.985, 82.245, 82.275),
"indrapalem": (16.950, 16.980, 82.245, 82.275),
"sarpavaram": (16.945, 16.975, 82.245, 82.275),
"uppada": (16.960, 16.990, 82.290, 82.330),
"kaikavolu": (17.020, 17.060, 82.250, 82.290),
"kothuru": (17.010, 17.050, 82.255, 82.295),
"thammavaram": (17.000, 17.040, 82.255, 82.295),
"thimmapuram": (16.995, 17.035, 82.250, 82.290),
"vivekananda street": (16.985, 17.015, 82.240, 82.270),
"jr ntr road": (16.980, 17.010, 82.235, 82.265),
"jntu kakinada area": (16.950, 16.980, 82.260, 82.300),
"govt general hospital area": (16.975, 17.005, 82.235, 82.265),
"apsp camp": (16.960, 16.990, 82.260, 82.300),
"kakinada beach road": (16.950, 16.980, 82.270, 82.310),
"kakinada bazar": (16.985, 17.015, 82.230, 82.260),
"anjaneya nagar": (16.970, 17.000, 82.255, 82.285),
"kothapalli": (17.070, 17.110, 82.295, 82.340),
"surampalem": (17.075, 17.105, 82.050, 82.085),
}
# Slack, in degrees, added on every side of a ward box by
# validate_area_vs_coords() before testing a coordinate against it.
WARD_TOLERANCE_DEG = 0.015
# =========================================================
# GEO HELPERS
# =========================================================
def _dms_to_decimal(dms, ref: str) -> float:
degrees = dms[0][0] / dms[0][1]
minutes = dms[1][0] / dms[1][1]
seconds = dms[2][0] / dms[2][1]
decimal = degrees + minutes / 60 + seconds / 3600
if ref in ("S", "W"):
decimal = -decimal
return decimal
def extract_gps_from_image(image_bytes: bytes) -> tuple | None:
    """
    Read GPS coordinates from an image's EXIF block.

    Args:
        image_bytes: raw bytes of the uploaded image.

    Returns:
        (latitude, longitude) in signed decimal degrees, or None when the
        image has no EXIF data, no complete GPS record, or cannot be parsed.
        This function never raises; failures are logged and reported as None.
    """
    try:
        # Context manager releases the decoder's handle once EXIF is read.
        with Image.open(io.BytesIO(image_bytes)) as img:
            exif_bytes = img.info.get("exif")
        logger.info("EXIF present: %s", exif_bytes is not None)
        if not exif_bytes:
            return None

        gps_data = piexif.load(exif_bytes).get("GPS", {})
        if not gps_data:
            return None

        lat_dms = gps_data.get(piexif.GPSIFD.GPSLatitude)
        lat_ref = gps_data.get(piexif.GPSIFD.GPSLatitudeRef)
        lon_dms = gps_data.get(piexif.GPSIFD.GPSLongitude)
        lon_ref = gps_data.get(piexif.GPSIFD.GPSLongitudeRef)
        if not (lat_dms and lat_ref and lon_dms and lon_ref):
            return None

        # Hemisphere refs may arrive as bytes; normalise to str first.
        if isinstance(lat_ref, bytes):
            lat_ref = lat_ref.decode()
        if isinstance(lon_ref, bytes):
            lon_ref = lon_ref.decode()

        lat = _dms_to_decimal(lat_dms, lat_ref)
        lon = _dms_to_decimal(lon_dms, lon_ref)
        return lat, lon
    except Exception:
        # Best-effort by design: an unreadable image or malformed EXIF is
        # treated as "no GPS", but leave a trace for diagnosis.
        logger.warning("[location] Could not read GPS EXIF from image", exc_info=True)
        return None
def is_kakinada(lat: float, lon: float) -> bool:
    """
    Return True when (lat, lon) lies inside the fixed Kakinada bounding box.

    The box is latitude 16.85–17.10 and longitude 82.00–82.35, bounds
    inclusive. Values that cannot be converted to float yield False.
    """
    try:
        if not 16.85 <= float(lat) <= 17.10:
            return False
        return 82.00 <= float(lon) <= 82.35
    except (TypeError, ValueError):
        return False
def check_image_location(image_bytes: bytes) -> str:
    """
    Classify an image's EXIF location.

    Returns "no_gps" when no coordinates can be extracted, "valid" when they
    fall inside the Kakinada bounding box, and "invalid" otherwise.
    """
    gps = extract_gps_from_image(image_bytes)
    if gps is None:
        return "no_gps"
    latitude, longitude = gps
    if is_kakinada(latitude, longitude):
        return "valid"
    return "invalid"
def validate_area_vs_coords(area: str, lat: float, lon: float) -> tuple:
    """
    Check that a coordinate falls inside the selected ward's bounding box.

    The ward box from ``WARD_BOUNDS`` is widened by ``WARD_TOLERANCE_DEG`` on
    every side before the test.

    Returns:
        (True, "unknown_area") when the ward is not listed (check skipped),
        (True, "valid") when the point is inside, or
        (False, reason) with a user-facing explanation when it is outside.
    """
    box = WARD_BOUNDS.get(area.strip().lower())
    if box is None:
        logger.warning("[ward-check] Area '%s' not in WARD_BOUNDS — skipping", area)
        return True, "unknown_area"

    south, north, west, east = box
    slack = WARD_TOLERANCE_DEG
    lat_ok = (south - slack) <= lat <= (north + slack)
    inside = lat_ok and (west - slack) <= lon <= (east + slack)

    if not inside:
        reason = (
            f"Image GPS ({lat:.6f}, {lon:.6f}) does not match the selected ward "
            f"'{area}'. Please select the correct ward or retake the photo from "
            f"within the reported area."
        )
        logger.warning("[ward-check] FAILED — %s", reason)
        return False, reason

    logger.info("[ward-check] PASSED — area='%s' lat=%.6f lon=%.6f", area, lat, lon)
    return True, "valid"
def resolve_location_status(image_bytes: bytes, area: str = "") -> tuple:
    """
    Determine whether an uploaded image was taken at an acceptable location.

    Coordinates come from the image's EXIF GPS record when present; otherwise
    from the current request's form fields ("latitude"/"lat" and
    "longitude"/"lng"). The point must lie inside the Kakinada bounding box
    and, when *area* is given, inside that ward's box.

    Returns:
        (status, reason) where status is "valid", "invalid" or "no_gps" and
        reason is "ok" or a user-facing explanation.
    """
    lat = lon = None
    exif_coords = extract_gps_from_image(image_bytes)

    if exif_coords:
        lat, lon = exif_coords
        inside_city = is_kakinada(lat, lon)
        logger.info("[location] EXIF lat=%.6f lon=%.6f → kakinada=%s", lat, lon, inside_city)
    else:
        # No EXIF GPS: fall back to coordinates submitted with the form.
        form = request.form
        raw_lat = form.get("latitude") or form.get("lat")
        raw_lon = form.get("longitude") or form.get("lng")
        if not (raw_lat and raw_lon):
            return (
                "no_gps",
                "No GPS data found in image and no coordinates supplied. "
                "Please allow location access and retake the photo.",
            )
        try:
            lat = float(raw_lat)
            lon = float(raw_lon)
        except ValueError:
            return "no_gps", "Invalid GPS coordinates supplied."
        inside_city = is_kakinada(lat, lon)
        logger.info("[location] Form coords lat=%.6f lon=%.6f → %s",
                    lat, lon, "valid" if inside_city else "invalid")

    if not inside_city:
        return (
            "invalid",
            "Image location is outside Kakinada Municipal Corporation limits. "
            "Only grievances within Kakinada jurisdiction are accepted.",
        )

    if area and lat is not None and lon is not None:
        ward_ok, ward_reason = validate_area_vs_coords(area, lat, lon)
        if not ward_ok:
            return "invalid", ward_reason

    return "valid", "ok"
# =========================================================
# LANGUAGE DETECTION
# =========================================================
def detect_language(text: str) -> str:
    """
    Guess the language of *text* from the scripts it contains.

    Returns "hindi" when any character matches ``_RE_HINDI``, otherwise
    "telugu" when any matches ``_RE_TELUGU``, otherwise "english". Hindi is
    tested first, so text mixing both scripts is reported as "hindi".
    """
    script_checks = (("hindi", _RE_HINDI), ("telugu", _RE_TELUGU))
    for label, pattern in script_checks:
        if pattern.search(text):
            return label
    return "english"
# =========================================================
# APP INIT
# =========================================================
app = Flask(__name__)
# Upload size cap in bytes; MAX_UPLOAD_MB (default 32) is read from the environment.
app.config["MAX_CONTENT_LENGTH"] = int(os.environ.get("MAX_UPLOAD_MB", "32")) * 1024 * 1024
# Models and tokenizers are loaded once, at import time, and shared by all requests.
logger.info("🔄 Loading models...")
cat_model_en, cat_tok_en = get_cat_en()
cat_model_indic, cat_tok_indic = get_cat_indic()
urg_model_en, urg_tok_en = get_urg_en()
urg_model_indic, urg_tok_indic = get_urg_indic()
logger.info("✅ Models loaded.")
# One explainer per (task, language family) pair, each bound to its model and tokenizer.
logger.info("🔄 Initializing Integrated Gradients explainers...")
category_explainer_en = IntegratedGradientsExplainer(cat_model_en, cat_tok_en)
category_explainer_indic = IntegratedGradientsExplainer(cat_model_indic, cat_tok_indic)
urgency_explainer_en = IntegratedGradientsExplainer(urg_model_en, urg_tok_en)
urgency_explainer_indic = IntegratedGradientsExplainer(urg_model_indic, urg_tok_indic)
logger.info("✅ Integrated Gradients ready.")
# Per-language bundles of predict functions and explainers. Only "english" is
# keyed explicitly; _get_resources() falls back to _RESOURCES_INDIC for every
# other language value.
_RESOURCES = {
"english": {
"cat_fn": predict_category_en,
"urg_fn": predict_urgency_en,
"cat_exp": category_explainer_en,
"urg_exp": urgency_explainer_en,
}
}
_RESOURCES_INDIC = {
"cat_fn": predict_category_indic,
"urg_fn": predict_urgency_indic,
"cat_exp": category_explainer_indic,
"urg_exp": urgency_explainer_indic,
}
def _get_resources(language: str) -> dict:
    """Return the model/explainer bundle for *language*, defaulting to the Indic bundle."""
    if language in _RESOURCES:
        return _RESOURCES[language]
    return _RESOURCES_INDIC
# =========================================================
# HOTSPOT FORECAST
# =========================================================
VALID_LABELS = [
"electricity", "garbage", "pollution", "public transport",
"roads", "sanitation", "stray animals", "water",
]
_PROPHET_MAX_WORKERS = int(os.environ.get("PROPHET_MAX_WORKERS", "4"))
RISK_LEVEL_THRESHOLDS = [(75, "Critical"), (50, "High"), (25, "Medium"), (0, "Low")]
def _risk_to_level(score: float) -> str:
for threshold, label in RISK_LEVEL_THRESHOLDS:
if score >= threshold:
return label
return "Low"
def _fit_and_forecast(area, category, group_df, horizon) -> dict | None:
if group_df["ds"].nunique() < 2:
return None
ts = group_df[["ds", "y"]].sort_values("ds")
model = Prophet(weekly_seasonality=False, daily_seasonality=False)
model.fit(ts)
future = model.make_future_dataframe(periods=horizon)
forecast = model.predict(future)
recent_avg = ts.tail(3)["y"].mean()
fc_avg = forecast.tail(horizon)["yhat"].mean()
growth = 0.0 if recent_avg == 0 else max(
-500.0, min(500.0, ((fc_avg - recent_avg) / recent_avg) * 100)
)
avg_pri = float(group_df["priorityScore"].mean())
raw_risk = 0.5 * (growth / 100) + 0.3 * avg_pri + 0.2 * (recent_avg / 5)
risk_100 = round(100 / (1 + math.exp(-raw_risk)), 2)
hfc = forecast.tail(horizon)
yhat_range = (hfc["yhat_upper"] - hfc["yhat_lower"]).mean()
yhat_mean = hfc["yhat"].abs().mean()
confidence = round(1.0 - min(1.0, yhat_range / (yhat_mean + 1e-9)), 4)
return {
"area": area, "category": category,
"riskScore": risk_100, "level": _risk_to_level(risk_100),
"growthPercent": round(float(growth), 2),
"forecastHorizonDays": horizon, "confidenceScore": confidence,
"_recentAvg": round(float(recent_avg), 2),
"_forecastAvg": round(float(fc_avg), 2),
}
# =========================================================
# HEALTH
# =========================================================
@app.route("/", methods=["GET"])
def health():
    """Return the service banner: status, version (APP_VERSION env) and endpoint list."""
    endpoints = {
        "POST /predict": "Classify grievance — text/audio/image.",
        "POST /fairness-audit": "GFAS fairness audit.",
        "POST /hotspot-forecast": "Prophet hotspot forecasting.",
    }
    banner = {
        "status": "ok",
        "version": os.environ.get("APP_VERSION", "1.0.0"),
        "message": "Multilingual Grievance API (EN/HI/TE) with IG + GFAS — running",
        "endpoints": endpoints,
    }
    return jsonify(banner)
@app.route("/health", methods=["GET"])
def health_check():
    """Minimal liveness probe: always answers 200 with {"status": "ok"}."""
    payload = {"status": "ok"}
    return jsonify(payload), 200
# =========================================================
# POST /predict
# =========================================================
@app.route("/predict", methods=["POST"])
def predict_grievance():
    """
    Classify a grievance and return category, urgency, priority and explanation.

    Accepts either a JSON body ({"text": ..., "explain": ...}) or a multipart
    form with any of "text", "audio", "image", plus optional "explain", "area"
    and coordinate fields. Supported input modes:

        A  image only          — location must be valid; caption becomes the text
        B  audio only          — transcription becomes the text
        C  text only
        D  text  + image       — image is supporting evidence
        E  audio + image       — image is supporting evidence

    Responses:
        200  classification result
        400  no input, or an unsupported combination (text together with audio)
        403  Mode A image whose location is invalid or missing
        422  input failed validation
        500  unexpected error (stack trace included only when app.debug is on)
    """
    try:
        content_type = request.content_type or ""
        image_bytes = None
        audio_file = None
        has_audio = False
        has_image = False

        if "application/json" in content_type:
            data = request.get_json(silent=True)
            # A JSON array/scalar body has no fields; treat it as empty input.
            if not isinstance(data, dict):
                data = {}
            raw_text = data.get("text", "")
            text_input = raw_text.strip() if isinstance(raw_text, str) else ""
            raw_explain = data.get("explain", False)
            if isinstance(raw_explain, str):
                # Same string convention as the form branch, so "false" is False.
                explain_flag = raw_explain.strip().lower() in ("true", "1", "yes")
            else:
                explain_flag = bool(raw_explain)
        else:
            text_input = request.form.get("text", "").strip()
            explain_raw = request.form.get("explain", "false").strip().lower()
            explain_flag = explain_raw in ("true", "1", "yes")
            has_audio = "audio" in request.files
            has_image = "image" in request.files
            if has_image:
                image_bytes = request.files["image"].read()
            if has_audio:
                audio_file = request.files["audio"]
        has_text = bool(text_input)

        logger.info(
            "[predict] content_type=%s has_text=%s has_audio=%s has_image=%s",
            content_type[:40], has_text, has_audio, has_image,
        )
        if not (has_text or has_audio or has_image):
            return jsonify({
                "status": "failed", "code": "missing_input",
                "message": "Please provide at least one of: 'text', 'audio', or 'image'.",
            }), 400

        area_field = request.form.get("area", "").strip()
        evidence_relevant = None
        evidence_note = None
        civic_score = None
        evidence_caption = None
        location_field = None

        # ── Mode A — IMAGE ONLY ───────────────────────────────────────────────
        if has_image and not has_text and not has_audio:
            location_status, location_reason = resolve_location_status(
                image_bytes, area=area_field
            )
            if location_status in ("invalid", "no_gps"):
                logger.warning("[predict] Mode A rejected — %s", location_reason)
                return jsonify({
                    "status": "failed", "code": "location_invalid",
                    "message": location_reason, "location": "invalid",
                }), 403
            grievance_text = extract_text_from_image(image_bytes)
            input_mode = "image"
            evidence_caption = grievance_text
            ok, err = _validate_machine_text(grievance_text, source="image")
            if not ok:
                return jsonify(err), 422
            relevance = check_evidence_relevance(evidence_caption)
            evidence_relevant = relevance["evidence_relevant"]
            evidence_note = relevance["evidence_note"]
            civic_score = relevance["civic_score"]
            logger.info("[predict] Mode A civic relevance: relevant=%s score=%s",
                        evidence_relevant, civic_score)
        # ── Modes B / E — AUDIO, optionally with image evidence ──────────────
        elif has_audio and not has_text:
            grievance_text = transcribe_audio(audio_file)
            input_mode = "audio+image" if has_image else "audio"
            ok, err = _validate_machine_text(grievance_text, source="audio")
            if not ok:
                return jsonify(err), 422
        # ── Modes C / D — TEXT, optionally with image evidence ───────────────
        elif has_text and not has_audio:
            is_valid, error_code = validate_text(text_input)
            if not is_valid:
                return jsonify({
                    "status": "failed",
                    "code": error_code,
                    "message": _VALIDATION_MESSAGES[error_code],
                }), 422
            grievance_text = text_input
            input_mode = "text+image" if has_image else "text"
        else:
            # Only reachable when text and audio were both supplied; inputs
            # are present, so "missing_input" would be misleading.
            return jsonify({
                "status": "failed", "code": "unsupported_input_combination",
                "message": (
                    "Unsupported input combination. Provide text, audio, or "
                    "image alone, or pair an image with either text or audio."
                ),
            }), 400

        # ── Image as evidence (Modes D & E) ──────────────────────────────────
        if input_mode in ("text+image", "audio+image"):
            loc_status, loc_reason = resolve_location_status(image_bytes, area=area_field)
            location_field = "valid" if loc_status == "valid" else "invalid"
            if loc_status != "valid":
                logger.warning("[predict] %s location issue — %s", input_mode, loc_reason)
            # The captioner output is not validated in these modes; normalise
            # an absent caption so relevance scoring reports "not relevant".
            evidence_caption = extract_text_from_image(image_bytes) or ""

        # ── Language & model ──────────────────────────────────────────────────
        language = detect_language(grievance_text)
        res = _get_resources(language)

        # ── Classification ────────────────────────────────────────────────────
        category_result = res["cat_fn"](grievance_text)
        category = category_result["category"]
        category_conf = category_result["confidence"]
        category_index = category_result.get("class_index", 0)
        urgency_result = res["urg_fn"](grievance_text)
        urgency = urgency_result["urgency"]
        urgency_conf = urgency_result["confidence"]
        urgency_index = urgency_result.get("class_index", 0)
        priority_result = compute_priority_score(category, urgency, urgency_conf)
        priority_score = priority_result["score"]
        priority_band = priority_result["band"]

        # ── Evidence relevance against the text category (Modes D & E) ───────
        # Scored once, after classification, so the category cross-check is
        # part of the single pass.
        if input_mode in ("text+image", "audio+image"):
            relevance = check_evidence_relevance(evidence_caption, category)
            evidence_relevant = relevance["evidence_relevant"]
            evidence_note = relevance["evidence_note"]
            civic_score = relevance["civic_score"]
            logger.info("[predict] %s refined evidence (cat='%s'): relevant=%s score=%s",
                        input_mode, category, evidence_relevant, civic_score)

        # ── Explainability ────────────────────────────────────────────────────
        category_tokens: list = []
        urgency_tokens: list = []
        if explain_flag:
            category_tokens = res["cat_exp"].explain(grievance_text, category_index)
            urgency_tokens = res["urg_exp"].explain(grievance_text, urgency_index)
        explanation = generate_final_reason(
            grievance_text, category, urgency, priority_score,
            category_tokens, urgency_tokens,
        )

        # ── Response ──────────────────────────────────────────────────────────
        response_body = {
            "status": "success",
            "input_mode": input_mode,
            "text": grievance_text,
            "language": language,
            "category": category,
            "category_confidence": category_conf,
            "urgency": urgency,
            "urgency_confidence": urgency_conf,
            "priority_score": priority_score,
            "priority_band": priority_band,
            "explanation": {
                "category_tokens": category_tokens,
                "urgency_tokens": urgency_tokens,
                "category_decision": explanation["category_decision"],
                "urgency_decision": explanation["urgency_decision"],
                "priority_summary": explanation["priority_summary"],
                "final_reason": explanation["final_reason"],
            },
        }
        if location_field is not None:
            response_body["location"] = location_field
        if evidence_relevant is not None:
            response_body["evidence_relevant"] = evidence_relevant
            response_body["evidence_note"] = evidence_note
            response_body["civic_score"] = civic_score
        return jsonify(response_body)
    except Exception as e:
        logger.exception("[predict] Unhandled exception")
        error_body = {
            "status": "failed", "code": "internal_error",
            "message": str(e),
        }
        # The full trace is always in the server log; only echo it to the
        # client when the app runs in debug mode.
        if app.debug:
            error_body["trace"] = traceback.format_exc()
        return jsonify(error_body), 500
# =========================================================
# POST /fairness-audit
# =========================================================
@app.route("/fairness-audit", methods=["POST"])
def fairness_audit():
    """Run the GFAS audit over the ``grievances`` value of the JSON body.

    The body must be a non-empty JSON object; anything else (malformed JSON,
    ``null``, an empty object, a list, a scalar) is answered with HTTP 400.
    The ``grievances`` value is forwarded to ``gfas_audit``, which hands back
    a ``(result, error, status_code)`` triple; whichever of ``error`` /
    ``result`` applies is returned as JSON with that status code.

    Any unexpected exception is logged and reported as HTTP 500.
    """
    try:
        data = request.get_json(silent=True)
        # A JSON array or scalar parses successfully but has no .get();
        # treat it as a malformed request rather than letting it fall
        # through to the generic 500 handler below.
        if not data or not isinstance(data, dict):
            return jsonify({"status": "failed", "message": "Invalid JSON body."}), 400

        result, error, status_code = gfas_audit(data.get("grievances"))
        if error:
            return jsonify(error), status_code
        return jsonify(result), status_code
    except Exception as e:
        logger.exception("[fairness-audit] Unhandled exception")
        # NOTE(review): the full traceback is sent back to the caller;
        # confirm that this is intended for the production deployment.
        return jsonify({"status": "failed", "error": str(e),
                        "trace": traceback.format_exc()}), 500
# =========================================================
# POST /hotspot-forecast
# =========================================================
@app.route("/hotspot-forecast", methods=["POST"])
def hotspot_forecast():
    """Forecast and rank grievance hotspots per (area, category) group.

    Request body (JSON object):
      grievances          -- non-empty list of records; the records must
                             provide ``area``, ``category``, ``createdAt``
                             and ``priorityScore``.
      horizon_days        -- optional non-negative int, default 1.
      top_n               -- optional non-negative int, default 10.
      source_window_days  -- optional non-negative int, default 45; it is
                             only echoed back in the response here.

    Processing: area/category are lower-cased and stripped, rows whose
    ``createdAt`` cannot be parsed or whose category is not in
    ``VALID_LABELS`` are dropped, and the remainder is aggregated per
    (area, category, timestamp). Each (area, category) group is then passed
    to ``_fit_and_forecast`` on a thread pool. Groups that return ``None``
    are skipped; groups that raise are recorded under ``meta.errors``.

    Responses:
      200 -- ``top_hotspots`` sorted by ``riskScore`` descending, cut to
             ``top_n``, plus ``meta`` (omitted on the early empty results).
      400 -- body is not a JSON object.
      422 -- no grievances, wrong types, or missing required fields.
      500 -- any unexpected failure.
    """
    try:
        # silent=True: malformed JSON yields None instead of raising, so it
        # can be reported as a client error rather than a 500.
        data = request.get_json(force=True, silent=True)
        if not isinstance(data, dict):
            return jsonify({"status": "failed",
                            "message": "Invalid JSON body."}), 400

        grievances = data.get("grievances", [])
        try:
            horizon = int(data.get("horizon_days", 1))
            top_n = int(data.get("top_n", 10))
            source_window = int(data.get("source_window_days", 45))
        except (TypeError, ValueError):
            return jsonify({
                "status": "failed",
                "message": "horizon_days, top_n and source_window_days must be integers",
            }), 422
        # A negative top_n would make ranked[:top_n] drop items from the end.
        if min(horizon, top_n, source_window) < 0:
            return jsonify({
                "status": "failed",
                "message": "horizon_days, top_n and source_window_days must be non-negative",
            }), 422

        generated_at = datetime.now(UTC).isoformat()
        if not grievances:
            return jsonify({"status": "failed", "message": "No grievances supplied"}), 422
        if not isinstance(grievances, list):
            return jsonify({"status": "failed",
                            "message": "grievances must be a list of objects"}), 422

        df = pd.DataFrame(grievances)
        if df.empty:
            return jsonify({"status": "success", "generated_at": generated_at,
                            "top_hotspots": []})

        # Fail fast with a readable message instead of a KeyError further down.
        required = ("area", "category", "createdAt", "priorityScore")
        missing = [col for col in required if col not in df.columns]
        if missing:
            return jsonify({
                "status": "failed",
                "message": "Grievances are missing required field(s): " + ", ".join(missing),
            }), 422

        df["area"] = df["area"].astype(str).str.lower().str.strip()
        df["category"] = df["category"].astype(str).str.lower().str.strip()
        # Unparseable timestamps become NaT and are dropped; the result is
        # converted to timezone-naive values.
        df["ds"] = pd.to_datetime(df["createdAt"], errors="coerce",
                                  utc=True).dt.tz_convert(None)
        df = df.dropna(subset=["ds"])
        df["y"] = 1
        df = df[df["category"].isin(VALID_LABELS)]
        if df.empty:
            return jsonify({"status": "success", "generated_at": generated_at,
                            "top_hotspots": []})

        df = df.groupby(["area", "category", "ds"]).agg(
            {"y": "sum", "priorityScore": "mean"}
        ).reset_index()
        groups = list(df.groupby(["area", "category"]))

        hotspots = []
        errors = []
        with ThreadPoolExecutor(max_workers=_PROPHET_MAX_WORKERS) as executor:
            futures = {
                executor.submit(_fit_and_forecast, area, cat, gdf, horizon): (area, cat)
                for (area, cat), gdf in groups
            }
            for future in as_completed(futures):
                area, category = futures[future]
                try:
                    result = future.result()
                    if result is None:
                        continue
                    # Move the helper's private averages into the snapshot.
                    result["flaskSnapshot"] = {
                        "recentAvg": result.pop("_recentAvg"),
                        "forecastAvg": result.pop("_forecastAvg"),
                        "sourceWindowDays": source_window,
                        "forecastHorizonDays": horizon,
                        "generatedAt": generated_at,
                    }
                    result["sourceWindowDays"] = source_window
                    hotspots.append(result)
                except Exception as e:
                    # One failing group must not sink the whole forecast.
                    errors.append({"area": area, "category": category, "error": str(e)})
                    logger.warning("[hotspot] Prophet failed %s/%s: %s", area, category, e)

        ranked = sorted(hotspots, key=lambda x: x["riskScore"], reverse=True)
        return jsonify({
            "status": "success",
            "generated_at": generated_at,
            "top_hotspots": ranked[:top_n],
            "meta": {
                "groups_evaluated": len(groups),
                "forecasts_computed": len(hotspots),
                "error_count": len(errors),
                "errors": errors,
                "source_window_days": source_window,
                "horizon_days": horizon,
            },
        })
    except Exception as e:
        logger.exception("[hotspot-forecast] Unhandled exception")
        return jsonify({"status": "failed", "message": str(e)}), 500
# =========================================================
# ERROR HANDLERS
# =========================================================
@app.errorhandler(413)
def request_entity_too_large(e):
    """Answer HTTP 413 with a JSON body naming the configured upload limit."""
    # Express the configured limit in whole megabytes for the message.
    limit_mb = app.config['MAX_CONTENT_LENGTH'] // (1024 * 1024)
    payload = {
        "status": "failed",
        "code": "payload_too_large",
        "message": f"Upload exceeds {limit_mb} MB limit.",
    }
    return jsonify(payload), 413
@app.errorhandler(404)
def not_found(e):
    """Answer HTTP 404 with the API's standard JSON failure envelope."""
    payload = {
        "status": "failed",
        "code": "not_found",
        "message": "Endpoint not found.",
    }
    return jsonify(payload), 404
@app.errorhandler(405)
def method_not_allowed(e):
    """Answer HTTP 405 with the API's standard JSON failure envelope."""
    payload = {
        "status": "failed",
        "code": "method_not_allowed",
        "message": "HTTP method not allowed.",
    }
    return jsonify(payload), 405
# =========================================================
# RUN — Hugging Face Spaces uses port 7860
# =========================================================
if __name__ == "__main__":
    # Direct-execution entry point. The port comes from PORT (default 7860)
    # and debug mode is on only when FLASK_DEBUG is "true" in any letter case.
    debug = os.environ.get("FLASK_DEBUG", "false").lower() == "true"
    port = int(os.environ.get("PORT", 7860))
    logger.info("🚀 Starting API on port %d (debug=%s)", port, debug)
    app.run(
        host="0.0.0.0",
        port=port,
        debug=debug,
        threaded=True,
    )