MOHAN799S committed on
Commit
8da2d54
·
1 Parent(s): 53e8064

Deploy CivicConnect AI Engine — BERT + BLIP + EasyOCR + Whisper API

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .dockerignore +55 -0
  2. .gitattributes +2 -0
  3. .gitignore +51 -0
  4. Dockerfile +67 -0
  5. classification/artifacts/bert_model/config.json +51 -0
  6. classification/artifacts/indic_test.csv +0 -0
  7. classification/artifacts/indic_tokenizer.pkl +3 -0
  8. classification/artifacts/indic_train.csv +0 -0
  9. classification/artifacts/indic_val.csv +0 -0
  10. classification/artifacts/indicbert_model/config.json +54 -0
  11. classification/artifacts/indicbert_model/tokenizer.json +3 -0
  12. classification/artifacts/indicbert_model/tokenizer_config.json +24 -0
  13. classification/artifacts/label_encoder.pkl +3 -0
  14. classification/artifacts/label_map.pkl +3 -0
  15. classification/artifacts/test.csv +523 -0
  16. classification/artifacts/tokenizer.pkl +3 -0
  17. classification/artifacts/train.csv +0 -0
  18. classification/artifacts/val.csv +523 -0
  19. classification/bert_classify.py +164 -0
  20. classification/bert_model.py +417 -0
  21. classification/classification/artifacts/label_encoder.pkl +3 -0
  22. classification/classification/artifacts/label_map.pkl +3 -0
  23. classification/indic_bert_classify.py +142 -0
  24. classification/indic_bert_model.py +299 -0
  25. classification/indic_train.csv +0 -0
  26. classification/train.csv +0 -0
  27. gfas/__init__.py +9 -0
  28. gfas/disparity_analysis.py +156 -0
  29. gfas/fairness_audit.py +111 -0
  30. gfas/fairness_metrics.py +80 -0
  31. gfas/gfas_engine.py +81 -0
  32. gfas/report_generator.py +93 -0
  33. main.py +707 -0
  34. multi_modal/audio_to_text.py +463 -0
  35. multi_modal/image_to_text.py +346 -0
  36. requirements.txt +50 -0
  37. sentiment_analysis/artifacts/indic_urgency_model/config.json +46 -0
  38. sentiment_analysis/artifacts/indic_urgency_model/label_encoder.pkl +3 -0
  39. sentiment_analysis/artifacts/indic_urgency_model/label_map.pkl +3 -0
  40. sentiment_analysis/artifacts/indic_urgency_model/tokenizer.json +3 -0
  41. sentiment_analysis/artifacts/indic_urgency_model/tokenizer_config.json +24 -0
  42. sentiment_analysis/artifacts/urgency_bert_model/config.json +43 -0
  43. sentiment_analysis/artifacts/urgency_bert_model/label_encoder.pkl +3 -0
  44. sentiment_analysis/artifacts/urgency_bert_model/label_map.pkl +3 -0
  45. sentiment_analysis/artifacts/urgency_bert_model/tokenizer.json +0 -0
  46. sentiment_analysis/artifacts/urgency_bert_model/tokenizer_config.json +14 -0
  47. sentiment_analysis/bert_model.py +268 -0
  48. sentiment_analysis/bert_predict.py +82 -0
  49. sentiment_analysis/indic_bert_model.py +260 -0
  50. sentiment_analysis/indic_bert_predict.py +89 -0
.dockerignore ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Version control ───────────────────────────────────────
2
+ .git
3
+ .gitignore
4
+
5
+ # ── Python cache ──────────────────────────────────────────
6
+ __pycache__
7
+ *.pyc
8
+ *.pyo
9
+ *.pyd
10
+ .Python
11
+ *.egg-info
12
+ dist/
13
+ build/
14
+ .eggs/
15
+
16
+ # ── Virtual environments ──────────────────────────────────
17
+ .venv
18
+ venv/
19
+ env/
20
+ ENV/
21
+
22
+ # ── Local secrets / config ────────────────────────────────
23
+ .env
24
+ .env.*
25
+ !.env.example
26
+
27
+ # ── Test / dev artefacts ──────────────────────────────────
28
+ tests/
29
+ *.test.py
30
+ pytest.ini
31
+ .pytest_cache/
32
+ .coverage
33
+ htmlcov/
34
+
35
+ # ── Jupyter notebooks ─────────────────────────────────────
36
+ *.ipynb
37
+ .ipynb_checkpoints/
38
+
39
+ # ── OS junk ───────────────────────────────────────────────
40
+ .DS_Store
41
+ Thumbs.db
42
+
43
+ # ── Docs / CI (not needed at runtime) ─────────────────────
44
+ docs/
45
+ *.md
46
+ !README.md
47
+ .github/
48
+
49
+ # ── Large local model checkpoints (downloaded at runtime) ─
50
+ # Comment these out if you bundle models into the image.
51
+ models/
52
+ *.bin
53
+ *.safetensors
54
+ *.pt
55
+ *.ckpt
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ classification/artifacts/indicbert_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ sentiment_analysis/artifacts/indic_urgency_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Patterns below were originally generated via a PowerShell here-string; the stray shell wrapper lines are commented out so git does not treat them as ignore patterns.
2
+ # @"   (stray PowerShell here-string delimiter — commented out; not a valid ignore pattern)
3
+ __pycache__/
4
+ *.pyc
5
+ *.pyo
6
+ .env
7
+ venv/
8
+ .venv/
9
+ *.log
10
+ classification/artifacts/results/
11
+ classification/artifacts/indic_results/
12
+ sentiment_analysis/artifacts/results/
13
+ sentiment_analysis/artifacts/indic_results/
14
+ # "@ | Out-File -FilePath .gitignore -Encoding utf8   (stray shell command — commented out)
15
+
16
+ # Python
17
+ __pycache__/
18
+ *.py[cod]
19
+ *.egg-info/
20
+ dist/
21
+ build/
22
+ .eggs/
23
+
24
+ # Environments
25
+ .venv/
26
+ venv/
27
+ env/
28
+
29
+ # Secrets
30
+ .env
31
+ *.key
32
+
33
+ # Models (large binaries — use HF Hub or Git LFS)
34
+ models/
35
+ *.bin
36
+ *.safetensors
37
+ *.pt
38
+ *.ckpt
39
+
40
+ # OS
41
+ .DS_Store
42
+ Thumbs.db
43
+
44
+ # IDE
45
+ .vscode/
46
+ .idea/
47
+
48
+ # Test artefacts
49
+ .pytest_cache/
50
+ .coverage
51
+ htmlcov/
Dockerfile ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # CivicConnect — Flask AI Engine
3
+ # Deploy target: Hugging Face Spaces (Docker SDK)
4
+ # =========================================================
5
+
6
+ FROM python:3.11-slim
7
+
8
+ # ── System dependencies ───────────────────────────────────
9
+ # ffmpeg → pydub audio decode (webm/ogg/mp3 → wav)
10
+ # libsndfile1 → soundfile (WAV/FLAC fallback)
11
+ # OpenGL runtime + libglib2.0-0 → EasyOCR / OpenCV headless
12
+ # libgomp1 → PyTorch multi-threaded CPU ops
13
+ # git → HF model downloads via git-lfs if needed
14
+ # curl → health-check probes / HF API calls
15
+ RUN apt-get update && apt-get install -y --no-install-recommends \
16
+ ffmpeg \
17
+ libsndfile1 \
18
+ libgl1 \
19
+ libglib2.0-0 \
20
+ libgomp1 \
21
+ git \
22
+ curl \
23
+ && rm -rf /var/lib/apt/lists/*
24
+
25
+ # ── Create non-root user (HF Spaces requirement) ─────────
26
+ RUN useradd -m -u 1000 appuser
27
+
28
+ # ── Set working directory ─────────────────────────────────
29
+ WORKDIR /app
30
+
31
+ # ── Copy requirements first (layer cache) ────────────────
32
+ COPY requirements.txt .
33
+
34
+ # ── Install Python dependencies ───────────────────────────
35
+ RUN pip install --no-cache-dir --upgrade pip \
36
+ && pip install --no-cache-dir -r requirements.txt
37
+
38
+ # ── Copy application source ───────────────────────────────
39
+ COPY --chown=appuser:appuser . .
40
+
41
+ # ── Environment defaults (overridden by HF Secrets) ──────
42
+ ENV PORT=7860
43
+ ENV PYTHONUNBUFFERED=1
44
+ ENV HF_HOME=/app/.cache/huggingface
45
+
46
+ # ── Switch to non-root user ───────────────────────────────
47
+ USER appuser
48
+
49
+ # ── Expose port ───────────────────────────────────────────
50
+ EXPOSE 7860
51
+
52
+ # ── Healthcheck ───────────────────────────────────────────
53
+ HEALTHCHECK --interval=60s --timeout=10s --start-period=120s --retries=3 \
54
+ CMD curl -f http://localhost:7860/ || exit 1
55
+
56
+ # ── Start server ──────────────────────────────────────────
57
+ # 1 worker only — models are loaded once at startup (global state).
58
+ # 600s timeout handles audio+image (Whisper large-v3 ≈ 2-3 min on CPU).
59
+ CMD ["gunicorn", \
60
+ "--bind", "0.0.0.0:7860", \
61
+ "--workers", "1", \
62
+ "--timeout", "600", \
63
+ "--keep-alive", "5", \
64
+ "--log-level", "info", \
65
+ "--access-logfile", "-", \
66
+ "--error-logfile", "-", \
67
+ "main:app"]
classification/artifacts/bert_model/config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": null,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "LABEL_0",
17
+ "1": "LABEL_1",
18
+ "2": "LABEL_2",
19
+ "3": "LABEL_3",
20
+ "4": "LABEL_4",
21
+ "5": "LABEL_5",
22
+ "6": "LABEL_6",
23
+ "7": "LABEL_7"
24
+ },
25
+ "initializer_range": 0.02,
26
+ "intermediate_size": 3072,
27
+ "is_decoder": false,
28
+ "label2id": {
29
+ "LABEL_0": 0,
30
+ "LABEL_1": 1,
31
+ "LABEL_2": 2,
32
+ "LABEL_3": 3,
33
+ "LABEL_4": 4,
34
+ "LABEL_5": 5,
35
+ "LABEL_6": 6,
36
+ "LABEL_7": 7
37
+ },
38
+ "layer_norm_eps": 1e-12,
39
+ "max_position_embeddings": 512,
40
+ "model_type": "bert",
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 12,
43
+ "pad_token_id": 0,
44
+ "position_embedding_type": "absolute",
45
+ "problem_type": "single_label_classification",
46
+ "tie_word_embeddings": true,
47
+ "transformers_version": "5.0.0",
48
+ "type_vocab_size": 2,
49
+ "use_cache": false,
50
+ "vocab_size": 30522
51
+ }
classification/artifacts/indic_test.csv ADDED
The diff for this file is too large to render. See raw diff
 
classification/artifacts/indic_tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa2396c5b53605359d466d67fc892aaca020711ae8ac7b7ab2fd9336d82c428c
3
+ size 14979445
classification/artifacts/indic_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
classification/artifacts/indic_val.csv ADDED
The diff for this file is too large to render. See raw diff
 
classification/artifacts/indicbert_model/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "AlbertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0,
6
+ "bos_token_id": 2,
7
+ "classifier_dropout_prob": 0.1,
8
+ "down_scale_factor": 1,
9
+ "dtype": "float32",
10
+ "embedding_size": 128,
11
+ "eos_token_id": 3,
12
+ "gap_size": 0,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0,
15
+ "hidden_size": 768,
16
+ "id2label": {
17
+ "0": "LABEL_0",
18
+ "1": "LABEL_1",
19
+ "2": "LABEL_2",
20
+ "3": "LABEL_3",
21
+ "4": "LABEL_4",
22
+ "5": "LABEL_5",
23
+ "6": "LABEL_6",
24
+ "7": "LABEL_7"
25
+ },
26
+ "initializer_range": 0.02,
27
+ "inner_group_num": 1,
28
+ "intermediate_size": 3072,
29
+ "label2id": {
30
+ "LABEL_0": 0,
31
+ "LABEL_1": 1,
32
+ "LABEL_2": 2,
33
+ "LABEL_3": 3,
34
+ "LABEL_4": 4,
35
+ "LABEL_5": 5,
36
+ "LABEL_6": 6,
37
+ "LABEL_7": 7
38
+ },
39
+ "layer_norm_eps": 1e-12,
40
+ "max_position_embeddings": 512,
41
+ "model_type": "albert",
42
+ "net_structure_type": 0,
43
+ "num_attention_heads": 12,
44
+ "num_hidden_groups": 1,
45
+ "num_hidden_layers": 12,
46
+ "num_memory_blocks": 0,
47
+ "pad_token_id": 0,
48
+ "problem_type": "single_label_classification",
49
+ "tie_word_embeddings": true,
50
+ "transformers_version": "5.1.0",
51
+ "type_vocab_size": 2,
52
+ "use_cache": false,
53
+ "vocab_size": 200000
54
+ }
classification/artifacts/indicbert_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d34df3ca6a5769c1f8ae24a1e64517f3c37a934fd221d9a2ae2c5164d5e21be5
3
+ size 14969520
classification/artifacts/indicbert_model/tokenizer_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": true,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "<pad>",
10
+ "[CLS]",
11
+ "[SEP]",
12
+ "[MASK]"
13
+ ],
14
+ "is_local": false,
15
+ "keep_accents": false,
16
+ "mask_token": "[MASK]",
17
+ "model_max_length": 1000000000000000019884624838656,
18
+ "pad_token": "<pad>",
19
+ "sep_token": "[SEP]",
20
+ "tokenizer_class": "AlbertTokenizer",
21
+ "trim_offsets": true,
22
+ "unk_id": 1,
23
+ "unk_token": "<unk>"
24
+ }
classification/artifacts/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0be0d88eed1838fba777af266556aea55e435b970076684d2ad1c8c9b3fb0b
3
+ size 342
classification/artifacts/label_map.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e10c5614e117fd9ccab4af3fa62c0e4c44d23195847586d4d1ddb47f4a00cc
3
+ size 321
classification/artifacts/test.csv ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text,label,label_id
2
+ Transformer emits burning smell,Electricity,0
3
+ Power failures interrupt electric signaling systems causing traffic confusion,Electricity,0
4
+ Water main repair causing disruption,Water,7
5
+ Stray dogs near playgrounds scaring children,Stray Animals,6
6
+ Buses running without first aid kits,Public Transport,3
7
+ Electric poles damaged near highway,Electricity,0
8
+ No information boards at bus stops,Public Transport,3
9
+ Low voltage causing fan malfunction,Electricity,0
10
+ Stray animals damage public seating and benches,Stray Animals,6
11
+ Pigs damaging road surfaces,Stray Animals,6
12
+ Overall public transport problem in locality,Public Transport,3
13
+ Stray dogs barking loudly at night,Stray Animals,6
14
+ The ongoing noise from the water pump is making it difficult for families to rest and relax at home,Water,7
15
+ Street dogs biting pedestrians,Stray Animals,6
16
+ Household waste overflow leads to foul odor spreading into nearby streets,Garbage,1
17
+ Overflowing dustbins near temple,Garbage,1
18
+ Roads are left uneven after utility maintenance work,Roads,4
19
+ Waste not collected from high-rise apartments,Garbage,1
20
+ Air pollution impacts children outdoor play,Pollution,2
21
+ Irregular bus timings causing inconvenience,Public Transport,3
22
+ Delayed trains disrupt workforce movement across city zones,Public Transport,3
23
+ Dust from unpaved road affecting residents,Pollution,2
24
+ Unreliable water supply affects small businesses and shops,Water,7
25
+ Bus shelters without proper signage,Public Transport,3
26
+ Industrial noise pollution disrupts normal daily activities,Pollution,2
27
+ The water pump noise persists for long durations causing mental stress,Water,7
28
+ Road near temple congested during festivals,Roads,4
29
+ Inadequate fleet capacity increases wait times,Public Transport,3
30
+ Accumulated waste near street corner,Garbage,1
31
+ Sanitation workers absent on weekends,Sanitation,5
32
+ Stray animals damage underground cables and water pipelines,Stray Animals,6
33
+ Road drainage damaged,Roads,4
34
+ Garbage has not been cleared from the market area for several days,Garbage,1
35
+ Water contamination affects public health outcomes,Water,7
36
+ Road markings missing on newly constructed road,Roads,4
37
+ Industrial waste contaminating groundwater,Pollution,2
38
+ Water supply pressure varies widely between floors,Water,7
39
+ Plastic waste dumped near playground,Garbage,1
40
+ Electrical outages disable automated road safety equipment creating hazardous conditions,Electricity,0
41
+ Garbage accumulation problem,Garbage,1
42
+ Pollutants from fuel combustion linger in enclosed transport zones,Pollution,2
43
+ Road construction debris narrows lanes and slows vehicles,Roads,4
44
+ Noise pollution increases mental fatigue,Pollution,2
45
+ Power outages interrupt data driven traffic optimization,Electricity,0
46
+ Public transport breakdowns increase dependency on informal transit options,Public Transport,3
47
+ Uncontrolled dumping of waste is leading to severe soil contamination,Pollution,2
48
+ Industrial fumes causing bad odor in colony,Pollution,2
49
+ Water supply interrupted by municipal work,Water,7
50
+ Garbage collection staff do not cover this street regularly,Garbage,1
51
+ Residents complain of constant disturbance caused by water pump vibrations and sound,Water,7
52
+ Poor road lighting affects night time driving safety,Roads,4
53
+ Smoke from factories causing health issues,Pollution,2
54
+ Street clogged due to construction,Roads,4
55
+ Stray goats eating plants in public gardens,Stray Animals,6
56
+ Dogs blocking roads and footpaths,Stray Animals,6
57
+ The water pump generates ongoing noise that affects mental calmness of residents,Water,7
58
+ Electric supply interruptions disrupt essential household chores,Electricity,0
59
+ Stop start driving patterns significantly increase emission output,Pollution,2
60
+ Streetlights not operational,Electricity,0
61
+ Street corners clogged with wet waste,Sanitation,5
62
+ Inefficient driving patterns raise overall emission output,Pollution,2
63
+ Drinking water quality has deteriorated significantly over the past few months,Water,7
64
+ Unsegregated waste decomposition is polluting the air and surrounding land,Pollution,2
65
+ Odor from chemical treatment plant near houses,Pollution,2
66
+ Noise pollution disrupts residential sleep cycles,Pollution,2
67
+ Stray goats blocking sidewalks,Stray Animals,6
68
+ Road bottlenecks from incomplete construction trap vehicles in narrow corridors,Roads,4
69
+ Stray cattle damage public gardens and green belts,Stray Animals,6
70
+ Overhead tank pump malfunctioning,Water,7
71
+ Potholes near bus stop causing delays,Roads,4
72
+ Stray animals causing fear among women residents,Stray Animals,6
73
+ Waste collection vehicles arrive at irregular times causing inconvenience,Sanitation,5
74
+ Roads are left damaged after cable laying work,Roads,4
75
+ Open burning of waste releases toxic smoke affecting nearby households,Pollution,2
76
+ Poor road finishing is leading to early deterioration,Roads,4
77
+ No proper lighting inside buses at night,Public Transport,3
78
+ Aggressive stray dogs attacking children,Stray Animals,6
79
+ Drain water mixing with rainwater,Sanitation,5
80
+ Street corners full of waste,Sanitation,5
81
+ No drinking water facility in our street,Water,7
82
+ Stray dogs roaming in parks causing fear,Stray Animals,6
83
+ Nighttime operation of the water pump is leading to frequent sleep interruptions for nearby residents,Water,7
84
+ Traffic signals not visible,Roads,4
85
+ Traffic signals not synchronized,Roads,4
86
+ The water pump produces nonstop noise causing stress,Water,7
87
+ Low water pressure in commercial buildings,Water,7
88
+ Roadside drainage overflowing,Roads,4
89
+ Garbage left near drainage channels contributes to blockages during rainfall,Garbage,1
90
+ Persistent congestion contributes to chronic air pollution exposure,Pollution,2
91
+ Public water tap is broken,Water,7
92
+ Overcharging by bus conductors,Public Transport,3
93
+ Waterlogging on damaged roads makes them impassable,Roads,4
94
+ Power supply resumes late,Electricity,0
95
+ Animal presence affects pedestrian safety,Stray Animals,6
96
+ Roadside damage narrows lanes and slows traffic,Roads,4
97
+ Electric failures disrupt smart road analytics causing unmanaged flow,Electricity,0
98
+ Lack of buses in early morning hours,Public Transport,3
99
+ Low water pressure in offices,Water,7
100
+ Animal movement causes repeated traffic interruptions,Stray Animals,6
101
+ Fuse blowing repeatedly in our neighborhood,Electricity,0
102
+ Pollution due to illegal dumping,Pollution,2
103
+ Residents report difficulty sleeping due to loud water pump operation,Water,7
104
+ Garbage pile near market area,Garbage,1
105
+ Road construction debris increases airborne particulate matter,Pollution,2
106
+ Stray animals block footpaths forcing pedestrians onto roads,Stray Animals,6
107
+ Road signage failures cause navigation confusion,Roads,4
108
+ Water pipeline damage reduces distribution efficiency,Water,7
109
+ Electric poles obstruct roads and pedestrian pathways,Electricity,0
110
+ Low voltage affecting fans and lights,Electricity,0
111
+ Waste disposal points are not properly sanitized,Sanitation,5
112
+ No shelter for injured stray animals,Stray Animals,6
113
+ Power outage affecting schools,Electricity,0
114
+ Stray animals biting delivery workers,Stray Animals,6
115
+ Sanitation workers not equipped with tools,Sanitation,5
116
+ Electric wires spark during strong winds,Electricity,0
117
+ Irregular tanker delivery schedules increase uncertainty,Water,7
118
+ Road network imbalance shifts traffic to residential streets,Roads,4
119
+ Delayed water pipeline repairs increase hardship,Water,7
120
+ Electric failures shut down borewell motors affecting residential water access,Electricity,0
121
+ Frequent power cuts in monsoon,Electricity,0
122
+ Garbage bins overflow during weekends due to lack of timely pickup,Garbage,1
123
+ Pollution from roadside burning affects nearby shops,Pollution,2
124
+ Street clogged due to parked vehicles,Roads,4
125
+ Power outages cause loss of productivity for freelancers,Electricity,0
126
+ Open waste dumping encourages animal congregation near residences,Garbage,1
127
+ Pipeline repair delayed due to traffic,Water,7
128
+ No animal control measures implemented,Stray Animals,6
129
+ Stray animals causing accidents on roads,Stray Animals,6
130
+ Garbage overflowing near house,Garbage,1
131
+ Public toilets without privacy,Sanitation,5
132
+ Improper waste handling causes frequent odor problems,Sanitation,5
133
+ Water supply stopped without notice,Water,7
134
+ Bus stops not visible at night,Public Transport,3
135
+ Dust pollution due to construction work,Pollution,2
136
+ Stagnant water near market street,Sanitation,5
137
+ Bus routes not covering industrial corridors,Public Transport,3
138
+ Uneven road height is causing frequent vehicle underbody damage,Roads,4
139
+ Garbage collection lacks proper supervision,Garbage,1
140
+ Noise pollution from night time construction disrupts sleep,Pollution,2
141
+ Streetlights not repaired after accident,Roads,4
142
+ Drain water on footpath,Sanitation,5
143
+ Garbage disposal issue,Garbage,1
144
+ Water supply irregular in community area,Water,7
145
+ Transformer overloaded during peak load,Electricity,0
146
+ Transport hubs suffer from sanitation and crowd management issues,Public Transport,3
147
+ Road shoulder eroded near river bank,Roads,4
148
+ No buses for late-night travel,Public Transport,3
149
+ Sanitation services decline during public holidays,Sanitation,5
150
+ Residents report disturbance from water pump operation,Water,7
151
+ Stray animals disrupt public spaces frequently,Stray Animals,6
152
+ Dirty streets near bus stand,Sanitation,5
153
+ Street handpump dry for several days,Water,7
154
+ Power instability affects electric vehicle infrastructure availability,Electricity,0
155
+ Waste accumulation is contaminating nearby stormwater drains,Pollution,2
156
+ Stray animals causing dirt accumulation near markets,Stray Animals,6
157
+ Air pollution from cement factory chimney,Pollution,2
158
+ Water from tap has suspended particles,Water,7
159
+ Stray animals remain untreated for diseases spreading infection,Stray Animals,6
160
+ Drivers refusing service in rain,Public Transport,3
161
+ Public sanitation issues affect overall quality of life,Sanitation,5
162
+ Transformer emits smoke,Electricity,0
163
+ Waste dumped illegally,Garbage,1
164
+ Road capacity limitations force vehicles into dense clusters,Roads,4
165
+ Sewage smell near bus stand,Sanitation,5
166
+ Air pollution from diesel generators in colony,Pollution,2
167
+ Industrial pollution damages surrounding ecosystems,Pollution,2
168
+ Road safety is compromised due to roaming animals,Stray Animals,6
169
+ Dust from demolition site near playground,Pollution,2
170
+ Streetlights not working near bus stop,Electricity,0
171
+ Noise pollution from train operations,Pollution,2
172
+ Garbage collection staff leave waste behind after partial pickup,Garbage,1
173
+ Blocked drains causing flooding in colony,Sanitation,5
174
+ Polluted drainage water seeps into residential plots,Pollution,2
175
+ Mud road causing dust problem,Roads,4
176
+ Road repair materials used are of very poor quality,Roads,4
177
+ Dust from quarry affecting local residents,Pollution,2
178
+ Garbage collection stopped,Garbage,1
179
+ Footpath uneven and unsafe,Roads,4
180
+ Pipeline under construction causing water shortage,Water,7
181
+ Stalling engines emit excessive smoke degrading air quality,Pollution,2
182
+ Garbage heaps are obstructing traffic and pedestrian movement,Garbage,1
183
+ Lack of regular street sweeping leads to dust and waste accumulation,Sanitation,5
184
+ Electric system faults disrupt coordination of transport infrastructure,Electricity,0
185
+ Persistent congestion sustains unhealthy air quality levels,Pollution,2
186
+ Slow moving traffic produces higher emissions per distance traveled,Pollution,2
187
+ Pipeline damage causing water supply disruption,Water,7
188
+ Rainwater flooding water storage area,Water,7
189
+ Water tankers are irregular and insufficient to meet residential requirements,Water,7
190
+ Garbage not cleared regularly,Garbage,1
191
+ Stray cattle wander into drainage channels causing blockages,Stray Animals,6
192
+ Vehicles forced closer together increase localized emission density,Pollution,2
193
+ Polluted waste disposal attracts stray animals,Pollution,2
194
+ Unvaccinated stray dogs increase the risk of rabies in the neighborhood,Stray Animals,6
195
+ Overhead wires hanging low,Electricity,0
196
+ Frequent power interruptions near major intersections cause traffic buildup and prolonged vehicle idling affecting air quality,Electricity,0
197
+ Water cuts affecting hospital area,Water,7
198
+ Dust from open construction affecting local residents,Pollution,2
199
+ Electricity outages disrupt water pumping operations,Electricity,0
200
+ Uneven road surface near hospital entrance,Roads,4
201
+ Congestion related emissions worsen air quality in commercial districts,Pollution,2
202
+ Noise pollution near hospital affecting patients,Pollution,2
203
+ Garbage not removed from community center,Garbage,1
204
+ Power instability disrupts electric vehicle adoption in urban transport corridors,Electricity,0
205
+ Persistent humming from the water pump is causing stress and discomfort for residents living close to the facility,Water,7
206
+ Road near bus stand full of potholes,Roads,4
207
+ Stray dogs attack pets in apartment complexes,Stray Animals,6
208
+ Water from taps has sand particles,Water,7
209
+ Environmental hazards in residential zone,Pollution,2
210
+ Unsafe travel conditions for women,Public Transport,3
211
+ Garbage accumulation is increasing environmental risk,Garbage,1
212
+ Dirty water from taps during rainy season,Water,7
213
+ Stray animals creating mess near water bodies,Stray Animals,6
214
+ Sanitation systems fail leading to sewage water backing up onto roads,Sanitation,5
215
+ Street lighting outages increase accident risk on poorly visible roads,Electricity,0
216
+ Residents are affected by water pump sound levels exceeding acceptable limits,Water,7
217
+ Stray cattle obstruct parking areas in residential zones,Stray Animals,6
218
+ Long waiting time for buses,Public Transport,3
219
+ Bus stops not cleaned after festivals,Public Transport,3
220
+ Water leaking under road surface,Water,7
221
+ Buses without GPS updates,Public Transport,3
222
+ Streetlights not working on Elm Road,Roads,4
223
+ Water shortage impacts daily cleaning and sanitation,Water,7
224
+ Stray animals creating mess in community areas,Stray Animals,6
225
+ Buses overcrowded during festivals,Public Transport,3
226
+ Pollution affecting children and elderly,Pollution,2
227
+ Public toilets lack regular cleaning schedules,Sanitation,5
228
+ Damaged roads increase travel uncertainty,Roads,4
229
+ Improper road grading leads to water accumulation,Roads,4
230
+ Sewage water leaking onto roads,Sanitation,5
231
+ Pollution from waste burning spreads toxic particles,Pollution,2
232
+ Garbage remains scattered near commercial complexes for days,Garbage,1
233
+ Bus routes not covering industrial areas,Public Transport,3
234
+ Waste disposal areas are not properly fenced,Sanitation,5
235
+ Water main burst near market,Water,7
236
+ Road surface wear increases particulate release from tires,Roads,4
237
+ Air pollution from diesel generators,Pollution,2
238
+ Potholes near park causing accidents,Roads,4
239
+ Leaking overhead tanks result in continuous water wastage,Water,7
240
+ Public sanitation facilities are insufficient in crowded areas,Sanitation,5
241
+ Residents face daily inconvenience due to uncontrolled noise from the water pump,Water,7
242
+ Drain smell causing illness,Sanitation,5
243
+ Cats multiplying rapidly in neighborhood,Stray Animals,6
244
+ Stray cats creating hygiene problems in markets,Stray Animals,6
245
+ Road issue near temple,Roads,4
246
+ Uncollected garbage provides feeding grounds for roaming animals,Garbage,1
247
+ Frequent power cuts affecting shops,Electricity,0
248
+ Damaged road surfaces slow traffic causing inefficient fuel usage,Roads,4
249
+ Improper disposal of waste is contaminating nearby open areas,Sanitation,5
250
+ The water pump produces constant noise that disrupts daily household activities,Water,7
251
+ Residents experience repeated water pump disturbance,Water,7
252
+ Voltage surges damage electronic devices unexpectedly,Electricity,0
253
+ Industrial noise pollution affects quality of life,Pollution,2
254
+ Garbage mismanagement amplifies sanitation maintenance burden,Garbage,1
255
+ Electricity outages increase fire safety risks,Electricity,0
256
+ Sewage discharge causing river pollution,Pollution,2
257
+ Accumulated waste blocks sanitation channels leading to stagnant wastewater,Garbage,1
258
+ Garbage is scattered due to stray animals tearing open trash bags,Garbage,1
259
+ Oil spill in water body,Pollution,2
260
+ Uncollected waste decomposes and flows into sewage lines worsening sanitation blockages,Garbage,1
261
+ Sanitation workers lack adequate training,Sanitation,5
262
+ Water supply irregular in new housing society,Water,7
263
+ Traffic congestion due to narrow road,Roads,4
264
+ Generator noise late at night,Pollution,2
265
+ Unattended garbage heaps are spoiling the appearance of the locality,Garbage,1
266
+ Solid waste blocking drainage system,Pollution,2
267
+ Improper waste handling is creating sanitation problems,Garbage,1
268
+ Stray animals causing injuries to pedestrians,Stray Animals,6
269
+ Stray dogs attacking postal workers,Stray Animals,6
270
+ Buses running without permits,Public Transport,3
271
+ Electric supply irregular,Electricity,0
272
+ Monkey problem near market area,Stray Animals,6
273
+ Waste degradation is impacting environmental and public health,Pollution,2
274
+ Factory emissions affecting children with asthma,Pollution,2
275
+ Low speed traffic generates higher emission per distance,Pollution,2
276
+ Sanitation services are delayed without prior notice,Sanitation,5
277
+ Garbage accumulation causes repeated public complaints across departments,Garbage,1
278
+ Uncollected household waste attracting flies,Garbage,1
279
+ Stray cats damaging community gardens,Stray Animals,6
280
+ Road construction stopped halfway,Roads,4
281
+ Illegal dumping of construction debris,Garbage,1
282
+ Transformer oil leakage near road,Electricity,0
283
+ Garbage from nearby construction sites is dumped illegally,Garbage,1
284
+ Waste contamination is impacting soil and water quality,Pollution,2
285
+ Road damage worsens during monsoon due to poor drainage integration,Roads,4
286
+ Poor water quality leads to foul taste and odor,Water,7
287
+ Public garbage bins are overflowing and spilling waste onto the roads,Garbage,1
288
+ No coordination with train schedules,Public Transport,3
289
+ Electricity issues affect remote work productivity,Electricity,0
290
+ Electric poles with loose wires,Electricity,0
291
+ Water supply resumes late morning,Water,7
292
+ Garbage heaps block proper drainage,Garbage,1
293
+ Public sanitation infrastructure is inadequate for urban demands,Sanitation,5
294
+ Road repair delays extend traffic disruption periods,Roads,4
295
+ Roads are breaking repeatedly after each monsoon season,Roads,4
296
+ Drivers rude to passengers,Public Transport,3
297
+ Water pollution spreading diseases,Pollution,2
298
+ Sudden electricity cuts without prior notice are affecting daily work from home activities,Electricity,0
299
+ Supply water appears muddy after pipeline repair work in the locality,Water,7
300
+ Garbage dumping spots are unmanaged and poorly maintained,Garbage,1
301
+ Old buses with faulty engines,Public Transport,3
302
+ Industrial pollution increases cancer risks,Pollution,2
303
+ Discolored water flows from taps after long supply gaps,Water,7
304
+ Route diversions lead to unpredictable commute durations,Public Transport,3
305
+ Sanitation workers not performing evening duty,Sanitation,5
306
+ Uneven roads cause discomfort for passengers,Roads,4
307
+ Stray dogs chase vehicles during nighttime hours,Stray Animals,6
308
+ Stray cats entering school premises,Stray Animals,6
309
+ Noise pollution from traffic affecting students,Pollution,2
310
+ Sanitation facilities near residential areas are neglected,Sanitation,5
311
+ Stray cats entering restaurants,Stray Animals,6
312
+ Electricity supply schedules are not followed consistently,Electricity,0
313
+ Water supply cut without prior notice,Water,7
314
+ Noise from sports stadium affecting neighborhood,Pollution,2
315
+ Fuse keeps blowing in kitchen,Electricity,0
316
+ Air pollution from industrial chimneys,Pollution,2
317
+ Stray animals roam freely due to ineffective monitoring,Stray Animals,6
318
+ Stray dogs fighting with other animals in streets,Stray Animals,6
319
+ No electricity supply in office,Electricity,0
320
+ Heavy vehicles damaging residential roads,Roads,4
321
+ Household garbage is not collected on holidays leading to buildup,Garbage,1
322
+ The water pump operates loudly affecting quality of life,Water,7
323
+ Dog attacks reported in locality,Stray Animals,6
324
+ Open dumping near street causing pollution,Garbage,1
325
+ Leaking sewage lines pollute nearby water bodies,Sanitation,5
326
+ Frequent engine problems,Public Transport,3
327
+ Drain cleaning not done,Sanitation,5
328
+ Broken road near school,Roads,4
329
+ Stray animals scavenging from open garbage,Stray Animals,6
330
+ Road markings faded causing confusion for drivers,Roads,4
331
+ Road near temple full of potholes,Roads,4
332
+ Blocked drains causing stagnant water,Sanitation,5
333
+ Animals causing traffic jams,Stray Animals,6
334
+ Waste decomposition emits pollutants into residential areas,Pollution,2
335
+ Transformer noise disturbing residents,Electricity,0
336
+ Overflowing sewage near street,Sanitation,5
337
+ Poor road quality impacts overall city image,Roads,4
338
+ Water main leakage near central road,Water,7
339
+ No water connection for new house,Water,7
340
+ Sanitation workers do not segregate waste properly,Sanitation,5
341
+ Road bottlenecks caused by poor design delay movement,Roads,4
342
+ Streetlights off during night,Electricity,0
343
+ The water pump operates loudly and disrupts household peace,Water,7
344
+ Narrow roads reduce bus movement efficiency,Public Transport,3
345
+ Sewage smell unbearable,Sanitation,5
346
+ Water cuts extended for more than 24 hours,Water,7
347
+ Electric wires near playground unsafe,Electricity,0
348
+ Stray animals damaging parked vehicles,Stray Animals,6
349
+ Garbage collection is irregular during rainy seasons,Garbage,1
350
+ Garbage collection does not cover all households equally,Sanitation,5
351
+ Dust from road repair work affecting houses,Pollution,2
352
+ Temporary road fixes fail within weeks of implementation,Roads,4
353
+ Waste collection vehicles are insufficient for this locality,Garbage,1
354
+ Pollution caused by heavy trucks affects nearby residential colonies,Pollution,2
355
+ Sanitation workers not maintaining cleanliness,Sanitation,5
356
+ Road repair materials blocking lanes,Roads,4
357
+ Residents experience sleep issues due to water pump noise,Water,7
358
+ Garbage burning issue,Garbage,1
359
+ Voltage drops affecting office equipment,Electricity,0
360
+ Sanitation system collapse impacts drinking water safety,Sanitation,5
361
+ Water supply lines are poorly mapped leading to frequent damage,Water,7
362
+ Fuse keeps tripping in rainy season,Electricity,0
363
+ Streetlight flickering at night,Electricity,0
364
+ Overflowing dustbins near market,Garbage,1
365
+ Noise from generators disturbing neighborhood,Pollution,2
366
+ Fuse box damaged,Electricity,0
367
+ Poor water pressure affects bathroom usage severely,Water,7
368
+ Cattle blocking traffic movement,Stray Animals,6
369
+ Lack of seating at bus stops,Public Transport,3
370
+ Low water pressure in government buildings,Water,7
371
+ Dust from demolition site causing allergies,Pollution,2
372
+ Dense vehicle clusters elevate local emission concentration,Pollution,2
373
+ Road width constraints force stop start movement,Roads,4
374
+ Open burning of plastic waste,Pollution,2
375
+ Stray dogs occupy bus stops causing inconvenience to commuters,Stray Animals,6
376
+ Road markings are missing due to worn out surfaces,Roads,4
377
+ Road work causing traffic jam,Roads,4
378
+ Drivers ignoring pedestrian crossings,Public Transport,3
379
+ Water supply is inconsistent across different times of day,Water,7
380
+ No electricity in entire colony,Electricity,0
381
+ Waste burning polluting surroundings,Pollution,2
382
+ Waste collection points are poorly located causing inconvenience,Sanitation,5
383
+ Stray dogs bark loudly during late night hours,Stray Animals,6
384
+ Stray dogs forming packs near bus depot,Stray Animals,6
385
+ Waste related pollution is impacting daily life,Pollution,2
386
+ Bus not stopping at designated bus stop,Public Transport,3
387
+ Street drains filled with plastic waste,Sanitation,5
388
+ Buses overcrowded during weekends,Public Transport,3
389
+ Sanitation failures impact school environments,Sanitation,5
390
+ Industrial smoke causing respiratory issues,Pollution,2
391
+ Broken road surfaces near residential areas are making daily commuting unsafe for motorists,Roads,4
392
+ Water leakage near school entrance,Water,7
393
+ Drivers not assisting differently-abled passengers,Public Transport,3
394
+ Public sanitation services are underfunded and understaffed,Sanitation,5
395
+ Streetlights off on major roads,Electricity,0
396
+ Sewage water stagnant near temple,Sanitation,5
397
+ Stray cows wandering in residential streets,Stray Animals,6
398
+ Stray dogs lack vaccination leading to health hazards,Stray Animals,6
399
+ Garbage truck skipped area,Garbage,1
400
+ Sewage water stagnant near residential block,Sanitation,5
401
+ Improper waste treatment is increasing pollution levels in surrounding neighborhoods,Pollution,2
402
+ No proper drainage in colony,Sanitation,5
403
+ Sanitation workers not using safety equipment,Sanitation,5
404
+ No monitoring of stray animals in colony,Stray Animals,6
405
+ Water pipelines are damaged during unrelated construction work,Water,7
406
+ Bus breakdowns happening frequently,Public Transport,3
407
+ Street littered with garbage and sewage,Sanitation,5
408
+ Overloaded transformers frequently trip causing blackouts,Electricity,0
409
+ Transport inefficiency impacts economic productivity,Public Transport,3
410
+ Garbage collection delays are a recurring issue,Garbage,1
411
+ Pipeline blockage causing low water supply,Water,7
412
+ Electricity department response time is unsatisfactory,Electricity,0
413
+ Power supply disrupted without notice,Electricity,0
414
+ Unannounced water shutdowns cause inconvenience to residents,Water,7
415
+ Traffic congestion due to road narrowing,Roads,4
416
+ Transformer failure affecting area,Electricity,0
417
+ Garbage piles near manholes worsen sewage backflow issues,Garbage,1
418
+ Chemical smell spreading in residential area,Pollution,2
419
+ Roads are not resurfaced regularly,Roads,4
420
+ The water pump generates loud operational noise that disrupts sleep and rest patterns,Water,7
421
+ Garbage piles remain after festival events,Garbage,1
422
+ Stray dogs roaming near hospitals,Stray Animals,6
423
+ Voltage drop prevents proper functioning of appliances,Electricity,0
424
+ Noise pollution near hospital area,Pollution,2
425
+ Water cuts affecting commercial complexes,Water,7
426
+ Water complaint pending for long time,Water,7
427
+ Stray dogs roaming near hospitals causing fear,Stray Animals,6
428
+ No response to stray animal complaints,Stray Animals,6
429
+ Power interruptions affect automated road management infrastructure,Electricity,0
430
+ Electric poles block proper road widening projects,Electricity,0
431
+ Roadside garbage obstructing traffic,Roads,4
432
+ Road surface uneven after monsoon,Roads,4
433
+ Tap water contains visible particles making it unsafe for consumption,Water,7
434
+ Lack of proper information for routes,Public Transport,3
435
+ Air pollution from generators affects indoor air quality,Pollution,2
436
+ Voltage fluctuations causing hazards,Electricity,0
437
+ Industrial waste dumping has degraded soil quality severely,Pollution,2
438
+ Animals damage sanitation pipelines searching for food,Stray Animals,6
439
+ Pollution control norms are not enforced on local industries,Pollution,2
440
+ Pollution from stone crushing units spreads fine dust,Pollution,2
441
+ Public toilets without maintenance,Sanitation,5
442
+ Mechanical noise from the water pump is becoming unbearable during nighttime hours for residents,Water,7
443
+ Water pollution due to domestic waste dumping,Pollution,2
444
+ Street corners dirty due to uncollected waste,Sanitation,5
445
+ Electric issues interrupt automated toll and traffic flow systems,Electricity,0
446
+ Waste collection timing clashes with peak activity hours,Sanitation,5
447
+ Road surface uneven after rains,Roads,4
448
+ Odor from sewage line leaks in residential area,Pollution,2
449
+ Roadside garbage obstructing lanes,Roads,4
450
+ Poorly maintained valves cause water wastage,Water,7
451
+ Power cut without information,Electricity,0
452
+ Public sanitation infrastructure lacks maintenance,Sanitation,5
453
+ Garbage collection delayed during holidays,Garbage,1
454
+ Noise pollution from construction machinery in mornings,Pollution,2
455
+ Stagnant water near bus stop,Sanitation,5
456
+ Industrial fumes affecting neighborhood air quality,Pollution,2
457
+ Water meter shows unusual consumption,Water,7
458
+ Road edges damaged by waterlogging,Roads,4
459
+ Road repair work causing extended traffic jams,Roads,4
460
+ Traffic bottlenecks delay public transport vehicles,Public Transport,3
461
+ Sanitation workers not collecting waste on time,Sanitation,5
462
+ Water leakage contributes to road deterioration,Water,7
463
+ Sanitation services do not cover all localities equally,Sanitation,5
464
+ Water flow is insufficient for basic hygiene needs,Water,7
465
+ Electric instability affects charging dependent transit reducing efficiency,Electricity,0
466
+ Water pump vibrations are clearly audible inside houses and are causing continuous disturbance to residents,Water,7
467
+ Noise pollution from construction near offices,Pollution,2
468
+ Improper disposal of waste is impacting daily life,Garbage,1
469
+ Street littered with wet and dry waste,Sanitation,5
470
+ Public toilets lacking handwashing facilities,Sanitation,5
471
+ Bus stop information boards missing,Public Transport,3
472
+ Broken roads pose risks to senior citizens,Roads,4
473
+ Persistent water pump noise affects indoor comfort,Water,7
474
+ Garbage bin broken,Garbage,1
475
+ Garbage piles are becoming permanent fixtures,Garbage,1
476
+ Loud water pump vibrations are causing discomfort and anxiety among residents living close to it,Water,7
477
+ Water supply resumes with air bursts damaging pipelines,Water,7
478
+ Road surface damaged by heavy rainfall,Roads,4
479
+ Dirty water from community taps,Water,7
480
+ Residents complain about excessive water pump sound disturbing peaceful living,Water,7
481
+ Sanitation workers absent in evening rounds,Sanitation,5
482
+ Electrical load failures impact road tunnel ventilation systems,Electricity,0
483
+ Water pressure drops drastically during peak usage hours every day,Water,7
484
+ Stray cows grazing near road construction,Stray Animals,6
485
+ Garbage bin missing,Garbage,1
486
+ Uncollected waste causing bad smell,Pollution,2
487
+ Animal herds slow down traffic significantly,Stray Animals,6
488
+ Road repair material spilling on lanes,Roads,4
489
+ Public toilets without proper maintenance,Sanitation,5
490
+ Low voltage in offices,Electricity,0
491
+ Water supply disrupted due to pipeline cleaning,Water,7
492
+ Garbage problem near shops,Garbage,1
493
+ The water pump produces continuous noise impacting residential peace,Water,7
494
+ Dirty water from taps after rain,Water,7
495
+ Water supply disrupted due to power outage,Water,7
496
+ Animals roaming near hospital area,Stray Animals,6
497
+ Damaged roads are affecting delivery services,Roads,4
498
+ Power interruptions affect electric mobility adoption increasing combustion usage,Electricity,0
499
+ Unstable power supply affects industrial equipment performance,Electricity,0
500
+ The water pump generates excessive sound that disrupts normal household activities,Water,7
501
+ Electric wires pass dangerously close to trees,Electricity,0
502
+ Air pollution from industrial boilers,Pollution,2
503
+ Drain water breeding mosquitoes,Sanitation,5
504
+ No schedule boards at bus stops,Public Transport,3
505
+ Stray dogs causing accidents at intersections,Stray Animals,6
506
+ Air pollution from burning crop residue,Pollution,2
507
+ Pollution spreads due to lack of integrated urban planning,Pollution,2
508
+ Electric infrastructure planning ignores future demand growth,Electricity,0
509
+ Residents are affected by loud water pump sounds daily,Water,7
510
+ Accumulated waste near street lights,Garbage,1
511
+ Waste disposal areas emit strong foul odors,Sanitation,5
512
+ Water infrastructure cannot handle peak demand loads,Water,7
513
+ Uncollected wet waste causing odor,Garbage,1
514
+ Drainage blockage causing muddy water on road,Roads,4
515
+ Dirty streets causing mosquito nuisance,Sanitation,5
516
+ Street corners dirty with garbage,Sanitation,5
517
+ Waste collection staff do not collect garbage from interior lanes,Garbage,1
518
+ Overflowing bins causing insect breeding,Garbage,1
519
+ Increased fuel combustion worsens environmental air conditions,Pollution,2
520
+ Voltage spikes damaging ACs,Electricity,0
521
+ No CCTV in buses for safety,Public Transport,3
522
+ Dust from stone crushing units affecting schools,Pollution,2
523
+ Electric cable broken,Electricity,0
classification/artifacts/tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c3bb446109f57871636dcbaf11730f886c37cbab2e72deb065ba0619617fefa
3
+ size 851995
classification/artifacts/train.csv ADDED
The diff for this file is too large to render. See raw diff
 
classification/artifacts/val.csv ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text,label,label_id
2
+ Poorly designed road curves increase accident risk,Roads,4
3
+ Prolonged power failures during summer are making living conditions unbearable,Electricity,0
4
+ Dogs sleeping on busy roads,Stray Animals,6
5
+ Stray animals making loud noise at night,Stray Animals,6
6
+ No water in entire colony,Water,7
7
+ Decomposing organic waste is creating environmental pollution beyond sanitation issues,Pollution,2
8
+ Open dumping of waste is causing soil and air pollution in nearby residential areas,Pollution,2
9
+ Road repair work incomplete,Roads,4
10
+ Bus terminals poorly lit at night,Public Transport,3
11
+ Road surfaces are not designed for heavy vehicle load,Roads,4
12
+ Frequent power interruptions affect electric road signage reliability,Electricity,0
13
+ Pollution from vehicle exhaust accumulates in narrow streets,Pollution,2
14
+ Open dumping of garbage in streets,Pollution,2
15
+ Odor from sewage treatment plant,Pollution,2
16
+ Garbage is accumulating near parks and playgrounds,Garbage,1
17
+ Frequent accidents at junction of Oak and Pine,Roads,4
18
+ Street water valves leaking after rain,Water,7
19
+ Constant water pump noise interferes with peaceful residential living conditions,Water,7
20
+ Stray goats damaging roadside plants,Stray Animals,6
21
+ Voltage fluctuations affecting lights,Electricity,0
22
+ Sewage overflow near transport hubs causes commuter distress,Sanitation,5
23
+ Water cuts affecting schools and offices,Water,7
24
+ Road widening work delayed,Roads,4
25
+ No shelters at remote bus stops,Public Transport,3
26
+ Sanitation failures contaminate nearby water sources,Sanitation,5
27
+ No vaccination for stray animals in colony,Stray Animals,6
28
+ Electric wires near trees causing hazard,Electricity,0
29
+ Streetlights not functioning near school,Electricity,0
30
+ Potholes near school entrance,Roads,4
31
+ Water main repair causing water shortage,Water,7
32
+ Residents face regular sleep disruption due to excessive water pump noise,Water,7
33
+ Garbage dumping continues despite warning notices,Sanitation,5
34
+ Polluted river water emits foul odor affecting nearby areas,Pollution,2
35
+ Stray cattle block traffic lanes during peak hours,Stray Animals,6
36
+ Bus stops not properly sheltered,Public Transport,3
37
+ Wastewater stagnation seeps into water storage areas,Sanitation,5
38
+ Stray cattle graze on roadside greenery damaging landscaping,Stray Animals,6
39
+ Stray animals leaving waste near houses,Stray Animals,6
40
+ Stray dogs attacking school children,Stray Animals,6
41
+ Garbage collection vehicles skip this area frequently,Garbage,1
42
+ Power supply interruptions affect hospitals and clinics nearby,Electricity,0
43
+ Water pump sound pollution is affecting the quality of life of people living in surrounding apartments,Water,7
44
+ Air quality worsens due to inefficient traffic flow patterns,Pollution,2
45
+ Stray animals wander into construction sites creating hazards,Stray Animals,6
46
+ Sanitation leaks affect nearby commercial areas,Sanitation,5
47
+ Sanitation issues worsen waterborne disease risks,Sanitation,5
48
+ Blocked drains causing water stagnation,Sanitation,5
49
+ Road curvature issues force slow driving in high traffic zones,Roads,4
50
+ Poorly designed road layouts force vehicles to idle longer increasing travel delays,Roads,4
51
+ Improper compaction during construction is weakening road strength,Roads,4
52
+ Overhead tank water not sufficient,Water,7
53
+ Uncovered garbage piles pose serious health risks to residents,Sanitation,5
54
+ Road conditions are worsening despite recent repairs,Roads,4
55
+ Roadside encroachments reduce effective driving space,Roads,4
56
+ Overflowing septic tanks near street,Sanitation,5
57
+ Garbage remains uncollected after scheduled pickup times,Garbage,1
58
+ Electric supply does not meet modern appliance requirements,Electricity,0
59
+ Water pipeline damaged during excavation,Water,7
60
+ Street drains not cleaned for weeks,Sanitation,5
61
+ Overhead tank not refilled after maintenance,Water,7
62
+ Dust from cement plant affecting residential area,Pollution,2
63
+ Open burning of leaves and trash creating smoke,Pollution,2
64
+ Inadequate water supply is affecting sanitation and hygiene in homes,Water,7
65
+ Garbage piles emit strong odor and attract rodents due to delayed removal,Garbage,1
66
+ Electric lines spark during rains creating fire hazards,Electricity,0
67
+ Residents are unable to maintain quiet living conditions due to water pump noise,Water,7
68
+ Road damaged due to heavy rain,Roads,4
69
+ No water in community tank,Water,7
70
+ Stray animals obstructing emergency vehicles,Stray Animals,6
71
+ Garbage attracting mosquitoes and flies,Pollution,2
72
+ Road surfaces peel off creating dangerous driving conditions,Roads,4
73
+ Stray cattle feed on roadside waste creating health issues,Stray Animals,6
74
+ Persistent water pump noise impacts mental well-being of residents,Water,7
75
+ Bus lane blocked by parked trucks,Roads,4
76
+ Cracked pavement reduces driving efficiency and increases fuel usage,Roads,4
77
+ Poor connectivity to rural areas,Public Transport,3
78
+ Road cracks widening after rains,Roads,4
79
+ Buses not stopping at proper stops,Public Transport,3
80
+ No water supply in Sarpavaram since morning,Water,7
81
+ Road shoulder eroding,Roads,4
82
+ Garbage collection frequency is inadequate,Garbage,1
83
+ No electricity in entire street,Electricity,0
84
+ No dedicated buses for women,Public Transport,3
85
+ Garbage collection systems are failing in this zone,Garbage,1
86
+ Frequent breakdowns during peak hours,Public Transport,3
87
+ Electric supply interruptions impact food storage safety,Electricity,0
88
+ Water pump breakdown near park,Water,7
89
+ Insufficient water supply is creating severe inconvenience for large families,Water,7
90
+ The water pump generates loud operational sounds disrupting rest,Water,7
91
+ Road near bridge damaged,Roads,4
92
+ Power outages prevent effective road signal synchronization during peak hours,Electricity,0
93
+ Damaged roads increase vehicle maintenance costs,Roads,4
94
+ Road near school has potholes,Roads,4
95
+ Residents complain about loud water pump operation,Water,7
96
+ Road construction debris narrows traffic lanes,Roads,4
97
+ Overhead tank pump not working properly,Water,7
98
+ Stray cattle rest under streetlights blocking visibility,Stray Animals,6
99
+ Smoke from roadside burning affecting nearby homes,Pollution,2
100
+ Smoke from tire burning polluting air,Pollution,2
101
+ Stray dogs in residential streets making noise,Stray Animals,6
102
+ Residents are troubled by water pump sound,Water,7
103
+ Electric supply disruptions affect safety lighting reducing night time traffic efficiency,Electricity,0
104
+ Stray dogs chasing joggers in parks,Stray Animals,6
105
+ Power instability affects hospital infrastructure impacting sanitation and water use,Electricity,0
106
+ Residents experience continuous irritation due to loud water pump vibrations,Water,7
107
+ Power cuts during night,Electricity,0
108
+ Low voltage in hospital affecting equipment,Electricity,0
109
+ Dust from demolition affecting local market,Pollution,2
110
+ Electric infrastructure repairs are delayed unnecessarily,Electricity,0
111
+ Metro station cleanliness issues,Public Transport,3
112
+ Water cuts affecting residents for multiple days,Water,7
113
+ Damaged traffic signs causing confusion,Roads,4
114
+ Garbage is scattered by animals because collection is irregular,Garbage,1
115
+ Unclean seats and floors in buses,Public Transport,3
116
+ DJ sound creating public nuisance,Pollution,2
117
+ Stray dogs enter school premises causing panic among students,Stray Animals,6
118
+ Waste bins not available in market area,Garbage,1
119
+ Industrial effluents polluting pond water,Pollution,2
120
+ Overflowing trash bins are spreading foul smells but the primary issue is improper garbage collection,Garbage,1
121
+ Garbage disposal points are unmanaged and constantly overflowing,Garbage,1
122
+ Water supply does not meet basic daily consumption needs,Water,7
123
+ Water supply irregular after pipeline repair,Water,7
124
+ Garbage not collected for over a week,Garbage,1
125
+ Stray animals gather near roadside eateries creating mess,Stray Animals,6
126
+ Smoke from vehicles affecting morning walkers,Pollution,2
127
+ Open drain without cover,Sanitation,5
128
+ Garbage scattered near community hall,Garbage,1
129
+ Frequent voltage drop during evenings,Electricity,0
130
+ Waste pollution poses long term health risks,Pollution,2
131
+ Persistent water pump noise disrupts residents,Water,7
132
+ No buses connecting new residential areas,Public Transport,3
133
+ Waste buildup near water bodies contaminates local supply sources indirectly,Garbage,1
134
+ Road repair work causing inconvenience,Roads,4
135
+ Residents are disturbed by loud water pump sound,Water,7
136
+ Traffic slowdowns intensify pollution concentration near residences,Pollution,2
137
+ Electric meter malfunctioning,Electricity,0
138
+ Residents report water pump sound disturbance,Water,7
139
+ Public toilets without proper lighting,Sanitation,5
140
+ Stray dogs making loud noise at night,Stray Animals,6
141
+ Potholes causing accidents,Roads,4
142
+ Stray animals causing traffic accidents,Stray Animals,6
143
+ Damaged pavement causing accidents,Roads,4
144
+ Vehicle idling near intersections raises particulate concentration levels,Pollution,2
145
+ Poor road connectivity affects transit access,Public Transport,3
146
+ Waste management issues are worsening over time,Garbage,1
147
+ Noise pollution increases stress levels among residents,Pollution,2
148
+ Residents experience irritation due to water pump noise,Water,7
149
+ Industrial dust settles on homes causing cleanliness issues,Pollution,2
150
+ Lack of awareness leads to mixing of wet and dry waste,Sanitation,5
151
+ Open garbage near playground causing health hazard,Garbage,1
152
+ Open dumping of waste is increasing environmental pollution,Garbage,1
153
+ High voltage surges in colony,Electricity,0
154
+ Pollution from unregulated industries harms environment,Pollution,2
155
+ Contaminated water supply is increasing dependency on bottled water,Water,7
156
+ Stray dogs fight over food causing injuries and noise,Stray Animals,6
157
+ No water for drinking and cooking,Water,7
158
+ Garbage disposal methods are outdated and ineffective,Garbage,1
159
+ Water pollution from river effluents,Pollution,2
160
+ Drain water stagnation,Sanitation,5
161
+ Power instability disrupts functioning of smart road systems,Electricity,0
162
+ Dogs barking at night causing disturbance,Stray Animals,6
163
+ Sanitation blockages worsen during monsoon season causing flooding,Sanitation,5
164
+ Damaged footpath causing inconvenience to pedestrians,Roads,4
165
+ Poor air quality near traffic junction,Pollution,2
166
+ Garbage pile near school,Garbage,1
167
+ Waste dumping near school gate,Garbage,1
168
+ Sewage water leaking onto streets,Sanitation,5
169
+ Electric poles not properly grounded,Electricity,0
170
+ Pollution from vehicle congestion impacts air quality daily,Pollution,2
171
+ Stray dogs roam in packs increasing attack risks,Stray Animals,6
172
+ No security cameras at bus stations,Public Transport,3
173
+ The water pump produces disturbing sounds that interfere with peaceful living conditions in the locality,Water,7
174
+ The water pump emits ongoing mechanical noise causing irritation,Water,7
175
+ Electric meter not updating readings,Electricity,0
176
+ Smoke from garbage burning near school,Pollution,2
177
+ Buses without proper lighting at night,Public Transport,3
178
+ Monkeys entering homes frequently,Stray Animals,6
179
+ Waste collection disrupted after festival,Garbage,1
180
+ The water pump generates continuous sound that penetrates walls and disturbs indoor peace,Water,7
181
+ Water supply restoration takes excessively long after repairs,Water,7
182
+ Potholes causing tire punctures in colony roads,Roads,4
183
+ Stray animals causing hygiene issues in alleys,Stray Animals,6
184
+ Odor from open sewage causing discomfort,Pollution,2
185
+ Voltage fluctuations during evening hours,Electricity,0
186
+ Electricity failures affect street lighting at night,Electricity,0
187
+ Residents complain that water pump sound disrupts rest and relaxation,Water,7
188
+ Dirty streets due to irregular cleaning,Sanitation,5
189
+ Cats creating noise at night,Stray Animals,6
190
+ Low pressure water supply prevents proper cleaning,Water,7
191
+ Blocked drainage causing flooding near park,Sanitation,5
192
+ Road near bridge uneven and dangerous,Roads,4
193
+ Garbage dumping near markets creates unhygienic conditions,Sanitation,5
194
+ Persistent water pump noise causes frustration and mental strain among residents,Water,7
195
+ Waste decomposition is releasing strong pollutants into the air,Pollution,2
196
+ Drain cleaning vehicle not coming,Sanitation,5
197
+ Oil spills polluting street drains,Pollution,2
198
+ No ramps in buses for wheelchairs,Public Transport,3
199
+ Traffic signs missing on busy roads,Roads,4
200
+ Stray animals fighting each other in streets,Stray Animals,6
201
+ Stray cats entering residential buildings,Stray Animals,6
202
+ Dirty streets near market area,Sanitation,5
203
+ Overhead wires sagging dangerously,Electricity,0
204
+ Stray cows wandering near schools,Stray Animals,6
205
+ Water quality test failed,Water,7
206
+ Excessive noise from generators violates permissible sound levels,Pollution,2
207
+ Supply water contains excessive chlorine smell,Water,7
208
+ Pollution from waste incineration spreads toxins,Pollution,2
209
+ Garbage bins are broken and unusable forcing people to dump waste outside,Garbage,1
210
+ Public sanitation facilities lack proper water supply,Sanitation,5
211
+ Inconsistent water flow damages household water storage systems,Water,7
212
+ Unreliable power disables water purification infrastructure intermittently,Electricity,0
213
+ Power cuts affecting businesses,Electricity,0
214
+ No night bus services available,Public Transport,3
215
+ Odor from sewage backup in street,Pollution,2
216
+ Traffic congestion near junction,Roads,4
217
+ Drainage problem near shops,Sanitation,5
218
+ Long term waste accumulation is degrading environmental quality,Pollution,2
219
+ Residents express concern over long-term exposure to water pump noise pollution,Water,7
220
+ Roadside erosion releases dust affecting respiratory health,Pollution,2
221
+ Dust from construction debris affecting children,Pollution,2
222
+ Uneven pavements disrupt public transport schedules,Roads,4
223
+ Streetlights off on main road,Electricity,0
224
+ Road surfaces have sunk creating deep depressions,Roads,4
225
+ Air pollution from brick kiln operations,Pollution,2
226
+ Old buses causing discomfort to passengers,Public Transport,3
227
+ Overhead tank valve is malfunctioning,Water,7
228
+ Streetlights not working in residential area,Electricity,0
229
+ Power cuts during peak hours,Electricity,0
230
+ Electric line damaged due to rain,Electricity,0
231
+ Bus drivers not following traffic rules,Public Transport,3
232
+ Drinking water tanker arrives late,Water,7
233
+ Stray animals leaving waste on streets,Stray Animals,6
234
+ Stray cattle sit on speed breakers causing visibility issues,Stray Animals,6
235
+ Poor sanitation maintenance increases mosquito breeding,Sanitation,5
236
+ Street littered with plastic bags,Garbage,1
237
+ Garbage trucks not covering all streets,Garbage,1
238
+ No emergency numbers displayed in buses,Public Transport,3
239
+ Airborne particulate matter rises due to prolonged vehicle idling in traffic heavy corridors,Pollution,2
240
+ Irregular water supply forces residents to rely on unsafe storage methods,Water,7
241
+ Stray dogs disrupt morning walks in residential colonies,Stray Animals,6
242
+ Voltage drops affect illuminated road signage clarity at night,Electricity,0
243
+ Garbage trucks create spillage during transportation,Sanitation,5
244
+ Electric wires exposed near playground,Electricity,0
245
+ Garbage remains uncleared despite municipal schedules,Garbage,1
246
+ Electricity department does not provide outage updates,Electricity,0
247
+ Road near market has cracks,Roads,4
248
+ Damaged road shoulders reduce usable driving space,Roads,4
249
+ Pollution levels rise due to unmanaged waste decay,Pollution,2
250
+ Plastic waste mixed with organic waste,Garbage,1
251
+ Stray dogs forming packs near temples,Stray Animals,6
252
+ Streetlight poles broken,Electricity,0
253
+ Road surface slippery due to oil spillage,Roads,4
254
+ Garbage bins are not sufficient for waste volume,Garbage,1
255
+ Garbage disposal points are poorly managed and constantly overflowing,Garbage,1
256
+ Stray cows blocking traffic on highways,Stray Animals,6
257
+ Burning leaves and trash releases harmful pollutants into the air,Pollution,2
258
+ Blocked drains causing flooding near park,Sanitation,5
259
+ Power failures affect monitoring of traffic density resulting in unmanaged congestion,Electricity,0
260
+ Long queues for public transport tickets,Public Transport,3
261
+ Buses without functional horn or lights,Public Transport,3
262
+ Uncontrolled pollution in urban area,Pollution,2
263
+ Auto drivers misbehaving with passengers,Public Transport,3
264
+ Electricity outages affect elevator operations in apartments,Electricity,0
265
+ Electric infrastructure maintenance is irregular and insufficient,Electricity,0
266
+ Fuse boxes not maintained,Electricity,0
267
+ No asphalt layer on road,Roads,4
268
+ Water supply interruptions affect hospitals and schools,Water,7
269
+ Bus staff not enforcing safety measures,Public Transport,3
270
+ Water leakage causes erosion around building foundations,Water,7
271
+ No shelter homes for injured strays,Stray Animals,6
272
+ Dirty drains causing mosquito nuisance,Sanitation,5
273
+ Frequent power cuts during peak hours,Electricity,0
274
+ Narrow road design leads to chronic congestion during working hours,Roads,4
275
+ Drain clogged for days,Sanitation,5
276
+ Stray animals defecate near homes causing hygiene problems,Stray Animals,6
277
+ No segregation of wet and dry waste,Garbage,1
278
+ Power cuts occur without any prior announcements,Electricity,0
279
+ Poor maintenance of public transport vehicles,Public Transport,3
280
+ Drain water flowing continuously,Sanitation,5
281
+ Road surfaces have lost structural integrity,Roads,4
282
+ Water contamination in handpump near residential block,Water,7
283
+ Power cuts affecting hospitals,Electricity,0
284
+ Garbage from nearby markets is dumped irresponsibly in residential zones,Garbage,1
285
+ Odor pollution from garbage dumping site,Pollution,2
286
+ Buses not stopping at requested locations,Public Transport,3
287
+ Water supply irregular in residential colony,Water,7
288
+ Environmental pollution is increasing due to lack of proper waste treatment,Pollution,2
289
+ Electricity department fails to upgrade outdated infrastructure,Electricity,0
290
+ Street littered with paper and plastic waste,Garbage,1
291
+ Noise from night-time clubs disturbing residents,Pollution,2
292
+ Open defecation near residential area,Sanitation,5
293
+ Electric failures affect emergency response systems across major road corridors,Electricity,0
294
+ Open garbage near hospital creating health hazard,Garbage,1
295
+ Water pollution due to sewage leakage,Pollution,2
296
+ Public dustbin overflowing,Garbage,1
297
+ Multiple pollution sources in area,Pollution,2
298
+ Stray cats spreading diseases in markets,Stray Animals,6
299
+ Road shoulders are damaged making pedestrian movement unsafe,Roads,4
300
+ Transformer making noise,Electricity,0
301
+ Bus drivers refusing to stop at requested locations,Public Transport,3
302
+ No proper animal control in residential areas,Stray Animals,6
303
+ Smoke from crematorium affecting local area,Pollution,2
304
+ Stray dogs near railway station scaring passengers,Stray Animals,6
305
+ Garbage dumping near drains causes sewage overflow during rainfall,Garbage,1
306
+ Drain overflow creating traffic issue,Sanitation,5
307
+ Dirty drains causing waterlogging during monsoon,Sanitation,5
308
+ Garbage not collected from high-rise buildings,Garbage,1
309
+ Garbage collection irregular in park area,Garbage,1
310
+ Supply water contains excessive sediment affecting water filters,Water,7
311
+ Sanitation backflow damages road surfaces,Sanitation,5
312
+ Stray animals blocking sidewalks,Stray Animals,6
313
+ Water supply stops abruptly without any official communication,Water,7
314
+ Stray animals damage parked vehicles while searching for food,Stray Animals,6
315
+ Overflowing bins near bus stops create hygiene and commuter discomfort,Garbage,1
316
+ Buses not running on weekends,Public Transport,3
317
+ Bus drivers ignoring signals,Public Transport,3
318
+ Incomplete road projects create traffic bottlenecks,Roads,4
319
+ Electricity outages occur frequently during weekends,Electricity,0
320
+ Traffic signal timing not optimized,Roads,4
321
+ Buses not following GPS routes,Public Transport,3
322
+ Stray animals sleeping on pavements,Stray Animals,6
323
+ Drain blockage causing flooding,Sanitation,5
324
+ Electricity supply issues disrupt online education,Electricity,0
325
+ Polluted soil contains hazardous chemicals,Pollution,2
326
+ The water pump noise remains constant without breaks causing ongoing stress to nearby households,Water,7
327
+ Water overflow near house,Water,7
328
+ Garbage collection vehicles do not arrive on time,Garbage,1
329
+ Trapped traffic emits concentrated pollutants impacting nearby pedestrians,Pollution,2
330
+ Road surface uneven near playground,Roads,4
331
+ Poor road quality is forcing vehicles to take long detours,Roads,4
332
+ Long term waste dumping is degrading environmental health in the locality,Pollution,2
333
+ Stray cats entering homes and damaging property,Stray Animals,6
334
+ Traffic signals malfunctioning,Roads,4
335
+ Odor from sewage backup in residential area,Pollution,2
336
+ Stray dogs gather near food waste sites,Stray Animals,6
337
+ Detour related fuel burn increases atmospheric contamination,Pollution,2
338
+ No announcements for stops for visually impaired,Public Transport,3
339
+ Garbage disposal practices need urgent improvement,Garbage,1
340
+ Uncollected household garbage near shops,Garbage,1
341
+ Pollution from heavy vehicles affects residential zones,Pollution,2
342
+ Noise from metro construction disturbing residents,Pollution,2
343
+ Waste bins are not covered allowing animals to scatter garbage,Garbage,1
344
+ Garbage is not collected daily leading to foul smells and health concerns,Sanitation,5
345
+ Loose electrical connections cause repeated outages,Electricity,0
346
+ Electric shock from pole,Electricity,0
347
+ Smoke from roadside eateries causing health issues,Pollution,2
348
+ Road repair complaint ignored,Roads,4
349
+ Broken roads increase travel stress and fatigue,Roads,4
350
+ Waste disposal practices increase environmental hazards,Sanitation,5
351
+ Decaying waste releases gases that significantly degrade air quality,Pollution,2
352
+ Electric outage since yesterday,Electricity,0
353
+ Public sanitation services lack accountability,Sanitation,5
354
+ Drivers not following assigned routes,Public Transport,3
355
+ Buses overcrowded with standing passengers,Public Transport,3
356
+ Water supply does not meet the needs of growing population in the area,Water,7
357
+ Frequent fare disputes in buses,Public Transport,3
358
+ Stray animals causing sanitation issues,Stray Animals,6
359
+ Accumulated waste attracts insects and rodents,Sanitation,5
360
+ Sewage water flowing on road,Sanitation,5
361
+ Air pollution from construction dust near hospital,Pollution,2
362
+ Drainage overflow in street,Sanitation,5
363
+ Sanitation workers do not report for duty regularly,Sanitation,5
364
+ Water pressure drops affect sanitation and hygiene practices,Water,7
365
+ Overflowing water tanks cause wastage due to faulty valves,Water,7
366
+ Animal interference increases sanitation workload,Stray Animals,6
367
+ Overflowing drains near school,Sanitation,5
368
+ Garbage is piling up near bus stops and public areas,Garbage,1
369
+ Open drains near houses,Sanitation,5
370
+ Power outages occur daily during peak usage hours without explanation,Electricity,0
371
+ Sewage stagnation creates unhygienic living conditions,Sanitation,5
372
+ Odor from chemical treatment plant near road,Pollution,2
373
+ Stray goats eating flowers in gardens,Stray Animals,6
374
+ Odor from garbage dump near residential area,Pollution,2
375
+ Animals crossing roads increase collision risk,Stray Animals,6
376
+ Speed bumps not visible at night,Roads,4
377
+ Odor from sewage near commercial complex,Pollution,2
378
+ Air pollution aggravates asthma and respiratory conditions,Pollution,2
379
+ Drain overflow near hospital,Sanitation,5
380
+ Water management inefficiency impacts urban resilience,Water,7
381
+ Dumped waste is polluting nearby agricultural land,Pollution,2
382
+ Street taps dry after pipeline maintenance,Water,7
383
+ Organic waste decomposition is polluting the air and attracting disease carrying insects,Pollution,2
384
+ Lack of municipal response to stray animal complaints is concerning,Stray Animals,6
385
+ Water meters show abnormal readings despite limited water usage,Water,7
386
+ Uneven road surface,Roads,4
387
+ Waste disposal sites attract stray animals,Sanitation,5
388
+ Industrial pollution affects nearby residential quality of life,Pollution,2
389
+ Electric lines are exposed and unsafe in public areas,Electricity,0
390
+ Public toilets lacking maintenance schedule,Sanitation,5
391
+ Ongoing water pump vibrations are creating a persistent nuisance for families living nearby,Water,7
392
+ Water tankers charge high prices due to municipal shortages,Water,7
393
+ Dogs entering houses frequently,Stray Animals,6
394
+ Water pipeline leakage near main road,Water,7
395
+ Waste segregation is not practiced consistently,Garbage,1
396
+ Street corners full of mixed garbage,Garbage,1
397
+ Streetlight poles corroded,Electricity,0
398
+ Buses not following scheduled intervals,Public Transport,3
399
+ Power failures disrupt home medical equipment usage,Electricity,0
400
+ Road surfaces are damaged by heavy construction vehicle movement,Roads,4
401
+ Air pollution from coal transport trucks,Pollution,2
402
+ Garbage collection trucks not available,Garbage,1
403
+ Stray animals knocking over dustbins,Stray Animals,6
404
+ Construction debris dumped illegally,Pollution,2
405
+ Pollution caused by waste decay is affecting nearby residential comfort,Pollution,2
406
+ Damaged culvert causing road erosion,Roads,4
407
+ Waste breakdown affects surrounding environmental conditions,Pollution,2
408
+ Stray goats wandering on highways,Stray Animals,6
409
+ Garbage is often burned causing air pollution,Sanitation,5
410
+ Power outage affecting local shops,Electricity,0
411
+ Sanitation neglect increases long term infrastructure damage,Sanitation,5
412
+ Street light flickering,Electricity,0
413
+ Poor road connectivity increases travel time and logistical inefficiency,Roads,4
414
+ Water contamination from untreated sewage,Pollution,2
415
+ Garbage piles draw stray dogs increasing public safety concerns,Garbage,1
416
+ Water pump noise disturbing residents,Water,7
417
+ Stray animals disrupt peaceful living in residential areas,Stray Animals,6
418
+ Garbage collection delayed during rainy season,Garbage,1
419
+ Residents complain about persistent water pump noise,Water,7
420
+ Road maintenance work is delayed for months without explanation,Roads,4
421
+ Stray goats entering parks,Stray Animals,6
422
+ Road surface cracks widening,Roads,4
423
+ Bus stations without toilets or drinking water,Public Transport,3
424
+ Traffic signals not functioning at major intersection,Roads,4
425
+ Road network gaps increase dependency on longer travel routes,Roads,4
426
+ Electric poles damaged due to storm,Electricity,0
427
+ Power cuts disrupting businesses,Electricity,0
428
+ Stray animals lack proper shelters leading to street occupation,Stray Animals,6
429
+ Stray cattle wander into marketplaces creating safety hazards,Stray Animals,6
430
+ Road near bus stop damaged,Roads,4
431
+ Improper waste management is causing resident dissatisfaction,Garbage,1
432
+ Garbage remains scattered after market hours,Garbage,1
433
+ Industrial wastewater discharge pollutes groundwater sources,Pollution,2
434
+ Garbage accumulation creates breeding grounds for pests,Sanitation,5
435
+ Road full of potholes near bus stand,Roads,4
436
+ Waste management practices do not meet basic standards,Sanitation,5
437
+ Odor from poultry market affecting houses,Pollution,2
438
+ Overhead tank overflowing constantly,Water,7
439
+ Sewage pipe damaged,Sanitation,5
440
+ Smoke from cooking chimneys in dense areas,Pollution,2
441
+ Dirty drains causing street flooding,Sanitation,5
442
+ Water tank cleaning required,Water,7
443
+ Garbage lying near drain,Garbage,1
444
+ Drivers not issuing proper receipts,Public Transport,3
445
+ Water tanker delivery inconsistent,Water,7
446
+ Suspended dust particles from road surfaces are contributing to respiratory discomfort,Pollution,2
447
+ Lack of buses in suburban areas,Public Transport,3
448
+ Stray dogs guard territories aggressively near houses,Stray Animals,6
449
+ Stagnant water near public park,Sanitation,5
450
+ Road near market slippery after rain,Roads,4
451
+ Sanitation workers not patrolling community areas,Sanitation,5
452
+ Water pipes frequently clog causing supply interruptions,Water,7
453
+ No electricity in residential block,Electricity,0
454
+ Odor from leather tanning unit near river,Pollution,2
455
+ Noise pollution from nearby nightclub,Pollution,2
456
+ Water mains corrode causing supply issues,Water,7
457
+ Drain water entering homes,Sanitation,5
458
+ Water contamination reported in residential area,Water,7
459
+ Water supply pressure is too weak to reach upper floors of buildings,Water,7
460
+ Voltage drop in colony during night,Electricity,0
461
+ Burning of garbage creating toxic smoke,Pollution,2
462
+ Garbage pile near street corner attracting rats,Garbage,1
463
+ Road shoulder erosion limits usable space leading to congestion,Roads,4
464
+ Damaged roads near hospitals are affecting ambulance movement,Roads,4
465
+ Potholes near shopping complex,Roads,4
466
+ Road surface dust contributes to respiratory discomfort,Roads,4
467
+ Water infrastructure repairs lack proper supervision,Water,7
468
+ No proper waiting areas at bus terminals,Public Transport,3
469
+ Garbage scattered by stray animals,Garbage,1
470
+ Garbage not segregated in bins,Garbage,1
471
+ Water tanker service not available on time,Water,7
472
+ Dust from open construction sites affecting market,Pollution,2
473
+ Air pollution increases hospital visits for breathing issues,Pollution,2
474
+ Public toilets locked or inaccessible,Sanitation,5
475
+ Overflowing sewage near commercial area,Sanitation,5
476
+ Electric outages halt automated water distribution scheduling systems,Electricity,0
477
+ The water pump emits a harsh mechanical sound that causes constant irritation to residents,Water,7
478
+ Bad road near hospital,Roads,4
479
+ Pollution affecting quality of life,Pollution,2
480
+ Water supply timing not communicated,Water,7
481
+ Water pipelines lack proper insulation and protection,Water,7
482
+ Excessive vehicle emissions in this area have significantly reduced air quality levels,Pollution,2
483
+ Stray animals cause night time disturbances near homes,Stray Animals,6
484
+ Bus drivers not obeying traffic signals,Public Transport,3
485
+ Sanitation services are poorly monitored,Sanitation,5
486
+ No separate buses for students,Public Transport,3
487
+ Temporary road repairs wash away during rains,Roads,4
488
+ Noise pollution from factories disturbs nearby residents,Pollution,2
489
+ Stray animals affect commuter comfort,Stray Animals,6
490
+ Damaged roads force vehicles to take longer detours increasing fuel consumption,Roads,4
491
+ Garbage has been left unattended near public places,Garbage,1
492
+ Sewage leaks enter drainage and road systems,Sanitation,5
493
+ Improper garbage disposal affects nearby residential buildings,Sanitation,5
494
+ Garbage heaps are visible across multiple streets,Garbage,1
495
+ Electric supply disruptions force manual traffic handling causing congestion,Electricity,0
496
+ Electric infrastructure failures disrupt adaptive traffic control systems,Electricity,0
497
+ Sanitation complaints are not addressed promptly by authorities,Sanitation,5
498
+ Garbage dumped in public spaces is affecting cleanliness,Garbage,1
499
+ Smoke from tire burning in industrial area,Pollution,2
500
+ Damaged guardrail causing accident,Roads,4
501
+ Low water pressure in newly built area,Water,7
502
+ Irregular water supply during festival season,Water,7
503
+ Unregulated tanker water sources raise safety concerns,Water,7
504
+ Water from taps has unusual odor,Water,7
505
+ Accumulated waste near bus stops,Garbage,1
506
+ Street water valves leaking,Water,7
507
+ Voltage spikes affecting ACs,Electricity,0
508
+ Smoke from burning tires on roadside,Pollution,2
509
+ Stray goats wandering in playgrounds,Stray Animals,6
510
+ Stray cattle block roads during peak hours disrupting traffic flow,Stray Animals,6
511
+ Unfriendly behavior from bus staff,Public Transport,3
512
+ Water supply infrastructure expansion has not kept pace with growth,Water,7
513
+ The loud humming of the water pump causes discomfort throughout the day and night,Water,7
514
+ Road construction debris restricts lane capacity,Roads,4
515
+ Streetlight malfunction causing darkness,Electricity,0
516
+ Water supply disrupted due to civic work,Water,7
517
+ Dirty water flowing from street taps,Water,7
518
+ Pollution from heavy machinery continues throughout night,Pollution,2
519
+ Stray animals causing traffic delays,Stray Animals,6
520
+ Industrial waste discharged into river,Pollution,2
521
+ Electric poles with broken cross arms,Electricity,0
522
+ Improper waste dumping worsens underground sanitation congestion,Garbage,1
523
+ Water supply schedules change without notification,Water,7
classification/bert_classify.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# =========================================================
# BERT MODEL — CATEGORY CLASSIFICATION (ENGLISH)
# =========================================================

import os
import re
import torch
import pickle
from transformers import BertForSequenceClassification

# ── Path config ───────────────────────────────────────────
# All paths are anchored to this module's directory so inference works
# regardless of the process working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ARTIFACT_DIR = os.path.join(BASE_DIR, "artifacts")
MODEL_DIR = os.path.join(ARTIFACT_DIR, "bert_model")
MAX_LENGTH = 128  # FIX: was 100 — aligned with IG explainer and indic module

# ── Load artifacts ────────────────────────────────────────
# NOTE(review): pickle.load is only acceptable because these artifacts
# ship with the repo; never point these paths at untrusted files.
with open(os.path.join(ARTIFACT_DIR, "tokenizer.pkl"), "rb") as f:
    tokenizer = pickle.load(f)

with open(os.path.join(ARTIFACT_DIR, "label_encoder.pkl"), "rb") as f:
    label_encoder = pickle.load(f)

# local_files_only=True — the deployed container must never reach the
# Hugging Face hub at runtime.
model = BertForSequenceClassification.from_pretrained(
    MODEL_DIR, local_files_only=True
)
model.eval()  # inference only: disables dropout
28
+
29
# ── Edge-case constants ───────────────────────────────────
LABEL_WORDS = {
    "water", "electricity", "roads", "garbage",
    "sanitation", "pollution", "transport", "animals",
}

NON_GRIEVANCE_PHRASES = {
    "hello", "hi", "hi there", "hey", "hey there",
    "good morning", "good afternoon", "good evening", "good day",
    "greetings", "namaste", "how are you", "how are you doing",
    "hope you are doing well", "hope everything is fine",
    "just checking in", "nice to meet you", "long time no see",
    "good weather", "nice weather", "weather is nice", "weather is good",
    "it is a sunny day", "it is raining today", "pleasant weather",
    "cool weather today", "hot weather today", "cold weather today",
    "it is a good day", "everything is fine", "all good", "no issues",
    "no problem", "things are okay", "everything looks good",
    "nothing to complain", "all services are working",
    "thank you", "thanks", "thanks a lot", "thank you very much",
    "appreciate it", "appreciate your help", "great work", "good job",
    "well done", "excellent service", "for your information",
    "just informing", "sharing information", "today is a holiday",
    "office opens at 10 am", "school reopens next week",
    "meeting scheduled tomorrow", "okay", "ok", "alright", "fine",
    "cool", "great", "nice", "regards", "best regards", "with regards",
    "kind regards", "thank you and regards", "thank you very much sir",
    "test", "testing", "demo", "sample text", "random text",
    "🙂", "👍", "🙏", "😂", "🔥", "!!!", "???",
}


# ── Text cleaning ─────────────────────────────────────────
def clean_text(text: str) -> str:
    """Strip HTML tags and collapse whitespace; keep non-ASCII intact.

    Non-ASCII is deliberately preserved: this module receives English
    only (language detection in main.py routes correctly), but stripping
    non-ASCII would silently corrupt any mis-routed Indic text.
    """
    text = str(text)
    text = re.sub(r"<.*?>", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text


# ── Input validation ──────────────────────────────────────
def validate_input(text: str):
    """Return a rejection-reason string, or None if the text is usable.

    FIX: the membership checks (label_only / non_grievance_text) now run
    BEFORE the generic length gates. Previously every single-word label
    ("water", "electricity", …) hit "too_few_words" first, making the
    "label_only" branch unreachable, and short greetings ("hello", "ok")
    were reported as "too_short" instead of "non_grievance_text".
    """
    if not text or not text.strip():
        return "empty_text"
    text_l = text.strip().lower()
    # Specific rejections first — most informative reason wins.
    if text_l in LABEL_WORDS:
        return "label_only"
    if text_l in NON_GRIEVANCE_PHRASES:
        return "non_grievance_text"
    # Generic length gates.
    if len(text_l) < 10:
        return "too_short"
    if len(text_l.split()) < 3:
        return "too_few_words"
    return None
86
+
87
+
88
# ── Predict ───────────────────────────────────────────────
def predict(
    text: str,
    input_ids=None,  # O3: pre-tokenised tensor from main.py
    attention_mask=None,  # O3: pre-tokenised tensor from main.py
) -> dict:
    """
    Predict grievance category for English text.

    Args:
        text : Raw input string (always required for validation).
        input_ids : Optional pre-tokenised tensor (1, seq_len).
                    When provided by main.py the internal tokenisation
                    step is skipped — eliminates duplicate tokenisation.
        attention_mask : Required when input_ids is provided.

    Returns dict with keys: status, category, confidence, class_index.
    """
    # Rule-based validation always runs on the raw string.
    failure = validate_input(text)
    if failure is not None:
        return {
            "status": "failed",
            "reason": failure,
            "category": None,
            "confidence": 0.0,
            "class_index": None,
        }

    # O3: tokenise only when main.py did not supply ready-made tensors.
    # padding=False — a single sequence needs no [PAD] tokens, which also
    # keeps IG attributions free of padding artefacts.
    if input_ids is None:
        encoded = tokenizer(
            clean_text(text),
            return_tensors="pt",
            truncation=True,
            padding=False,
            max_length=MAX_LENGTH,
        )
        input_ids = encoded["input_ids"]
        attention_mask = encoded["attention_mask"]

    # Forward pass — gradients disabled for inference.
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

    probabilities = torch.softmax(logits, dim=1)
    top_prob, top_idx = torch.max(probabilities, dim=1)
    confidence = top_prob.item()
    predicted_index = top_idx.item()

    # Confidence gate: weak predictions route to the catch-all bucket.
    if confidence < 0.30:
        return {
            "status": "success",
            "reason": "low_confidence",
            "category": "Other",
            "confidence": round(confidence, 4),
            "class_index": predicted_index,
        }

    return {
        "status": "success",
        "category": label_encoder.inverse_transform([predicted_index])[0],
        "confidence": round(confidence, 4),
        "class_index": predicted_index,
    }
161
+
162
+
163
def get_model_and_tokenizer():
    """Return the module-level (model, tokenizer) pair for external callers."""
    return (model, tokenizer)
classification/bert_model.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# =========================================================
# NOTE(review): a stale earlier draft of this training script had been
# accidentally concatenated ahead of the final version below. It
# duplicated the imports / config / data-load / clean / label-encode
# steps and wrote the label artifacts to a CWD-relative path
# ("classification/artifacts") instead of the module-relative one, so
# running the file did all that work twice with inconsistent output
# locations (its last pickle.dump was even fused onto the next header
# line). Removed — the script that follows is the single authoritative,
# path-safe version.
# =========================================================
78
+ # BERT PREPROCESSING + TRAINING + ARTIFACT GENERATION
79
+ # =========================================================
80
+
81
+ import os
82
+ import re
83
+ import pickle
84
+ import pandas as pd
85
+ import numpy as np
86
+ import torch
87
+
88
+ from sklearn.model_selection import train_test_split
89
+ from sklearn.preprocessing import LabelEncoder
90
+ from sklearn.metrics import (
91
+ accuracy_score,
92
+ f1_score,
93
+ balanced_accuracy_score,
94
+ matthews_corrcoef
95
+ )
96
+
97
+ from transformers import (
98
+ BertTokenizer,
99
+ BertForSequenceClassification,
100
+ Trainer,
101
+ TrainingArguments
102
+ )
103
+
104
+ from torch.utils.data import Dataset
105
+
106
+
107
# ---------------------------------------------------------
# PATH CONFIG (WINDOWS SAFE)
# ---------------------------------------------------------
# Anchor every path to this file's directory so the script behaves the
# same regardless of the current working directory; os.path.join also
# keeps separators correct on Windows.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(BASE_DIR, "train.csv")

ARTIFACT_DIR = os.path.join(BASE_DIR, "artifacts")
MODEL_DIR = os.path.join(ARTIFACT_DIR, "bert_model")

MAX_LENGTH = 100      # max token length fed to the tokenizer
EPOCHS = 3            # fine-tuning epochs
BATCH_SIZE = 16       # per-device batch size
LEARNING_RATE = 2e-5  # conventional BERT fine-tuning learning rate

os.makedirs(ARTIFACT_DIR, exist_ok=True)
122
+
123
+
124
# ---------------------------------------------------------
# 1. LOAD DATA
# ---------------------------------------------------------
print(f"📄 Loading dataset from: {DATA_PATH}")

# Expected CSV schema: at least 'text' and 'label' columns.
df = pd.read_csv(DATA_PATH)
df = df[['text', 'label']]        # keep only the columns used for training
df.dropna(inplace=True)           # drop rows missing text or label
df.drop_duplicates(inplace=True)  # exact duplicates would leak across splits
133
+
134
+
135
+ # ---------------------------------------------------------
136
+ # 2. CLEAN TEXT (BERT SAFE)
137
+ # ---------------------------------------------------------
138
def clean_text(text):
    """Normalise raw grievance text for bert-base-uncased.

    Strips HTML-like tags, drops non-ASCII characters (the uncased
    English BERT vocabulary cannot represent them) and collapses runs
    of whitespace into single spaces.
    """
    cleaned = str(text)
    # Remove anything that looks like an HTML/XML tag.
    cleaned = re.sub(r"<.*?>", " ", cleaned)
    # bert-base-uncased is English-only: replace non-ASCII runs with a space.
    cleaned = re.sub(r"[^\x00-\x7F]+", " ", cleaned)
    # Collapse whitespace and trim the edges.
    return re.sub(r"\s+", " ", cleaned).strip()
144
+
145
+ df["text"] = df["text"].apply(clean_text)
146
+
147
+
148
+ # ---------------------------------------------------------
149
+ # 3. LABEL ENCODING
150
+ # ---------------------------------------------------------
151
+ label_encoder = LabelEncoder()
152
+ df["label_id"] = label_encoder.fit_transform(df["label"])
153
+
154
+ label_map = dict(zip(label_encoder.classes_,
155
+ label_encoder.transform(label_encoder.classes_)))
156
+
157
+ # Save label artifacts
158
+ with open(os.path.join(ARTIFACT_DIR, "label_encoder.pkl"), "wb") as f:
159
+ pickle.dump(label_encoder, f)
160
+
161
+ with open(os.path.join(ARTIFACT_DIR, "label_map.pkl"), "wb") as f:
162
+ pickle.dump(label_map, f)
163
+
164
+ NUM_LABELS = len(label_map)
165
+ print(f"✅ Number of classes: {NUM_LABELS}")
166
+
167
+
168
+ # ---------------------------------------------------------
169
+ # 4. TRAIN / VAL / TEST SPLIT
170
+ # ---------------------------------------------------------
171
+ train_df, temp_df = train_test_split(
172
+ df,
173
+ test_size=0.30,
174
+ stratify=df["label_id"],
175
+ random_state=42
176
+ )
177
+
178
+ val_df, test_df = train_test_split(
179
+ temp_df,
180
+ test_size=0.50,
181
+ stratify=temp_df["label_id"],
182
+ random_state=42
183
+ )
184
+
185
+ # Save processed splits
186
+ train_df.to_csv(os.path.join(ARTIFACT_DIR, "train.csv"), index=False)
187
+ val_df.to_csv(os.path.join(ARTIFACT_DIR, "val.csv"), index=False)
188
+ test_df.to_csv(os.path.join(ARTIFACT_DIR, "test.csv"), index=False)
189
+
190
+
191
+ # ---------------------------------------------------------
192
+ # 5. TOKENIZER
193
+ # ---------------------------------------------------------
194
+ tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
195
+
196
+ with open(os.path.join(ARTIFACT_DIR, "tokenizer.pkl"), "wb") as f:
197
+ pickle.dump(tokenizer, f)
198
+
199
+
200
+ # ---------------------------------------------------------
201
+ # 6. TORCH DATASET
202
+ # ---------------------------------------------------------
203
class GrievanceDataset(Dataset):
    """Torch map-style dataset over eagerly tokenised grievance texts.

    Tokenisation happens once in ``__init__`` using the module-level
    ``tokenizer`` and ``MAX_LENGTH``; each item is the tensor dict the
    Hugging Face ``Trainer`` expects (encoding fields plus ``labels``).
    """

    def __init__(self, texts, labels):
        # Batch-encode the whole split up front: pad to the longest
        # sequence in the batch, truncate at MAX_LENGTH tokens.
        self.encodings = tokenizer(
            list(texts),
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH,
        )
        self.labels = list(labels)

    def __getitem__(self, idx):
        # Build the per-sample tensor dict field by field.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample

    def __len__(self):
        return len(self.labels)
220
+
221
+
222
+ train_dataset = GrievanceDataset(train_df["text"], train_df["label_id"])
223
+ val_dataset = GrievanceDataset(val_df["text"], val_df["label_id"])
224
+ test_dataset = GrievanceDataset(test_df["text"], test_df["label_id"])
225
+
226
+
227
+ # ---------------------------------------------------------
228
+ # 7. MODEL
229
+ # ---------------------------------------------------------
230
+ model = BertForSequenceClassification.from_pretrained(
231
+ "bert-base-uncased",
232
+ num_labels=NUM_LABELS
233
+ )
234
+
235
+
236
+ # ---------------------------------------------------------
237
+ # 8. METRICS
238
+ # ---------------------------------------------------------
239
def compute_metrics(eval_pred):
    """Metrics hook for the HF Trainer's evaluation passes.

    Returns accuracy, balanced accuracy, weighted F1 and Matthews
    correlation coefficient for the predicted vs. gold labels.
    """
    logits, labels = eval_pred
    # Hard class predictions: arg-max over the class logits.
    predicted = np.argmax(logits, axis=1)
    scores = {}
    scores["accuracy"] = accuracy_score(labels, predicted)
    scores["balanced_accuracy"] = balanced_accuracy_score(labels, predicted)
    scores["f1_weighted"] = f1_score(labels, predicted, average="weighted")
    scores["mcc"] = matthews_corrcoef(labels, predicted)
    return scores
249
+
250
+
251
+ # ---------------------------------------------------------
252
+ # 9. TRAINING
253
+ # ---------------------------------------------------------
254
+ training_args = TrainingArguments(
255
+ output_dir=os.path.join(ARTIFACT_DIR, "results"),
256
+ learning_rate=LEARNING_RATE,
257
+ per_device_train_batch_size=BATCH_SIZE,
258
+ per_device_eval_batch_size=BATCH_SIZE,
259
+ num_train_epochs=EPOCHS,
260
+ weight_decay=0.01,
261
+ logging_steps=100,
262
+ save_strategy="no",
263
+ report_to="none"
264
+ )
265
+
266
+ trainer = Trainer(
267
+ model=model,
268
+ args=training_args,
269
+ train_dataset=train_dataset,
270
+ eval_dataset=val_dataset,
271
+ compute_metrics=compute_metrics
272
+ )
273
+
274
+ trainer.train()
275
+
276
+
277
+ # ---------------------------------------------------------
278
+ # 10. FINAL TEST EVALUATION
279
+ # ---------------------------------------------------------
280
+ predictions = trainer.predict(test_dataset)
281
+ y_true = predictions.label_ids
282
+ y_pred = np.argmax(predictions.predictions, axis=1)
283
+
284
+ print("\n===== FINAL TEST METRICS =====")
285
+ print(f"Accuracy : {accuracy_score(y_true, y_pred):.4f}")
286
+ print(f"Balanced Accuracy : {balanced_accuracy_score(y_true, y_pred):.4f}")
287
+ print(f"Weighted F1 : {f1_score(y_true, y_pred, average='weighted'):.4f}")
288
+ print(f"MCC : {matthews_corrcoef(y_true, y_pred):.4f}")
289
+
290
+
291
+ # ---------------------------------------------------------
292
+ # 11. SAVE TRAINED MODEL
293
+ # ---------------------------------------------------------
294
+ model.save_pretrained(MODEL_DIR)
295
+
296
+ print("\n✅ PREPROCESSING + TRAINING COMPLETED SUCCESSFULLY")
297
+
298
+
299
+ NUM_LABELS = len(label_map)
300
+
301
+ # ---------------------------------------------------------
302
+ # 4. TRAIN / VAL / TEST SPLIT
303
+ # ---------------------------------------------------------
304
+ train_df, temp_df = train_test_split(
305
+ df, test_size=0.30, stratify=df['label_id'], random_state=42
306
+ )
307
+
308
+ val_df, test_df = train_test_split(
309
+ temp_df, test_size=0.50, stratify=temp_df['label_id'], random_state=42
310
+ )
311
+
312
+ # SAVE PREPROCESSED SPLITS
313
+ train_df.to_csv(f"{ARTIFACT_DIR}/train.csv", index=False)
314
+ val_df.to_csv(f"{ARTIFACT_DIR}/val.csv", index=False)
315
+ test_df.to_csv(f"{ARTIFACT_DIR}/test.csv", index=False)
316
+
317
+ # ---------------------------------------------------------
318
+ # 5. TOKENIZER
319
+ # ---------------------------------------------------------
320
+ tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
321
+
322
+ # SAVE TOKENIZER
323
+ with open(f"{ARTIFACT_DIR}/tokenizer.pkl", "wb") as f:
324
+ pickle.dump(tokenizer, f)
325
+
326
+ # ---------------------------------------------------------
327
+ # 6. DATASET CLASS
328
+ # ---------------------------------------------------------
329
class GrievanceDataset(Dataset):
    """Torch-compatible view over tokenised grievance texts.

    NOTE(review): this duplicates the ``GrievanceDataset`` defined
    earlier in this script — the file appears to contain two
    concatenated copies of the training pipeline; consider deduplicating.
    """

    def __init__(self, texts, labels):
        # Eagerly tokenise the whole split; the tokenizer returns a
        # dict-like batch of Python lists (input_ids, attention_mask, ...).
        self.encodings = tokenizer(
            list(texts),
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH,
        )
        self.labels = list(labels)

    def __getitem__(self, idx):
        tensors = {name: torch.tensor(column[idx])
                   for name, column in self.encodings.items()}
        tensors["labels"] = torch.tensor(self.labels[idx])
        return tensors

    def __len__(self):
        return len(self.labels)
346
+
347
+ train_dataset = GrievanceDataset(train_df['text'], train_df['label_id'])
348
+ val_dataset = GrievanceDataset(val_df['text'], val_df['label_id'])
349
+ test_dataset = GrievanceDataset(test_df['text'], test_df['label_id'])
350
+
351
+ # ---------------------------------------------------------
352
+ # 7. MODEL
353
+ # ---------------------------------------------------------
354
+ model = BertForSequenceClassification.from_pretrained(
355
+ "bert-base-uncased",
356
+ num_labels=NUM_LABELS
357
+ )
358
+
359
+ # ---------------------------------------------------------
360
+ # 8. METRICS
361
+ # ---------------------------------------------------------
362
def compute_metrics(eval_pred):
    """Trainer metrics hook: accuracy, balanced accuracy, weighted F1, MCC.

    NOTE(review): the earlier compute_metrics in this script reports the
    F1 value under "f1_weighted" while this duplicated copy uses "f1" —
    the key names are inconsistent between the two pipeline halves.
    """
    logits, labels = eval_pred
    # Convert raw logits into hard class predictions.
    predicted = np.argmax(logits, axis=1)
    return {
        "accuracy": accuracy_score(labels, predicted),
        "balanced_accuracy": balanced_accuracy_score(labels, predicted),
        "f1": f1_score(labels, predicted, average="weighted"),
        "mcc": matthews_corrcoef(labels, predicted),
    }
372
+
373
+ # ---------------------------------------------------------
374
+ # 9. TRAINING
375
+ # ---------------------------------------------------------
376
+ training_args = TrainingArguments(
377
+ output_dir=f"{ARTIFACT_DIR}/results",
378
+ learning_rate=LEARNING_RATE,
379
+ per_device_train_batch_size=BATCH_SIZE,
380
+ per_device_eval_batch_size=BATCH_SIZE,
381
+ num_train_epochs=EPOCHS,
382
+ weight_decay=0.01,
383
+ logging_steps=100,
384
+ save_strategy="no",
385
+ report_to="none"
386
+ )
387
+
388
+ trainer = Trainer(
389
+ model=model,
390
+ args=training_args,
391
+ train_dataset=train_dataset,
392
+ eval_dataset=val_dataset,
393
+ tokenizer=tokenizer,
394
+ compute_metrics=compute_metrics
395
+ )
396
+
397
+ trainer.train()
398
+
399
+ # ---------------------------------------------------------
400
+ # 10. FINAL TEST EVALUATION
401
+ # ---------------------------------------------------------
402
+ predictions = trainer.predict(test_dataset)
403
+ y_true = predictions.label_ids
404
+ y_pred = np.argmax(predictions.predictions, axis=1)
405
+
406
+ print("\n===== FINAL TEST METRICS =====")
407
+ print(f"Accuracy : {accuracy_score(y_true, y_pred):.4f}")
408
+ print(f"Balanced Accuracy : {balanced_accuracy_score(y_true, y_pred):.4f}")
409
+ print(f"Weighted F1 : {f1_score(y_true, y_pred, average='weighted'):.4f}")
410
+ print(f"MCC : {matthews_corrcoef(y_true, y_pred):.4f}")
411
+
412
+ # ---------------------------------------------------------
413
+ # 11. SAVE TRAINED MODEL
414
+ # ---------------------------------------------------------
415
+ model.save_pretrained(MODEL_DIR)
416
+
417
+ print("\n✅ PREPROCESSING + TRAINING + ARTIFACT GENERATION COMPLETED")
classification/classification/artifacts/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0be0d88eed1838fba777af266556aea55e435b970076684d2ad1c8c9b3fb0b
3
+ size 342
classification/classification/artifacts/label_map.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e10c5614e117fd9ccab4af3fa62c0e4c44d23195847586d4d1ddb47f4a00cc
3
+ size 321
classification/indic_bert_classify.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # INDICBERT MODEL — CATEGORY CLASSIFICATION (HINDI + TELUGU)
3
+ # =========================================================
4
+
5
+ import os
6
+ import re
7
+ import torch
8
+ import pickle
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
+
11
+ # ── Path config ───────────────────────────────────────────
12
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
13
+ ARTIFACT_DIR = os.path.join(BASE_DIR, "artifacts")
14
+ MODEL_DIR = os.path.join(ARTIFACT_DIR, "indicbert_model")
15
+ MAX_LENGTH = 128
16
+
17
+ # ── Load artifacts ────────────────────────────────────────
18
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, local_files_only=True)
19
+
20
+ with open(os.path.join(ARTIFACT_DIR, "label_encoder.pkl"), "rb") as f:
21
+ label_encoder = pickle.load(f)
22
+
23
+ model = AutoModelForSequenceClassification.from_pretrained(
24
+ MODEL_DIR, local_files_only=True
25
+ )
26
+ model.eval()
27
+
28
+ # ── Edge-case constants ───────────────────────────────────
29
# Exact strings that are just a category label and nothing else — a bare
# label is not a grievance description, so it is rejected up front.
LABEL_WORDS = {
    "water", "electricity", "roads", "garbage",
    "sanitation", "pollution", "transport", "animals",
    "पानी", "बिजली", "सड़क", "कचरा",
    "నీరు", "విద్యుత్", "రోడ్డు", "చెత్త",
}

# Greetings / acknowledgements (English, Hindi, Telugu) that carry no
# actionable complaint.
NON_GRIEVANCE_PHRASES = {
    "hello", "hi", "good morning", "good evening",
    "thank you", "thanks", "all good", "no issues", "test", "demo",
    "नमस्ते", "धन्यवाद", "सब ठीक है", "कोई समस्या नहीं",
    "నమస్తే", "ధన్యవాదాలు", "అన్నీ బాగున్నాయి", "సమస్య లేదు",
}


# ── Text cleaning (Indic-safe) ────────────────────────────
def clean_text(text: str) -> str:
    """Strip HTML tags and characters outside the Hindi / Telugu /
    printable-ASCII ranges, then collapse whitespace."""
    text = str(text)
    text = re.sub(r"<.*?>", " ", text)
    # Keep Hindi (0900-097F), Telugu (0C00-0C7F), basic ASCII (0020-007F)
    text = re.sub(r"[^\u0900-\u097F\u0C00-\u0C7F\u0020-\u007F]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text


# ── Input validation ──────────────────────────────────────
def validate_input(text: str):
    """Return a rejection reason for unusable input, or None when it is OK.

    Possible reasons: "empty_text", "label_only", "non_grievance_text",
    "too_short", "too_few_words".
    """
    if not text or not text.strip():
        return "empty_text"
    text_l = text.strip().lower()
    # BUG FIX: exact-match checks must run BEFORE the generic length
    # heuristics. Every LABEL_WORDS entry is a single word and several
    # NON_GRIEVANCE_PHRASES entries are short ("hi", "hello"), so with
    # the length checks first the "label_only" / "non_grievance_text"
    # branches were unreachable for those inputs — they were misreported
    # as "too_short" / "too_few_words".
    if text_l in LABEL_WORDS:
        return "label_only"
    if text_l in NON_GRIEVANCE_PHRASES:
        return "non_grievance_text"
    if len(text_l) < 5:
        return "too_short"
    if len(text_l.split()) < 2:
        return "too_few_words"
    return None
68
+
69
+
70
+ # ── Predict ───────────────────────────────────────────────
71
def predict(
    text: str,
    input_ids=None,      # O3: pre-tokenised tensor from main.py
    attention_mask=None, # O3: pre-tokenised tensor from main.py
) -> dict:
    """
    Predict grievance category for Hindi / Telugu text.

    Args:
        text : Raw input string (always required for validation).
        input_ids : Optional pre-tokenised tensor (1, seq_len).
        attention_mask : Required when input_ids is provided.

    Returns:
        dict with keys: status, category, confidence, class_index
        (plus "reason" on validation failure or low confidence).
    """
    # Rule-based gate: reject empty / trivial / non-grievance inputs
    # before spending a model forward pass on them.
    failure_reason = validate_input(text)
    if failure_reason is not None:
        return {
            "status": "failed",
            "reason": failure_reason,
            "category": None,
            "confidence": 0.0,
            "class_index": None,
        }

    # Indic-safe normalisation of the raw text.
    normalised = clean_text(text)

    # O3: tokenise here unless the caller already supplied tensors.
    if input_ids is None:
        encoded = tokenizer(
            normalised,
            return_tensors="pt",
            truncation=True,
            padding=False,
            max_length=MAX_LENGTH,
        )
        input_ids = encoded["input_ids"]
        attention_mask = encoded["attention_mask"]

    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

    probabilities = torch.softmax(logits, dim=1)
    top_prob, top_idx = torch.max(probabilities, dim=1)
    confidence = top_prob.item()
    predicted_index = top_idx.item()

    # Below 30% confidence we refuse to commit to a class and fall back
    # to the generic "Other" bucket (still a successful call).
    if confidence < 0.30:
        return {
            "status": "success",
            "reason": "low_confidence",
            "category": "Other",
            "confidence": round(confidence, 4),
            "class_index": predicted_index,
        }

    category = label_encoder.inverse_transform([predicted_index])[0]

    return {
        "status": "success",
        "category": category,
        "confidence": round(confidence, 4),
        "class_index": predicted_index,
    }
139
+
140
+
141
def get_model_and_tokenizer():
    """Return the module-level (model, tokenizer) pair so callers can reuse them (e.g. for pre-tokenising)."""
    return model, tokenizer
classification/indic_bert_model.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # INDICBERT PREPROCESSING + TRAINING + ARTIFACT GENERATION
3
+ # Hindi + Telugu Grievance Classification
4
+ # =========================================================
5
+
6
+ import os
7
+ import re
8
+ import pickle
9
+ import pandas as pd
10
+ import numpy as np
11
+ import torch
12
+
13
+ from sklearn.model_selection import train_test_split
14
+ from sklearn.preprocessing import LabelEncoder
15
+ from sklearn.metrics import (
16
+ accuracy_score,
17
+ f1_score,
18
+ balanced_accuracy_score,
19
+ matthews_corrcoef
20
+ )
21
+
22
+ from transformers import (
23
+ AutoTokenizer,
24
+ AutoModelForSequenceClassification,
25
+ Trainer,
26
+ TrainingArguments
27
+ )
28
+
29
+ from torch.utils.data import Dataset
30
+
31
+
32
+ # =========================================================
33
+ # CONFIG
34
+ # =========================================================
35
+
36
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
37
+ DATA_PATH = os.path.join(BASE_DIR, "indic_train.csv")
38
+
39
+ ARTIFACT_DIR = os.path.join(BASE_DIR, "artifacts")
40
+ MODEL_DIR = os.path.join(ARTIFACT_DIR, "indicbert_model")
41
+
42
+ MAX_LENGTH = 128
43
+ EPOCHS = 4
44
+ BATCH_SIZE = 16
45
+ LEARNING_RATE = 2e-5
46
+
47
+ MODEL_NAME = "ai4bharat/indic-bert"
48
+
49
+ os.makedirs(ARTIFACT_DIR, exist_ok=True)
50
+ os.makedirs(MODEL_DIR, exist_ok=True)
51
+
52
+ print(f"📄 Loading dataset from: {DATA_PATH}")
53
+
54
+
55
+ # =========================================================
56
+ # LOAD DATA
57
+ # =========================================================
58
+
59
+ df = pd.read_csv(DATA_PATH)
60
+
61
+ df = df[['text', 'label']]
62
+
63
+ df.dropna(inplace=True)
64
+ df.drop_duplicates(inplace=True)
65
+
66
+
67
+ # =========================================================
68
+ # CLEAN TEXT (KEEP HINDI & TELUGU SAFE)
69
+ # =========================================================
70
+
71
def clean_text(text):
    """Normalise text while preserving Devanagari and Telugu script.

    Removes HTML tags, drops characters outside the Hindi (U+0900–097F),
    Telugu (U+0C00–0C7F) and printable-ASCII (U+0020–007F) ranges, and
    collapses whitespace.
    """
    result = str(text)
    # Strip HTML/XML tags.
    result = re.sub(r"<.*?>", " ", result)
    # Drop everything outside the allowed Hindi / Telugu / ASCII ranges.
    result = re.sub(r"[^\u0900-\u097F\u0C00-\u0C7F\u0020-\u007F]", " ", result)
    # Collapse whitespace runs and trim.
    return re.sub(r"\s+", " ", result).strip()
84
+
85
+
86
+ df["text"] = df["text"].apply(clean_text)
87
+
88
+
89
+ # =========================================================
90
+ # LABEL ENCODING
91
+ # =========================================================
92
+
93
+ label_encoder = LabelEncoder()
94
+
95
+ df["label_id"] = label_encoder.fit_transform(df["label"])
96
+
97
+ label_map = dict(zip(
98
+ label_encoder.classes_,
99
+ label_encoder.transform(label_encoder.classes_)
100
+ ))
101
+
102
+
103
+ # SAVE LABEL ARTIFACTS
104
+ with open(os.path.join(ARTIFACT_DIR, "label_encoder.pkl"), "wb") as f:
105
+ pickle.dump(label_encoder, f)
106
+
107
+ with open(os.path.join(ARTIFACT_DIR, "label_map.pkl"), "wb") as f:
108
+ pickle.dump(label_map, f)
109
+
110
+
111
+ NUM_LABELS = len(label_map)
112
+
113
+ print(f"✅ Number of classes: {NUM_LABELS}")
114
+
115
+
116
+ # =========================================================
117
+ # TRAIN / VAL / TEST SPLIT
118
+ # =========================================================
119
+
120
+ train_df, temp_df = train_test_split(
121
+ df,
122
+ test_size=0.30,
123
+ stratify=df["label_id"],
124
+ random_state=42
125
+ )
126
+
127
+ val_df, test_df = train_test_split(
128
+ temp_df,
129
+ test_size=0.50,
130
+ stratify=temp_df["label_id"],
131
+ random_state=42
132
+ )
133
+
134
+
135
+ # SAVE SPLITS
136
+ train_df.to_csv(os.path.join(ARTIFACT_DIR, "indic_train.csv"), index=False)
137
+ val_df.to_csv(os.path.join(ARTIFACT_DIR, "indic_val.csv"), index=False)
138
+ test_df.to_csv(os.path.join(ARTIFACT_DIR, "indic_test.csv"), index=False)
139
+
140
+
141
+ # =========================================================
142
+ # TOKENIZER
143
+ # =========================================================
144
+
145
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
146
+
147
+ with open(os.path.join(ARTIFACT_DIR, "indic_tokenizer.pkl"), "wb") as f:
148
+ pickle.dump(tokenizer, f)
149
+
150
+
151
+ # =========================================================
152
+ # DATASET CLASS
153
+ # =========================================================
154
+
155
class GrievanceDataset(Dataset):
    """Dataset of tokenised Hindi/Telugu grievances for the HF Trainer."""

    def __init__(self, texts, labels):
        # One batched tokenizer call for the whole split (pads to the
        # longest sequence, truncates at MAX_LENGTH).
        self.encodings = tokenizer(
            list(texts),
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH,
        )
        self.labels = list(labels)

    def __getitem__(self, idx):
        # Per-field tensors plus the gold label, as the Trainer expects.
        sample = {name: torch.tensor(values[idx])
                  for name, values in self.encodings.items()}
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample

    def __len__(self):
        return len(self.labels)
184
+
185
+
186
+
187
+ train_dataset = GrievanceDataset(
188
+ train_df["text"],
189
+ train_df["label_id"]
190
+ )
191
+
192
+ val_dataset = GrievanceDataset(
193
+ val_df["text"],
194
+ val_df["label_id"]
195
+ )
196
+
197
+ test_dataset = GrievanceDataset(
198
+ test_df["text"],
199
+ test_df["label_id"]
200
+ )
201
+
202
+
203
+ # =========================================================
204
+ # MODEL
205
+ # =========================================================
206
+
207
+ model = AutoModelForSequenceClassification.from_pretrained(
208
+ MODEL_NAME,
209
+ num_labels=NUM_LABELS
210
+ )
211
+
212
+
213
+ # =========================================================
214
+ # METRICS
215
+ # =========================================================
216
+
217
def compute_metrics(eval_pred):
    """Evaluation hook for the HF Trainer.

    Computes accuracy, balanced accuracy, weighted F1 and Matthews
    correlation coefficient from (logits, gold labels).
    """
    logits, labels = eval_pred
    # Arg-max over class logits → hard predictions.
    predicted = np.argmax(logits, axis=1)
    return {
        "accuracy": accuracy_score(labels, predicted),
        "balanced_accuracy": balanced_accuracy_score(labels, predicted),
        "f1_weighted": f1_score(labels, predicted, average="weighted"),
        "mcc": matthews_corrcoef(labels, predicted),
    }
234
+
235
+
236
+ # =========================================================
237
+ # TRAINING
238
+ # =========================================================
239
+
240
+ training_args = TrainingArguments(
241
+ output_dir=f"{ARTIFACT_DIR}/indic_results",
242
+ learning_rate=LEARNING_RATE,
243
+ per_device_train_batch_size=BATCH_SIZE,
244
+ per_device_eval_batch_size=BATCH_SIZE,
245
+ num_train_epochs=EPOCHS,
246
+ weight_decay=0.01,
247
+ logging_steps=100,
248
+ save_strategy="no",
249
+ report_to="none"
250
+ )
251
+
252
+
253
+ trainer = Trainer(
254
+ model=model,
255
+ args=training_args,
256
+ train_dataset=train_dataset,
257
+ eval_dataset=val_dataset,
258
+ compute_metrics=compute_metrics
259
+ )
260
+
261
+
262
+
263
+ print("\n🚀 Training IndicBERT Model...\n")
264
+
265
+ trainer.train()
266
+
267
+
268
+ # =========================================================
269
+ # FINAL TEST EVALUATION
270
+ # =========================================================
271
+
272
+ predictions = trainer.predict(test_dataset)
273
+
274
+ y_true = predictions.label_ids
275
+
276
+ y_pred = np.argmax(predictions.predictions, axis=1)
277
+
278
+
279
+ print("\n===== FINAL TEST METRICS =====")
280
+
281
+ print(f"Accuracy : {accuracy_score(y_true, y_pred):.4f}")
282
+
283
+ print(f"Balanced Accuracy : {balanced_accuracy_score(y_true, y_pred):.4f}")
284
+
285
+ print(f"Weighted F1 : {f1_score(y_true, y_pred, average='weighted'):.4f}")
286
+
287
+ print(f"MCC : {matthews_corrcoef(y_true, y_pred):.4f}")
288
+
289
+
290
+ # =========================================================
291
+ # SAVE MODEL
292
+ # =========================================================
293
+
294
+ model.save_pretrained(MODEL_DIR)
295
+
296
+ tokenizer.save_pretrained(MODEL_DIR)
297
+
298
+
299
+ print("\n✅ INDICBERT TRAINING COMPLETED SUCCESSFULLY")
classification/indic_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
classification/train.csv ADDED
The diff for this file is too large to render. See raw diff
 
gfas/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # gfas/__init__.py
3
+ # Public surface of the GFAS package.
4
+ # main.py only needs to import `audit` from here.
5
+ # =========================================================
6
+
7
+ from .fairness_audit import audit
8
+
9
+ __all__ = ["audit"]
gfas/disparity_analysis.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # gfas/disparity_analysis.py
3
+ # Per-group metric computation and per-dimension disparity
4
+ # analysis (gaps, flags, breakdown table).
5
+ # =========================================================
6
+ import statistics
7
+
8
+ from .fairness_metrics import (
9
+ URGENCY_POSITIVE,
10
+ FAIRNESS_DIMENSIONS,
11
+ PARITY_FLAG_THRESHOLD,
12
+ PRIORITY_FLAG_THRESHOLD,
13
+ TPR_FLAG_THRESHOLD,
14
+ gap_to_score,
15
+ score_label,
16
+ severity,
17
+ )
18
+
19
+
20
+ # ── Internal helpers ──────────────────────────────────────
21
+
22
+ def _gap(values: list) -> float | None:
23
+ """Max – min over a list, ignoring Nones. Returns None if fewer than 2 clean values."""
24
+ clean = [v for v in values if v is not None]
25
+ return round(max(clean) - min(clean), 4) if len(clean) >= 2 else None
26
+
27
+
28
+ # ── Public API ────────────────────────────────────────────
29
+
30
def compute_group_metrics(items: list) -> dict:
    """
    Compute per-group fairness metrics for a single bucket of grievance records.

    Returns a dict with keys:
        count, resolution_rate, statistical_parity,
        equal_opportunity_tpr, mean_priority_score
    """
    total = len(items)

    # Statistical parity: share of records the model marked urgent.
    flagged_urgent = [r for r in items if r["predicted_urgency"] in URGENCY_POSITIVE]
    parity = round(len(flagged_urgent) / total, 4)

    # Equal opportunity: true-positive rate among records whose ground
    # truth is urgent. Undefined (None) when the group has none.
    truly_urgent = [r for r in items if r["true_urgency"] in URGENCY_POSITIVE]
    if truly_urgent:
        caught = [r for r in truly_urgent if r["predicted_urgency"] in URGENCY_POSITIVE]
        tpr = round(len(caught) / len(truly_urgent), 4)
    else:
        tpr = None

    # Average model-assigned priority score for the group.
    avg_priority = round(statistics.mean([r["priority_score"] for r in items]), 4)

    # Share of records whose workflow status is "resolved".
    resolved = [r for r in items if r.get("status", "") == "resolved"]

    return {
        "count": total,
        "resolution_rate": round(len(resolved) / total, 4),
        "statistical_parity": parity,
        "equal_opportunity_tpr": tpr,
        "mean_priority_score": avg_priority,
    }
61
+
62
+
63
def analyse_dimension(dimension: str, group_metrics: dict) -> dict:
    """
    Given a dimension name and its {group → metrics} dict, compute:
      - gap values across groups
      - fairness score, label, severity
      - flagged groups
      - breakdown table
      - fairness_flags list
    """
    # Collect each metric across all groups of this dimension. TPR may be
    # None for groups with no truly-urgent cases — those are skipped.
    parity_vals = [v["statistical_parity"] for v in group_metrics.values()]
    priority_vals = [v["mean_priority_score"] for v in group_metrics.values()]
    tpr_vals = [v["equal_opportunity_tpr"] for v in group_metrics.values()
                if v["equal_opportunity_tpr"] is not None]
    res_vals = [v["resolution_rate"] for v in group_metrics.values()]

    # Max-minus-min spread per metric (_gap returns None for < 2 values).
    sp_gap = _gap(parity_vals)
    tpr_gap = _gap(tpr_vals)
    pri_gap = _gap(priority_vals)
    res_gap = _gap(res_vals)

    # Overall score = worst (lowest) of the per-metric scores: a dimension
    # is only as fair as its worst metric. Resolution-rate gap is reported
    # below but does not feed the score.
    sub_scores = [s for s in [gap_to_score(sp_gap), gap_to_score(tpr_gap), gap_to_score(pri_gap)]
                  if s is not None]
    fairness_score = min(sub_scores) if sub_scores else 100

    avg_parity = round(statistics.mean(parity_vals), 4) if parity_vals else 0
    avg_resolution = round(statistics.mean(res_vals), 4) if res_vals else 0

    # Flag groups whose urgency rate trails the dimension average by
    # more than 10 percentage points.
    flagged_groups = [
        g for g, m in group_metrics.items()
        if m["statistical_parity"] < avg_parity - 0.10
    ]

    # Per-group breakdown table, sorted ascending by "resolutionRate".
    # NOTE(review): "resolutionRate" is computed from statistical_parity,
    # not from the resolution_rate metric — confirm this is intentional
    # (e.g. a frontend naming choice) rather than a copy-paste slip.
    breakdown = sorted(
        [
            {
                dimension: group,
                "resolutionRate": round(m["statistical_parity"] * 100, 2),
                "total": m["count"],
                "statisticalParity": m["statistical_parity"],
                "tpr": m["equal_opportunity_tpr"],
                "meanPriorityScore": m["mean_priority_score"],
                "isFlagged": group in flagged_groups,
            }
            for group, m in group_metrics.items()
        ],
        key=lambda x: x["resolutionRate"],
    )

    # Human-readable flags, one per metric whose gap exceeds its
    # imported threshold (gaps of None — too few groups — never flag).
    fairness_flags = []
    if sp_gap is not None and sp_gap > PARITY_FLAG_THRESHOLD:
        fairness_flags.append({
            "metric": "statistical_parity",
            "gap": sp_gap,
            "label": f"Urgency-rate gap of {sp_gap * 100:.1f}% across {dimension} groups",
            "interpretation": "Some groups are significantly more (or less) likely to have their grievances classified as high/critical urgency.",
        })
    if pri_gap is not None and pri_gap > PRIORITY_FLAG_THRESHOLD:
        fairness_flags.append({
            "metric": "mean_priority_score",
            "gap": pri_gap,
            "label": f"Priority-score gap of {pri_gap:.3f} across {dimension} groups",
            "interpretation": "Some groups receive systematically higher or lower priority scores, affecting response speed.",
        })
    if tpr_gap is not None and tpr_gap > TPR_FLAG_THRESHOLD:
        fairness_flags.append({
            "metric": "equal_opportunity_tpr",
            "gap": tpr_gap,
            "label": f"Detection-rate gap of {tpr_gap * 100:.1f}% for truly urgent cases across {dimension} groups",
            "interpretation": "The model misses urgent cases at different rates across groups.",
        })

    # Assemble the report payload. camelCase keys feed the UI directly;
    # snake_case keys carry the raw metric detail.
    # NOTE(review): "average" is the mean statistical parity (as a
    # percentage), despite the generic name — see breakdown note above.
    return {
        "fairnessScore": fairness_score,
        "fairnessLabel": score_label(fairness_score),
        "severity": severity(fairness_score),
        "groups_found": sorted(group_metrics.keys()),
        "average": round(avg_parity * 100, 2),
        "average_resolution": round(avg_resolution * 100, 2),
        "breakdown": breakdown,
        "flagged": flagged_groups,
        "group_metrics": group_metrics,
        "disparity_summary": {
            "statistical_parity_gap": sp_gap,
            "equal_opportunity_tpr_gap": tpr_gap,
            "mean_priority_score_gap": pri_gap,
            "resolution_rate_gap": res_gap,
            "statistical_parity_gap_label": f"{round(sp_gap * 100, 1)}% urgency-rate spread" if sp_gap is not None else None,
            "equal_opportunity_tpr_gap_label": f"{round(tpr_gap * 100, 1)}% detection-rate gap" if tpr_gap is not None else None,
            "mean_priority_score_gap_label": f"{round(pri_gap, 3)} priority-score spread" if pri_gap is not None else None,
            "resolution_rate_gap_label": f"{round(res_gap * 100, 1)}% resolution-rate gap" if res_gap is not None else None,
        },
        "fairness_flags": fairness_flags,
        "flags_raised": len(fairness_flags),
    }
gfas/fairness_audit.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # gfas/fairness_audit.py
3
+ # Input validation and the single callable that main.py
4
+ # imports to power the POST /fairness-audit route.
5
+ # =========================================================
6
+
7
+ from .fairness_metrics import (
8
+ VALID_AREAS,
9
+ VALID_CATEGORIES,
10
+ VALID_LANGUAGES,
11
+ VALID_URGENCY,
12
+ )
13
+ from .gfas_engine import run_fairness_audit
14
+
15
+
16
+ # ── Record-level validation ───────────────────────────────
17
+
18
def _validate_record(idx: int, r) -> tuple[dict | None, dict | None]:
    """
    Normalise and validate a single raw grievance dict.

    Returns (record, None) on success, (None, skip_entry) on failure.
    """
    if not isinstance(r, dict):
        return None, {"index": idx, "error": "Not a JSON object"}

    def _norm(key: str, default: str = "") -> str:
        # Coerce to a trimmed, lower-cased string for case-insensitive matching.
        return str(r.get(key, default)).strip().lower()

    try:
        area = _norm("area")
        category = _norm("category")
        language = _norm("language", "english")
        pred = _norm("predicted_urgency", "medium")
        true_urg = _norm("true_urgency", pred)
        score = float(r.get("priority_score", 0))
        status = _norm("status", "pending")
    except Exception as e:
        return None, {"index": idx, "error": f"Field parse error: {e}"}

    # Hard failures: area and category must come from the known vocabularies.
    if area not in VALID_AREAS:
        return None, {"index": idx, "error": f"area not in VALID_AREAS: '{area}'"}
    if category not in VALID_CATEGORIES:
        return None, {"index": idx, "error": f"category not in VALID_CATEGORIES: '{category}'"}

    # Soft-correct out-of-vocabulary enum values instead of rejecting the record.
    if language not in VALID_LANGUAGES:
        language = "english"
    if pred not in VALID_URGENCY:
        pred = "medium"
    if true_urg not in VALID_URGENCY:
        true_urg = pred

    record = {
        "area": area,
        "category": category,
        "language": language,
        "predicted_urgency": pred,
        "true_urgency": true_urg,
        "priority_score": score,
        "status": status,
    }
    return record, None
60
+
61
+
62
+ # ── Public callable used by the route ────────────────────
63
+
64
def audit(raw_grievances: list) -> tuple[dict | None, dict | None, int]:
    """
    Validate *raw_grievances* and run the full GFAS pipeline.

    Returns:
        (result_dict, None, 200) — success
        (None, error_dict, 4xx)  — validation failure
    """
    if not (isinstance(raw_grievances, list) and raw_grievances):
        error = {
            "status": "failed",
            "message": "'grievances' must be a non-empty list.",
        }
        return None, error, 422

    validated: list = []
    skipped: list = []
    for idx, raw in enumerate(raw_grievances):
        record, problem = _validate_record(idx, raw)
        if record:
            validated.append(record)
        else:
            skipped.append(problem)

    # Fairness metrics are meaningless with fewer than two comparable records.
    if len(validated) < 2:
        message = (
            f"Only {len(validated)} valid record(s) after validation "
            f"({len(skipped)} skipped). Need at least 2 records across different "
            f"groups to compute fairness metrics."
        )
        error = {
            "status": "failed",
            "message": message,
            "skipped": skipped[:10],
            "skipped_count": len(skipped),
            "received_count": len(raw_grievances),
        }
        return None, error, 422

    result = {
        "status": "success",
        "fairness_audit": run_fairness_audit(validated),
        "meta": {
            "received": len(raw_grievances),
            "valid": len(validated),
            "skipped": len(skipped),
            "skipped_details": skipped[:5] if skipped else [],
        },
    }
    return result, None, 200
gfas/fairness_metrics.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# =========================================================
# gfas/fairness_metrics.py
# Constants and primitive scoring helpers used across GFAS.
# =========================================================

# ── Urgency label sets ────────────────────────────────────
# Full label vocabulary accepted for predicted/true urgency fields.
VALID_URGENCY = {"low", "medium", "high", "critical"}
# Labels treated as the "positive" (urgent) class in parity/TPR computations.
URGENCY_POSITIVE = {"high", "critical"}

# ── Domain allow-lists ────────────────────────────────────
# Grievance categories the audit recognises; records outside this set are rejected.
VALID_CATEGORIES = {
    "electricity", "garbage", "pollution", "public transport",
    "roads", "sanitation", "stray animals", "water"
}

# Submission languages; anything else is soft-corrected to "english".
VALID_LANGUAGES = {"telugu", "english", "hindi"}

# Kakinada localities (lower-cased), grouped by municipal zone.
VALID_AREAS = {
    # Zone 1
    "suryaraopeta", "jagannaickpur", "raja rao peta", "bhanugudi",
    "old town", "rajah street", "main road",
    # Zone 2
    "gandhi nagar", "ashok nagar", "nethaji nagar",
    "srinivasa nagar", "tngo colony", "shankar vilas",
    "collector's colony",
    # Zone 3
    "new town", "bank colony", "drivers colony",
    "fci colony", "burma colony", "dwaraka nagar",
    "ayodhya nagar",
    # Zone 4
    "kakinada port area", "kakinada industrial area",
    "fishing harbour", "dairy farm", "auto nagar",
    "kaleswara rao nagar",
    # Zone 5
    "ramanayyapeta", "rama rao peta", "kondayya palem",
    "ganganapalle", "gudari gunta", "indrapalem",
    # Zone 6
    "sarpavaram", "uppada", "kaikavolu",
    "kothuru", "thammavaram", "thimmapuram",
    # Zone 7
    "vivekananda street", "jr ntr road",
    "jntu kakinada area", "govt general hospital area",
    "apsp camp",
    # Other
    "kakinada beach road", "kakinada bazar",
    "anjaneya nagar",
}

# ── Audit dimensions ──────────────────────────────────────
# Grouping attributes the fairness audit iterates over.
FAIRNESS_DIMENSIONS = ["area", "category", "language"]

# ── Flag thresholds ───────────────────────────────────────
# A disparity gap above the matching threshold raises a fairness flag.
PARITY_FLAG_THRESHOLD = 0.20
PRIORITY_FLAG_THRESHOLD = 0.20
TPR_FLAG_THRESHOLD = 0.20
56
+
57
+
58
+ # ── Primitive scorers ─────────────────────────────────────
59
+
60
def gap_to_score(gap) -> int:
    """Map a disparity gap in [0, 1] to a 0–100 fairness score (higher = fairer)."""
    if gap is None:
        # No measurable gap means nothing to penalise.
        return 100
    raw = round(100 - float(gap) * 200)
    return min(100, max(0, raw))
65
+
66
+
67
def score_label(score: int) -> str:
    """Human-readable label for a 0–100 fairness score."""
    for floor, label in ((80, "Fair"), (60, "Moderate")):
        if score >= floor:
            return label
    return "Biased"
73
+
74
+
75
def severity(score: int) -> str:
    """Severity bucket for a 0–100 fairness score: ok / warning / critical."""
    if score < 60:
        return "critical"
    return "ok" if score >= 80 else "warning"
gfas/gfas_engine.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # gfas/gfas_engine.py
3
+ # Top-level orchestrator: feeds validated records through
4
+ # disparity analysis and report generation to produce the
5
+ # final fairness audit payload.
6
+ # =========================================================
7
+ import statistics
8
+ from collections import defaultdict
9
+
10
+ from .fairness_metrics import FAIRNESS_DIMENSIONS, score_label, severity
11
+ from .disparity_analysis import compute_group_metrics, analyse_dimension
12
+ from .report_generator import build_alerts, build_recommendations
13
+
14
+
15
def run_fairness_audit(validated_grievances: list) -> dict:
    """
    Main entry point for GFAS.

    Args:
        validated_grievances: List of dicts already normalised by fairness_audit.py.
            Required keys: area, category, language, predicted_urgency,
            true_urgency, priority_score, status.

    Returns:
        Full fairness audit payload ready for JSON serialisation.
    """
    dimension_results: dict = {}

    for dim in FAIRNESS_DIMENSIONS:
        # Bucket records by their value along this dimension.
        grouped: dict = defaultdict(list)
        for record in validated_grievances:
            grouped[record[dim]].append(record)

        # Per-group metrics, then the dimension-level disparity analysis.
        per_group = {
            name: compute_group_metrics(members)
            for name, members in grouped.items()
        }
        dimension_results[dim] = analyse_dimension(dim, per_group)

    # Overall score is the plain mean of the per-dimension scores.
    overall_score = round(
        statistics.mean(
            dimension_results[d]["fairnessScore"] for d in FAIRNESS_DIMENSIONS
        ),
        2,
    )

    alerts = build_alerts(dimension_results)
    recommendations = build_recommendations(dimension_results)

    # Disparity index = worst statistical-parity gap seen across dimensions.
    sp_gaps = [
        dimension_results[d]["disparity_summary"]["statistical_parity_gap"]
        for d in FAIRNESS_DIMENSIONS
    ]
    known_gaps = [g for g in sp_gaps if g is not None]
    disparity_index = round(max(known_gaps), 4) if known_gaps else None

    resolved_flags = [r.get("status", "") == "resolved" for r in validated_grievances]

    return {
        "overallFairnessScore": overall_score,
        "fairnessLabel": score_label(overall_score),
        "severity": severity(overall_score),
        "area": dimension_results["area"],
        "category": dimension_results["category"],
        "language": dimension_results["language"],
        "summary": {
            "totalGrievances": len(validated_grievances),
            "avgResolutionRate": round(statistics.mean(resolved_flags) * 100, 2),
            "disparityIndex": disparity_index,
            "dimensionsAudited": FAIRNESS_DIMENSIONS,
            "flagsRaised": sum(
                dimension_results[d]["flags_raised"] for d in FAIRNESS_DIMENSIONS
            ),
        },
        "alerts": alerts,
        "recommendations": recommendations,
        "dimensions_audited": FAIRNESS_DIMENSIONS,
        "total_grievances": len(validated_grievances),
        "results": dimension_results,
    }
gfas/report_generator.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # gfas/report_generator.py
3
+ # Builds human-readable alerts and actionable recommendations
4
+ # from per-dimension disparity analysis results.
5
+ # =========================================================
6
+
7
+ from .fairness_metrics import FAIRNESS_DIMENSIONS
8
+
9
+
10
# ── Copy templates ────────────────────────────────────────

# One headline recommendation title per audited dimension.
_TITLE_MAP = {
    "area": "Improve urgency detection in under-served areas",
    "category": "Address priority-score gap across grievance categories",
    "language": "Ensure equitable urgency classification by submission language",
}
17
+
18
+ def _desc(dimension: str, dr: dict) -> str:
19
+ flagged_str = ", ".join(dr["flagged"][:2]) or "N/A"
20
+ if dimension == "area":
21
+ gap_label = dr["disparity_summary"]["statistical_parity_gap_label"] or "unknown"
22
+ return (
23
+ f"Areas {flagged_str} show urgency-rate gaps of {gap_label}. "
24
+ "Assign dedicated officers and increase patrol frequency in these localities."
25
+ )
26
+ if dimension == "category":
27
+ return (
28
+ f"Categories with low parity scores indicate the model under-prioritises certain complaint types. "
29
+ f"Retrain or re-weight the urgency classifier for {flagged_str}."
30
+ )
31
+ # language
32
+ return (
33
+ f"Grievances in {flagged_str} receive lower urgency scores. "
34
+ "Deploy multilingual reviewers or a translation-aware pre-processing step before classification."
35
+ )
36
+
37
+
38
+ # ── Public API ────────────────────────────────────────────
39
+
40
def build_alerts(dimension_results: dict) -> list[dict]:
    """
    Return a list of alert dicts for every dimension whose fairness score < 80.
    """
    alerts: list[dict] = []
    for dim in FAIRNESS_DIMENSIONS:
        dr = dimension_results[dim]
        score = dr["fairnessScore"]
        if score >= 80:
            continue

        # Summarise up to three affected group names, noting any overflow.
        flagged = dr["flagged"]
        flagged_str = ""
        if flagged:
            shown = flagged[:3]
            overflow = len(flagged) - 3
            flagged_str = f" Affected {dim}s: {', '.join(shown)}"
            if overflow > 0:
                flagged_str += f" +{overflow} more"
            flagged_str += "."

        flag_details = "; ".join(f["label"] for f in dr["fairness_flags"])
        if score < 60:
            severity_word = "Significant"
            action_word = "Immediate review recommended."
        else:
            severity_word = "Moderate"
            action_word = "Monitor resolution trends."

        alerts.append({
            "severity": dr["severity"],
            "message": f"{severity_word} {dim} fairness disparity ({flag_details}).{flagged_str} {action_word}",
            "dimension": dim,
        })

    return alerts
70
+
71
+
72
def build_recommendations(dimension_results: dict) -> list[dict]:
    """
    Return actionable recommendations for every dimension whose fairness score < 80,
    sorted from worst to best.
    """
    def _score(dim: str):
        return dimension_results[dim]["fairnessScore"]

    below_par = [d for d in FAIRNESS_DIMENSIONS if _score(d) < 80]

    recommendations: list[dict] = []
    for dim in sorted(below_par, key=_score):
        dr = dimension_results[dim]
        affected = ", ".join(dr["flagged"][:2])
        recommendations.append({
            "priority": "high" if dr["fairnessScore"] < 60 else "medium",
            "title": _TITLE_MAP[dim],
            "description": _desc(dim, dr),
            "dimension": dim,
            "affectedArea": affected or None,
        })

    return recommendations
main.py ADDED
@@ -0,0 +1,707 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # FLASK API — MULTILINGUAL GRIEVANCE + XPE + GFAS
3
+ # INTEGRATED GRADIENTS ONLY (PRODUCTION VERSION)
4
+ # Hugging Face Spaces — Production Deployment
5
+ # Multimodal: text / audio / image(evidence) support
6
+ # =========================================================
7
+ from flask import Flask, request, jsonify
8
+ import re
9
+ import io
10
+ import traceback
11
+ import logging
12
+ import math
13
+ import os
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ from datetime import datetime, timezone
16
+
17
+ # ── Silence noisy loggers ────────────────────────────────
18
+ logging.getLogger("prophet").setLevel(logging.ERROR)
19
+ logging.getLogger("cmdstanpy").setLevel(logging.ERROR)
20
+ logging.basicConfig(level=logging.INFO)
21
+ logger = logging.getLogger(__name__)
22
+
23
+ from prophet import Prophet
24
+ import pandas as pd
25
+
26
+ # ── EXIF extraction ──────────────────────────────────────
27
+ from PIL import Image
28
+ import piexif
29
+
30
+ # =========================================================
31
+ # CATEGORY PREDICTION
32
+ # =========================================================
33
+ from classification.bert_classify import (
34
+ predict as predict_category_en,
35
+ get_model_and_tokenizer as get_cat_en,
36
+ )
37
+ from classification.indic_bert_classify import (
38
+ predict as predict_category_indic,
39
+ get_model_and_tokenizer as get_cat_indic,
40
+ )
41
+
42
+ # =========================================================
43
+ # URGENCY PREDICTION
44
+ # =========================================================
45
+ from sentiment_analysis.bert_predict import (
46
+ predict_urgency as predict_urgency_en,
47
+ get_model_and_tokenizer as get_urg_en,
48
+ )
49
+ from sentiment_analysis.indic_bert_predict import (
50
+ predict as predict_urgency_indic,
51
+ get_model_and_tokenizer as get_urg_indic,
52
+ )
53
+
54
+ # =========================================================
55
+ # MULTIMODAL
56
+ # =========================================================
57
+ from multi_modal.audio_to_text import transcribe_audio
58
+ from multi_modal.image_to_text import extract_text_from_image
59
+
60
+ # =========================================================
61
+ # XPE MODULES
62
+ # =========================================================
63
+ from xpe.priority_engine import compute_priority_score
64
+ from xpe.integrated_gradients_explainer import IntegratedGradientsExplainer
65
+ from xpe.hybrid_explainer import generate_final_reason
66
+
67
+ # =========================================================
68
+ # GFAS — Grievance Fairness Audit System
69
+ # =========================================================
70
+ from gfas import audit as gfas_audit
71
+
72
+
73
+ # =========================================================
74
+ # COMPILED REGEX — MULTILINGUAL (EN + HI + TE)
75
+ # =========================================================
76
+ _RE_HINDI = re.compile(r'[\u0900-\u097F]')
77
+ _RE_TELUGU = re.compile(r'[\u0C00-\u0C7F]')
78
+
79
+ _SMALL_TALK_PATTERNS = re.compile(
80
+ r"""
81
+ ^(hi|hello|hey|dear|sir|madam)\b
82
+ | good\s+(morning|evening|afternoon|night)
83
+ | how\s+(are\s+you|is\s+it\s+going)
84
+ | what'?s\s+up
85
+ | hope\s+you\s+are\s+doing\s+well
86
+ | \b(thank(s|\s+you)|okay|ok|great|nice|good\s+job)\b
87
+ | \b(namaste|namaskar|dhanyavaad|shukriya|theek\s+hai|accha|acha|haan|helo)\b
88
+ | \b(namaskaram|dhanyavadalu|bayapadu|ela\s+unnaru|mee\s+seva)\b
89
+ """,
90
+ re.VERBOSE | re.IGNORECASE,
91
+ )
92
+
93
+ _GRIEVANCE_PATTERNS = re.compile(
94
+ r"""
95
+ \b(problem|issue|complain(t)?|grievance|concern
96
+ |inconvenience|harassment|injustice|negligence|misconduct)\b
97
+ | \b(not\s+working|stopped\s+working|not\s+responding|no\s+response
98
+ |no\s+action|fail(ed|ure)?|malfunction(ing)?|defective
99
+ |service\s+down|interrupted|disconnected|outage
100
+ |not\s+restored|not\s+repaired|not\s+fixed|not\s+resolved
101
+ |not\s+completed?|not\s+done|not\s+processed?
102
+ |not\s+functioning|non[-\s]functional)\b
103
+ | \b(delay(ed)?|pending|not\s+received|not\s+delivered
104
+ |still\s+waiting|no\s+update|no\s+resolution
105
+ |no\s+acknowledgm[e]?nt|not\s+credited|not\s+sanctioned
106
+ |not\s+approved|not\s+collected|not\s+cleared
107
+ |overdue|lapsed|under\s+process|under\s+review|awaiting)\b
108
+ | \b(refund|charg(ed|ing)|overcharged|overbilled
109
+ |extra\s+charge|double\s+charg(ed|e)
110
+ |charged\s+twice|billed\s+twice|debited\s+twice
111
+ |wrong\s+bill|wrong\s+amount|incorrect\s+(amount|bill)
112
+ |excess\s+(charge|amount|fee)
113
+ |payment\s+fail(ed|ure)?|transaction\s+fail(ed|ure)?
114
+ |unauthorized\s+transaction|debited|deducted
115
+ |not\s+refunded|duplicate\s+(charge|bill|payment)
116
+ |invoice)\b
117
+ | \bbill\b
118
+ | \b(pothole|waterlogging|no\s+water|water\s+supply
119
+ |power\s+(cut|outage|failure)|electricity\s+(cut|failure|issue)
120
+ |sewage|drainage|garbage|waste\s+collection
121
+ |road\s+(damage|broken|condition|repair)
122
+ |streetlight|footpath
123
+ |no\s+(electricity|water|gas|internet|signal|network)
124
+ |supply\s+(not|stopped|disrupted))\b
125
+ | \b(certificate|ration\s+card|pension|scholarship|subsidy
126
+ |license|passport
127
+ |application\s+(rejected|pending|delayed|not\s+processed)
128
+ |not\s+issued|not\s+granted|denied|rejected|withheld)\b
129
+ | \b(rude|misbehav(ed|iour)|bribe|corruption
130
+ |demanding\s+(money|bribe)|not\s+attending|irresponsible)\b
131
+ | \b(wrong|missing|damaged?|broken|poor\s+service|substandard
132
+ |bad\s+service|very\s+bad|worst\s+service|defect(ive)?)\b
133
+ | \b(unsatisfied|unhappy|disappointed|frustrated|harassed|ignored
134
+ |cheated|deceived|exploited|victimized)\b
135
+ | \b(cancel(l?(ed|ation))?|legal\s+action|escalate[d]?
136
+ |complaint\s+against|take\s+action|file\s+(a\s+)?complaint
137
+ |report\s+(this|the)|seeking\s+(help|redressal|justice)
138
+ |urgent(ly)?|immediately)\b
139
+ | \b(fraud|scam|error|mistake|violation|irregularity|malpractice)\b
140
+ | (समस्या|शिकायत|परेशानी|दिक्कत|नहीं\s*मिला|नहीं\s*आया
141
+ |वापसी|रिफंड|विलंब|देरी|धोखा|गलत|टूटा|खराब
142
+ |बंद\s*हो\s*गया|काम\s*नहीं|जवाब\s*नहीं|कार्रवाई\s*नहीं
143
+ |नाराज|परेशान|निराश|कानूनी\s*कार्रवाई|भुगतान\s*विफल
144
+ |दो\s*बार\s*काटा|दो\s*बार|काटा|बिजली|पानी|सड़क
145
+ |भ्रष्टाचार|रिश्वत|जमा\s*नहीं|जारी\s*नहीं
146
+ |अनधिकृत|अतिरिक्त\s*शुल्क|बिल)
147
+ | \b(samasya|shikayat|pareshani|dikkat|nahi\s+mila|vapasi
148
+ |vilamba|deri|dhokha|galat|tuta|kharab|kaam\s+nahi
149
+ |jawab\s+nahi|naraaz|nirash|kanuni|bhrashtachar
150
+ |do\s+baar|bijli|paani|sadak|jamaa\s+nahi)\b
151
+ | (సమస్య|ఫిర్యాదు|ఇబ్బంది|రాలేదు|పని\s*చేయడం\s*లేదు
152
+ |తిరిగి\s*చెల్లింపు|ఆలస్యం|మోసం|తప్పు|పాడైంది
153
+ |సేవ\s*లేదు|జవాబు\s*లేదు|చర్య\s*లేదు
154
+ |చెల్లింపు\s*విఫలమైంది|నిరాశ|వేధింపు|రద్దు
155
+ |బిల్లు|విద్యుత్|నీరు|రోడ్డు|రెండుసార్లు|వసూలు
156
+ |జమకట్టలేదు|లంచం|అవినీతి)
157
+ | \b(firyadu|ibbandi|raaledu|pani\s+cheyyatledu
158
+ |tirigichellinpu|aalasyam|mosam|tappu|paadaindi
159
+ |seva\s+ledu|nirasha|vedhimpu|raddu|rendu\s+sarlu
160
+ |vasulu|lantham|avineeti)\b
161
+ """,
162
+ re.VERBOSE | re.IGNORECASE,
163
+ )
164
+
165
+ _RE_JUNK = re.compile(r'^[\d\W_]+$')
166
+ MIN_TEXT_LENGTH = 8
167
+
168
+ UTC = timezone.utc
169
+
170
+
171
+ # =========================================================
172
+ # KAKINADA GEO HELPERS
173
+ # =========================================================
174
+
175
+ def _dms_to_decimal(dms, ref: str) -> float:
176
+ degrees = dms[0][0] / dms[0][1]
177
+ minutes = dms[1][0] / dms[1][1]
178
+ seconds = dms[2][0] / dms[2][1]
179
+ decimal = degrees + minutes / 60 + seconds / 3600
180
+ if ref in ("S", "W"):
181
+ decimal = -decimal
182
+ return decimal
183
+
184
+
185
def extract_gps_from_image(image_bytes: bytes) -> tuple | None:
    """
    Extract (latitude, longitude) in decimal degrees from an image's EXIF GPS tags.

    Returns None when the image carries no EXIF payload, no GPS IFD, or an
    incomplete set of GPS tags. Any parsing error is deliberately swallowed
    and reported as None — callers treat that as "no GPS available".
    """
    try:
        img = Image.open(io.BytesIO(image_bytes))
        exif_bytes = img.info.get("exif")
        if not exif_bytes:
            return None
        exif_data = piexif.load(exif_bytes)
        gps_data = exif_data.get("GPS", {})
        if not gps_data:
            return None
        # DMS rationals plus their hemisphere refs; all four must be present
        # to produce a usable coordinate.
        lat_dms = gps_data.get(piexif.GPSIFD.GPSLatitude)
        lat_ref = gps_data.get(piexif.GPSIFD.GPSLatitudeRef)
        lon_dms = gps_data.get(piexif.GPSIFD.GPSLongitude)
        lon_ref = gps_data.get(piexif.GPSIFD.GPSLongitudeRef)
        if not (lat_dms and lat_ref and lon_dms and lon_ref):
            return None
        # piexif returns refs as bytes (e.g. b"N") for real files — normalise
        # to str before passing to the DMS converter.
        lat = _dms_to_decimal(lat_dms, lat_ref.decode() if isinstance(lat_ref, bytes) else lat_ref)
        lon = _dms_to_decimal(lon_dms, lon_ref.decode() if isinstance(lon_ref, bytes) else lon_ref)
        return lat, lon
    except Exception:
        # Best-effort by design: corrupt images / EXIF count as "no GPS".
        return None
206
+
207
+
208
def is_kakinada(lat: float, lon: float) -> bool:
    """True when (lat, lon) falls inside the Kakinada bounding box.

    Accepts any value coercible to float; unparsable input yields False.
    """
    try:
        lat_f = float(lat)
        lon_f = float(lon)
    except (TypeError, ValueError):
        return False
    in_lat = 16.85 <= lat_f <= 17.10
    in_lon = 82.15 <= lon_f <= 82.35
    return in_lat and in_lon
215
+
216
+
217
def check_image_location(image_bytes: bytes) -> str:
    """Classify an image's GPS metadata: 'no_gps', 'valid' (Kakinada) or 'invalid'."""
    coords = extract_gps_from_image(image_bytes)
    if coords is None:
        return "no_gps"
    return "valid" if is_kakinada(*coords) else "invalid"
223
+
224
+
225
+ # =========================================================
226
+ # LANGUAGE DETECTION
227
+ # =========================================================
228
def detect_language(text: str) -> str:
    """Detect the script of *text*: Devanagari → 'hindi', Telugu → 'telugu', else 'english'."""
    for pattern, lang in ((_RE_HINDI, "hindi"), (_RE_TELUGU, "telugu")):
        if pattern.search(text):
            return lang
    return "english"
234
+
235
+
236
+ # =========================================================
237
+ # INPUT VALIDATION
238
+ # =========================================================
239
# User-facing explanations keyed by validation failure code (the codes
# returned by validate_text and the small-talk / grievance-signal checks).
_VALIDATION_MESSAGES = {
    "too_short": "Text is too short. Please provide at least 8 characters describing your issue.",
    "junk_input": "Input contains only numbers or special characters. Please describe your grievance in words.",
    "small_talk": "This looks like a greeting or small talk. Please describe the issue you are facing.",
    "no_grievance": (
        "No grievance signal detected. Please describe your problem clearly — "
        "e.g. 'My electricity bill was charged twice' or 'Water supply disrupted for 3 days'."
    ),
}
248
+
249
+
250
def validate_text(text) -> tuple:
    """
    Pre-flight check that *text* is usable grievance input.

    Args:
        text: Raw value from the request; may be any type.

    Returns:
        (True, None) when the text passes, otherwise (False, code) where
        *code* is a key of _VALIDATION_MESSAGES.
    """
    if not isinstance(text, str):
        return False, "too_short"
    stripped = text.strip()
    # Fix: the previous hard-coded `< 5` contradicted both MIN_TEXT_LENGTH (8)
    # and the "at least 8 characters" promise in _VALIDATION_MESSAGES["too_short"];
    # use the shared constant so message and behaviour agree.
    if len(stripped) < MIN_TEXT_LENGTH:
        return False, "too_short"
    # Reject input made up entirely of digits / punctuation / underscores.
    if _RE_JUNK.fullmatch(stripped.lower()):
        return False, "junk_input"
    return True, None
259
+
260
+
261
+ # =========================================================
262
+ # INITIALIZE APP
263
+ # =========================================================
264
+ app = Flask(__name__)
265
+
266
+ # ── Hugging Face Spaces: disable debug, allow large uploads ──────────────────
267
+ app.config["MAX_CONTENT_LENGTH"] = int(os.environ.get("MAX_UPLOAD_MB", "32")) * 1024 * 1024
268
+
269
+ # =========================================================
270
+ # LOAD MODELS (once at startup)
271
+ # =========================================================
272
+ logger.info("🔄 Loading models...")
273
+ cat_model_en, cat_tok_en = get_cat_en()
274
+ cat_model_indic, cat_tok_indic = get_cat_indic()
275
+ urg_model_en, urg_tok_en = get_urg_en()
276
+ urg_model_indic, urg_tok_indic = get_urg_indic()
277
+ logger.info("✅ Models loaded.")
278
+
279
+ # =========================================================
280
+ # INITIALIZE IG EXPLAINERS (once at startup)
281
+ # =========================================================
282
+ logger.info("🔄 Initializing Integrated Gradients explainers...")
283
+ category_explainer_en = IntegratedGradientsExplainer(cat_model_en, cat_tok_en)
284
+ category_explainer_indic = IntegratedGradientsExplainer(cat_model_indic, cat_tok_indic)
285
+ urgency_explainer_en = IntegratedGradientsExplainer(urg_model_en, urg_tok_en)
286
+ urgency_explainer_indic = IntegratedGradientsExplainer(urg_model_indic, urg_tok_indic)
287
+ logger.info("✅ Integrated Gradients ready.")
288
+
289
+ _RESOURCES = {
290
+ "english": {
291
+ "cat_fn": predict_category_en,
292
+ "urg_fn": predict_urgency_en,
293
+ "cat_exp": category_explainer_en,
294
+ "urg_exp": urgency_explainer_en,
295
+ }
296
+ }
297
+ _RESOURCES_INDIC = {
298
+ "cat_fn": predict_category_indic,
299
+ "urg_fn": predict_urgency_indic,
300
+ "cat_exp": category_explainer_indic,
301
+ "urg_exp": urgency_explainer_indic,
302
+ }
303
+
304
+
305
def _get_resources(language: str) -> dict:
    """Return the per-language model/explainer bundle, defaulting to the Indic set."""
    try:
        return _RESOURCES[language]
    except KeyError:
        return _RESOURCES_INDIC
307
+
308
+
309
+ # =========================================================
310
+ # HOTSPOT FORECAST CONSTANTS
311
+ # =========================================================
312
+ VALID_LABELS = [
313
+ "electricity", "garbage", "pollution",
314
+ "public transport", "roads",
315
+ "sanitation", "stray animals", "water",
316
+ ]
317
+
318
+ _PROPHET_MAX_WORKERS = int(os.environ.get("PROPHET_MAX_WORKERS", "4"))
319
+
320
+ RISK_LEVEL_THRESHOLDS = [
321
+ (75, "Critical"),
322
+ (50, "High"),
323
+ (25, "Medium"),
324
+ (0, "Low"),
325
+ ]
326
+
327
+
328
def _risk_to_level(score_0_100: float) -> str:
    """Map a 0–100 risk score onto its configured level name (thresholds are descending)."""
    matched = next(
        (label for threshold, label in RISK_LEVEL_THRESHOLDS if score_0_100 >= threshold),
        None,
    )
    return "Low" if matched is None else matched
333
+
334
+
335
def _fit_and_forecast(area: str, category: str, group_df, horizon: int) -> dict | None:
    """
    Fit a Prophet model on one (area, category) daily series and score its hotspot risk.

    Args:
        area: Area name, echoed into the result.
        category: Grievance category, echoed into the result.
        group_df: DataFrame with 'ds' (dates), 'y' (counts) and 'priorityScore'
            columns — presumably pre-aggregated per day by the caller; TODO confirm.
        horizon: Number of days to forecast ahead.

    Returns:
        Risk summary dict, or None when fewer than 2 distinct dates exist
        (not enough points to fit a trend).
    """
    if group_df["ds"].nunique() < 2:
        return None

    ts = group_df[["ds", "y"]].sort_values("ds")
    model = Prophet(weekly_seasonality=False, daily_seasonality=False)
    model.fit(ts)

    future = model.make_future_dataframe(periods=horizon)
    forecast = model.predict(future)

    # Growth: forecast-horizon mean vs the mean of the last 3 observed points.
    recent_avg = ts.tail(3)["y"].mean()
    forecast_avg = forecast.tail(horizon)["yhat"].mean()

    if recent_avg == 0:
        # Avoid division by zero on a flat/empty recent window.
        growth = 0.0
    else:
        raw_growth = ((forecast_avg - recent_avg) / recent_avg) * 100
        # Clamp to ±500% so one noisy series cannot dominate the risk score.
        growth = max(-500.0, min(500.0, raw_growth))

    # Weighted blend of growth, average priority and recent volume, squashed
    # through a sigmoid onto the 0–100 range.
    avg_priority = float(group_df["priorityScore"].mean())
    raw_risk = 0.5 * (growth / 100) + 0.3 * avg_priority + 0.2 * (recent_avg / 5)
    risk_score_100 = round(100 / (1 + math.exp(-raw_risk)), 2)

    # Confidence: narrow Prophet uncertainty bands relative to the forecast
    # magnitude → closer to 1.0 (the 1e-9 guards a zero-mean forecast).
    horizon_fc = forecast.tail(horizon)
    yhat_range = (horizon_fc["yhat_upper"] - horizon_fc["yhat_lower"]).mean()
    yhat_mean = horizon_fc["yhat"].abs().mean()
    confidence = round(1.0 - min(1.0, yhat_range / (yhat_mean + 1e-9)), 4)

    level = _risk_to_level(risk_score_100)

    return {
        "area": area,
        "category": category,
        "riskScore": risk_score_100,
        "level": level,
        "growthPercent": round(float(growth), 2),
        "forecastHorizonDays": horizon,
        "confidenceScore": confidence,
        # Underscore-prefixed diagnostics for debugging, not part of the API contract.
        "_recentAvg": round(float(recent_avg), 2),
        "_forecastAvg": round(float(forecast_avg), 2),
    }
377
+
378
+
379
+ # =========================================================
380
+ # HEALTH CHECK
381
+ # =========================================================
382
@app.route("/", methods=["GET"])
def health():
    """Root banner: service status, version and a short directory of endpoints."""
    endpoints = {
        "POST /predict": "Classify a single grievance — text / audio / image (multipart/form-data).",
        "POST /fairness-audit": "GFAS audit over N grievance records.",
        "POST /hotspot-forecast": "Prophet-based hotspot forecasting.",
    }
    payload = {
        "status": "ok",
        "version": os.environ.get("APP_VERSION", "1.0.0"),
        "message": "Multilingual Grievance API (EN / HI / TE) with IG + GFAS — running",
        "endpoints": endpoints,
    }
    return jsonify(payload)
394
+
395
+
396
@app.route("/health", methods=["GET"])
def health_check():
    """Dedicated health probe for HF Spaces liveness checks."""
    response = jsonify({"status": "ok"})
    return response, 200
400
+
401
+
402
+ # =========================================================
403
+ # POST /predict
404
+ # =========================================================
405
+ @app.route("/predict", methods=["POST"])
406
+ def predict_grievance():
407
+ try:
408
+ content_type = request.content_type or ""
409
+
410
+ if "application/json" in content_type:
411
+ data = request.get_json(silent=True) or {}
412
+ text_input = data.get("text", "").strip()
413
+ explain_flag = bool(data.get("explain", False))
414
+ has_text = bool(text_input)
415
+ has_audio = False
416
+ has_image = False
417
+ image_bytes = None
418
+ audio_file = None
419
+ else:
420
+ text_input = request.form.get("text", "").strip()
421
+ explain_raw = request.form.get("explain", "false").strip().lower()
422
+ explain_flag = explain_raw in ("true", "1", "yes")
423
+ has_text = bool(text_input)
424
+ has_audio = "audio" in request.files
425
+ has_image = "image" in request.files
426
+ image_bytes = request.files["image"].read() if has_image else None
427
+ audio_file = request.files["audio"] if has_audio else None
428
+
429
+ logger.info(
430
+ "[predict] content_type=%s has_text=%s has_audio=%s has_image=%s",
431
+ content_type[:40], has_text, has_audio, has_image,
432
+ )
433
+
434
+ if not has_text and not has_audio and not has_image:
435
+ return jsonify({
436
+ "status": "failed",
437
+ "code": "missing_input",
438
+ "message": "Please provide at least one of: 'text', 'audio', or 'image'.",
439
+ }), 400
440
+
441
+ # ── Mode A — IMAGE ONLY ────────────────────────────────────────────────
442
+ if has_image and not has_text and not has_audio:
443
+ location_status = check_image_location(image_bytes)
444
+ if location_status in ("invalid", "no_gps"):
445
+ return jsonify({
446
+ "status": "failed",
447
+ "code": "location_invalid",
448
+ "message": "Request rejected. Image location is outside Kakinada jurisdiction or contains no GPS metadata.",
449
+ "location": "invalid",
450
+ }), 403
451
+ grievance_text = extract_text_from_image(image_bytes)
452
+ input_mode = "image"
453
+ location_field = None
454
+
455
+ # ── Mode B — AUDIO ONLY ────────────────────────────────────────────────
456
+ elif has_audio and not has_text and not has_image:
457
+ grievance_text = transcribe_audio(audio_file)
458
+ input_mode = "audio"
459
+ location_field = None
460
+
461
+ # ── Mode C — TEXT ONLY ─────────────────────────────────────────────────
462
+ elif has_text and not has_image and not has_audio:
463
+ grievance_text = text_input
464
+ input_mode = "text"
465
+ location_field = None
466
+
467
+ # ── Mode D — TEXT + IMAGE (evidence) ──────────────────────────────────
468
+ elif has_text and has_image and not has_audio:
469
+ grievance_text = text_input
470
+ input_mode = "text+image"
471
+ loc_status = check_image_location(image_bytes)
472
+ location_field = "valid" if loc_status == "valid" else "invalid"
473
+
474
+ # ── Mode E — AUDIO + IMAGE (evidence) ─────────────────────────────────
475
+ elif has_audio and has_image and not has_text:
476
+ grievance_text = transcribe_audio(audio_file)
477
+ input_mode = "audio+image"
478
+ loc_status = check_image_location(image_bytes)
479
+ location_field = "valid" if loc_status == "valid" else "invalid"
480
+
481
+ else:
482
+ return jsonify({
483
+ "status": "failed",
484
+ "code": "missing_input",
485
+ "message": "Please provide at least one of: 'text', 'audio', or 'image'.",
486
+ }), 400
487
+
488
+ is_valid, error_code = validate_text(grievance_text)
489
+ if not is_valid:
490
+ return jsonify({
491
+ "status": "failed",
492
+ "code": error_code,
493
+ "message": _VALIDATION_MESSAGES[error_code],
494
+ }), 422
495
+
496
+ language = detect_language(grievance_text)
497
+ res = _get_resources(language)
498
+
499
+ category_result = res["cat_fn"](grievance_text)
500
+ category = category_result["category"]
501
+ category_conf = category_result["confidence"]
502
+ category_index = category_result.get("class_index", 0)
503
+
504
+ urgency_result = res["urg_fn"](grievance_text)
505
+ urgency = urgency_result["urgency"]
506
+ urgency_conf = urgency_result["confidence"]
507
+ urgency_index = urgency_result.get("class_index", 0)
508
+
509
+ priority_result = compute_priority_score(category, urgency, urgency_conf)
510
+ priority_score = priority_result["score"]
511
+ priority_band = priority_result["band"]
512
+
513
+ category_tokens: list = []
514
+ urgency_tokens: list = []
515
+ if explain_flag:
516
+ category_tokens = res["cat_exp"].explain(grievance_text, category_index)
517
+ urgency_tokens = res["urg_exp"].explain(grievance_text, urgency_index)
518
+
519
+ explanation = generate_final_reason(
520
+ grievance_text, category, urgency, priority_score,
521
+ category_tokens, urgency_tokens,
522
+ )
523
+
524
+ response_body = {
525
+ "status": "success",
526
+ "input_mode": input_mode,
527
+ "text": grievance_text,
528
+ "language": language,
529
+ "category": category,
530
+ "category_confidence": category_conf,
531
+ "urgency": urgency,
532
+ "urgency_confidence": urgency_conf,
533
+ "priority_score": priority_score,
534
+ "priority_band": priority_band,
535
+ "explanation": {
536
+ "category_tokens": category_tokens,
537
+ "urgency_tokens": urgency_tokens,
538
+ "category_decision": explanation["category_decision"],
539
+ "urgency_decision": explanation["urgency_decision"],
540
+ "priority_summary": explanation["priority_summary"],
541
+ "final_reason": explanation["final_reason"],
542
+ },
543
+ }
544
+
545
+ if location_field is not None:
546
+ response_body["location"] = location_field
547
+
548
+ return jsonify(response_body)
549
+
550
+ except Exception as e:
551
+ logger.exception("[predict] Unhandled exception")
552
+ return jsonify({
553
+ "status": "failed",
554
+ "code": "internal_error",
555
+ "message": str(e),
556
+ "trace": traceback.format_exc(),
557
+ }), 500
558
+
559
+
560
+ # =========================================================
561
+ # POST /fairness-audit
562
+ # =========================================================
563
+ @app.route("/fairness-audit", methods=["POST"])
564
+ def fairness_audit():
565
+ try:
566
+ data = request.get_json(silent=True)
567
+ if not data:
568
+ return jsonify({"status": "failed", "message": "Invalid JSON body."}), 400
569
+
570
+ result, error, status_code = gfas_audit(data.get("grievances"))
571
+
572
+ if error:
573
+ return jsonify(error), status_code
574
+ return jsonify(result), status_code
575
+
576
+ except Exception as e:
577
+ logger.exception("[fairness-audit] Unhandled exception")
578
+ return jsonify({
579
+ "status": "failed",
580
+ "error": str(e),
581
+ "trace": traceback.format_exc(),
582
+ }), 500
583
+
584
+
585
+ # =========================================================
586
+ # POST /hotspot-forecast
587
+ # =========================================================
588
+ @app.route("/hotspot-forecast", methods=["POST"])
589
+ def hotspot_forecast():
590
+ try:
591
+ data = request.get_json(force=True)
592
+ grievances = data.get("grievances", [])
593
+ horizon = int(data.get("horizon_days", 1))
594
+ top_n = int(data.get("top_n", 10))
595
+ source_window = int(data.get("source_window_days", 45))
596
+ generated_at = datetime.now(UTC).isoformat()
597
+
598
+ if not grievances:
599
+ return jsonify({"status": "failed", "message": "No grievances supplied"}), 422
600
+
601
+ df = pd.DataFrame(grievances)
602
+ if df.empty:
603
+ return jsonify({
604
+ "status": "success",
605
+ "generated_at": generated_at,
606
+ "top_hotspots": [],
607
+ })
608
+
609
+ df["area"] = df["area"].astype(str).str.lower().str.strip()
610
+ df["category"] = df["category"].astype(str).str.lower().str.strip()
611
+ df["ds"] = pd.to_datetime(df["createdAt"], errors="coerce", utc=True).dt.tz_convert(None)
612
+ df = df.dropna(subset=["ds"])
613
+ df["y"] = 1
614
+ df = df[df["category"].isin(VALID_LABELS)]
615
+
616
+ if df.empty:
617
+ return jsonify({
618
+ "status": "success",
619
+ "generated_at": generated_at,
620
+ "top_hotspots": [],
621
+ })
622
+
623
+ df = df.groupby(["area", "category", "ds"]).agg(
624
+ {"y": "sum", "priorityScore": "mean"}
625
+ ).reset_index()
626
+
627
+ groups = list(df.groupby(["area", "category"]))
628
+ hotspots = []
629
+ errors = []
630
+
631
+ with ThreadPoolExecutor(max_workers=_PROPHET_MAX_WORKERS) as executor:
632
+ futures = {
633
+ executor.submit(_fit_and_forecast, area, cat, gdf, horizon): (area, cat)
634
+ for (area, cat), gdf in groups
635
+ }
636
+ for future in as_completed(futures):
637
+ area, category = futures[future]
638
+ try:
639
+ result = future.result()
640
+ if result is None:
641
+ continue
642
+
643
+ result["flaskSnapshot"] = {
644
+ "recentAvg": result.pop("_recentAvg"),
645
+ "forecastAvg": result.pop("_forecastAvg"),
646
+ "sourceWindowDays": source_window,
647
+ "forecastHorizonDays": horizon,
648
+ "generatedAt": generated_at,
649
+ }
650
+ result["sourceWindowDays"] = source_window
651
+ hotspots.append(result)
652
+
653
+ except Exception as e:
654
+ errors.append({"area": area, "category": category, "error": str(e)})
655
+ logger.warning("[hotspot] Prophet failed for %s/%s: %s", area, category, e)
656
+
657
+ ranked = sorted(hotspots, key=lambda x: x["riskScore"], reverse=True)
658
+
659
+ return jsonify({
660
+ "status": "success",
661
+ "generated_at": generated_at,
662
+ "top_hotspots": ranked[:top_n],
663
+ "meta": {
664
+ "groups_evaluated": len(groups),
665
+ "forecasts_computed": len(hotspots),
666
+ "error_count": len(errors),
667
+ "errors": errors,
668
+ "source_window_days": source_window,
669
+ "horizon_days": horizon,
670
+ },
671
+ })
672
+
673
+ except Exception as e:
674
+ logger.exception("[hotspot-forecast] Unhandled exception")
675
+ return jsonify({"status": "failed", "message": str(e)}), 500
676
+
677
+
678
+ # =========================================================
679
+ # GLOBAL ERROR HANDLERS
680
+ # =========================================================
681
@app.errorhandler(413)
def request_entity_too_large(e):
    """Map oversized uploads onto a structured 413 JSON error."""
    limit_mb = app.config['MAX_CONTENT_LENGTH'] // (1024 * 1024)
    body = {
        "status": "failed",
        "code": "payload_too_large",
        "message": f"Upload exceeds the {limit_mb} MB limit.",
    }
    return jsonify(body), 413
688
+
689
+
690
@app.errorhandler(404)
def not_found(e):
    """Structured JSON body for unknown routes."""
    body = {"status": "failed", "code": "not_found", "message": "Endpoint not found."}
    return jsonify(body), 404
693
+
694
+
695
@app.errorhandler(405)
def method_not_allowed(e):
    """Structured JSON body for wrong-verb requests."""
    body = {"status": "failed", "code": "method_not_allowed", "message": "HTTP method not allowed."}
    return jsonify(body), 405
698
+
699
+
700
+ # =========================================================
701
+ # RUN SERVER — Hugging Face Spaces uses port 7860
702
+ # =========================================================
703
+ if __name__ == "__main__":
704
+ port = int(os.environ.get("PORT", 7860))
705
+ debug = os.environ.get("FLASK_DEBUG", "false").lower() == "true"
706
+ logger.info("🚀 Starting Multilingual Grievance API on port %d (debug=%s)", port, debug)
707
+ app.run(host="0.0.0.0", port=port, debug=debug, threaded=True)
multi_modal/audio_to_text.py ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # multi_modal/audio_to_text.py
3
+ #
4
+ # Converts an uploaded audio file to text using Whisper.
5
+ #
6
+ # Supports: WAV, MP3, OGG, FLAC, M4A, WEBM (mobile browsers)
7
+ # Languages: Telugu / Hindi / English (forced, no random scripts)
8
+ #
9
+ # FIXES vs previous version:
10
+ # 1. Hallucination detection — Georgian/Chinese/Arabic output
11
+ # (ვვვვ... etc.) is detected and discarded, returns ""
12
+ # 2. Language forcing — tries TE → HI → EN in order instead
13
+ # of pure auto-detect which picks random scripts
14
+ # 3. Valid script check — only accepts Latin, Telugu,
15
+ # Devanagari output. Anything else = hallucination.
16
+ # 4. 500 error fix — empty/invalid transcription now safely
17
+ # returns "" instead of passing garbage to BERT classifier
18
+ # =========================================================
19
+
20
+ import os
21
+ import tempfile
22
+ import unicodedata
23
+ import torch
24
+ import numpy as np
25
+ from transformers import pipeline
26
+
27
+ # ── Environment ────────────────────────────────────────────────────────────────
28
# Transcription backend: "local" runs Whisper in-process (loaded below at
# import time); "hf_api" proxies to the HuggingFace Inference API.
_AUDIO_BACKEND = os.environ.get("AUDIO_BACKEND", "local")  # "local" | "hf_api"
_HF_TOKEN = os.environ.get("HF_TOKEN", "")

# ── Model selection ────────────────────────────────────────────────────────────
MODEL_ID = os.environ.get("WHISPER_MODEL", "openai/whisper-small")
_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ── Valid Unicode scripts for EN / HI / TE ────────────────────────────────────
# Whisper hallucinates Georgian (ვ), Chinese (的), Arabic (ال) on bad audio;
# only these script prefixes (first word of unicodedata.name) are accepted.
# NOTE(review): "COMMON" is intended to cover punctuation/digits, but
# unicodedata.name() never yields "COMMON" as a first word — verify against
# the validator's handling of non-alphabetic characters.
_VALID_SCRIPTS = {
    "LATIN",        # English
    "DEVANAGARI",   # Hindi
    "TELUGU",       # Telugu
    "COMMON",       # punctuation, digits, spaces (see note above)
}

# Forced-language attempt order for local transcription. EN first — the most
# common case and the fastest to validate. Only these 3 are permitted.
_LANGUAGE_ORDER = ["en", "te", "hi"]
_ALLOWED_LANGUAGES = {"en", "te", "hi"}

# Expected dominant script per forced language. If we force "te" but get
# Devanagari-heavy text back (or vice versa) the result is rejected — this
# prevents Telugu audio from being accepted as Hindi.
_LANG_EXPECTED_SCRIPT = {
    "en": {"LATIN"},
    "te": {"TELUGU"},
    "hi": {"DEVANAGARI"},
}


# ── Load Whisper ONCE at import time ──────────────────────────────────────────
# Loading here (instead of per request) keeps request latency down; the
# hf_api backend skips the download entirely.
if _AUDIO_BACKEND == "local":
    print(f"🔄 Loading Whisper '{MODEL_ID}' on {_DEVICE}…")
    _ASR_PIPELINE = pipeline(
        task = "automatic-speech-recognition",
        model = MODEL_ID,
        device = _DEVICE,
    )
    print(f"✅ Whisper '{MODEL_ID}' loaded.")
else:
    _ASR_PIPELINE = None
    print(f"ℹ️ Whisper skipped — using HF API backend.")
74
+
75
+
76
+ # ─────────────────────────────────────────────────────────────────────────────
77
+ # HALLUCINATION DETECTION
78
+ # ─────────────────────────────────────────────────────────────────────────────
79
+ def _is_valid_transcription(text: str) -> bool:
80
+ """
81
+ Returns True only if the transcription looks like real speech.
82
+
83
+ Checks:
84
+ 1. Script check -- must be mostly Latin / Devanagari / Telugu
85
+ 2. Repetition check -- rejects looping hallucinations like
86
+ "apne apne apne apne..." where a word repeats 5+ times
87
+ """
88
+ if not text or len(text.strip()) < 3:
89
+ return False
90
+
91
+ chars = [c for c in text if not c.isspace()]
92
+ if not chars:
93
+ return False
94
+
95
+ # Check 1: Script validation
96
+ valid_count = 0
97
+ for c in chars:
98
+ try:
99
+ char_name = unicodedata.name(c, "")
100
+ script = char_name.split()[0] if char_name else "UNKNOWN"
101
+ if script in _VALID_SCRIPTS:
102
+ valid_count += 1
103
+ except Exception:
104
+ pass
105
+
106
+ ratio = valid_count / len(chars)
107
+ if ratio < 0.60:
108
+ print(f"[audio_to_text] WARNING script hallucination "
109
+ f"(valid_ratio={ratio:.2f}) discarding: {text[:60]!r}")
110
+ return False
111
+
112
+ # Check 2: Repetition detection
113
+ # "apne apne apne apne apne apne..." = Whisper looping hallucination
114
+ words = text.strip().split()
115
+ if len(words) >= 6:
116
+ # Max consecutive repeated word
117
+ max_repeat = 1
118
+ cur_repeat = 1
119
+ for i in range(1, len(words)):
120
+ if words[i].lower() == words[i - 1].lower():
121
+ cur_repeat += 1
122
+ max_repeat = max(max_repeat, cur_repeat)
123
+ else:
124
+ cur_repeat = 1
125
+ if max_repeat >= 5:
126
+ print(f"[audio_to_text] WARNING repetition hallucination "
127
+ f"(word repeats {max_repeat}x) discarding: {text[:60]!r}")
128
+ return False
129
+
130
+ # Low vocabulary diversity = looping hallucination
131
+ # "I love you. I love you..." = 3 unique / 15 words = 0.20 unique ratio
132
+ # Real speech always has more variety — threshold: <0.15 for longer texts
133
+ unique_ratio = len(set(w.lower() for w in words)) / len(words)
134
+ if unique_ratio < 0.15 and len(words) > 15:
135
+ print(f"[audio_to_text] WARNING low-diversity hallucination "
136
+ f"(unique_ratio={unique_ratio:.2f}) discarding: {text[:60]!r}")
137
+ return False
138
+
139
+ # Check 3: Character-level repetition — catches "अग्वावावावाव..." patterns
140
+ # where substrings repeat at character level (not caught by word check)
141
+ if len(text) > 20:
142
+ # Take a 4-char ngram from position 10 and count how many times it appears
143
+ probe = text[8:12]
144
+ rep_count = text.count(probe)
145
+ if rep_count > len(text) // 8: # appears more than once per 8 chars = looping
146
+ print(f"[audio_to_text] WARNING char-level repetition "
147
+ f"(probe {probe!r} repeats {rep_count}x) discarding: {text[:60]!r}")
148
+ return False
149
+
150
+ return True
151
+
152
+
153
+ # ─────────────────────────────────────────────────────────────────────────────
154
+ # PUBLIC API
155
+ # ─────────────────────────────────────────────────────────────────────────────
156
def transcribe_audio(audio_file) -> str:
    """Transcribe an uploaded audio file to EN / HI / TE text.

    Parameters
    ----------
    audio_file : werkzeug.datastructures.FileStorage
        File from Flask request.files["audio"].
        Accepts WAV, MP3, OGG, FLAC, M4A, WEBM.

    Returns
    -------
    str
        Transcribed text, or "" on failure/hallucination — never raises.

    Dispatches to the HuggingFace Inference API when AUDIO_BACKEND=hf_api
    and a token is configured; otherwise runs the local Whisper pipeline.
    """
    use_remote = _AUDIO_BACKEND == "hf_api" and bool(_HF_TOKEN)
    if use_remote:
        return _transcribe_via_hf_api(audio_file)
    return _transcribe_local(audio_file)
175
+
176
+
177
+ # ─────────────────────────────────────────────────────────────────────────────
178
+ # LOCAL PATH
179
+ # ─────────────────────────────────────────────────────────────────────────────
180
def _transcribe_local(audio_file) -> str:
    """Local Whisper path: decode the upload, reject unusable audio, then
    try forced languages in _LANGUAGE_ORDER and return the first
    transcription that passes validation. Returns "" on any failure.
    """
    try:
        audio_bytes = audio_file.read()
        if not audio_bytes:
            print("[audio_to_text] ⚠️ Empty audio file.")
            return ""

        suffix = _get_suffix(audio_file)

        # Write to temp file — pydub needs a file path on disk
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp.write(audio_bytes)
            tmp_path = tmp.name

        try:
            audio_array, sample_rate = _load_audio(tmp_path, suffix)
        finally:
            # Always remove the temp file, even if decoding failed.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

        if audio_array is None:
            print("[audio_to_text] ❌ Could not decode audio — is ffmpeg installed?")
            return ""

        # ── Audio quality diagnostics ──────────────────────────────────────────
        # _load_audio always returns 16 kHz mono, hence the fixed divisor.
        duration_sec = len(audio_array) / 16_000
        rms = float(np.sqrt(np.mean(audio_array ** 2)))
        peak = float(np.max(np.abs(audio_array)))
        print(f"[audio_to_text] 🔍 duration={duration_sec:.1f}s | rms={rms:.4f} | peak={peak:.4f}")

        # Reject silent or extremely quiet audio — Whisper hallucinates on silence
        if rms < 0.001:
            print("[audio_to_text] ❌ Audio is silent (rms<0.001) — nothing to transcribe")
            return ""
        if duration_sec < 0.5:
            print(f"[audio_to_text] ❌ Audio too short ({duration_sec:.2f}s) — minimum 0.5s")
            return ""

        # ── Try EN → TE → HI — never pure auto-detect ─────────────────────────
        # language=None causes Whisper to hallucinate Georgian/Chinese on bad audio.
        # Forcing each language and validating the output is far more reliable.
        #
        # IMPORTANT: the pipeline mutates the input dict internally on the first
        # call, so subsequent calls receive a broken dict. Fix: rebuild it fresh
        # for every language attempt using a copy of the original numpy array.
        audio_array_copy = audio_array.copy()

        for lang in _LANGUAGE_ORDER:
            try:
                # Fresh dict every iteration — never reuse across pipeline calls
                audio_input = {"raw": audio_array_copy.copy(), "sampling_rate": 16_000}
                result = _ASR_PIPELINE(
                    audio_input,
                    generate_kwargs={
                        "language": lang,
                        "task": "transcribe",
                        # temperature and compression_ratio_threshold cause a
                        # 'logprobs' bug in some transformers versions — removed.
                        # Hallucination is handled by our own validator instead.
                    },
                    return_timestamps=False,
                )
                text = result.get("text", "").strip()

                if not text:
                    print(f"[audio_to_text] ↩️ lang={lang} -> empty, trying next")
                    continue

                # Strict language whitelist — only EN / HI / TE accepted.
                # NOTE(review): detected_lang is computed but never used below —
                # the script-dominance check is what actually enforces this.
                detected_lang = result.get("chunks", [{}])[0].get("language", lang) if isinstance(result.get("chunks"), list) else lang

                if _is_valid_transcription(text):
                    # Extra check: does the output script match the forced language?
                    # Whisper-small often outputs Hindi (Devanagari) when forced to TE.
                    # Reject if dominant script does not match expected script for lang.
                    expected_scripts = _LANG_EXPECTED_SCRIPT.get(lang, None)
                    if expected_scripts and lang != "en":
                        chars = [c for c in text if not c.isspace()]
                        script_counts = {}
                        for c in chars:
                            try:
                                sc = unicodedata.name(c, "").split()[0]
                                script_counts[sc] = script_counts.get(sc, 0) + 1
                            except Exception:
                                pass
                        dominant = max(script_counts, key=script_counts.get) if script_counts else "UNKNOWN"
                        if dominant not in expected_scripts and dominant not in ("COMMON", "LATIN"):
                            print(f"[audio_to_text] script mismatch: forced {lang} but got {dominant} — trying next")
                            continue

                    print(f"[audio_to_text] OK lang={lang} | "
                          f"{len(text)} chars: {text[:100]}")
                    return text
                else:
                    print(f"[audio_to_text] lang={lang} hallucinated — trying next")
                    continue

            except Exception as e:
                # One language failing must not abort the others.
                print(f"[audio_to_text] ❌ lang={lang} error: {e}")
                continue

        print("[audio_to_text] ❌ All language attempts failed — returning empty")
        return ""

    except Exception as e:
        # Contract: never raise into the request handler.
        print(f"[audio_to_text] ❌ Transcription failed: {e}")
        return ""
292
+
293
+
294
def _load_audio(file_path: str, suffix: str):
    """
    Load audio file as float32 numpy array at 16 kHz mono.

    Strategy:
      1. pydub — handles MP3, OGG, WEBM, M4A, WAV, FLAC (needs ffmpeg)
      2. soundfile fallback — WAV and FLAC only (no ffmpeg needed)

    Returns (audio_array, 16000) or (None, None) on failure.
    """
    # ── pydub (primary) ────────────────────────────────────────────────────────
    try:
        from pydub import AudioSegment

        # pydub needs ffmpeg's container name, not the file extension —
        # notably ".m4a" files are MP4 containers.
        fmt = suffix.lstrip(".").lower()
        fmt_map = {"m4a": "mp4", "webm": "webm", "ogg": "ogg"}
        fmt = fmt_map.get(fmt, fmt)

        audio_seg = AudioSegment.from_file(file_path, format=fmt)
        audio_seg = audio_seg.set_channels(1).set_frame_rate(16_000)
        samples = np.array(audio_seg.get_array_of_samples(), dtype=np.float32)

        # Normalize based on actual sample width — pydub can return int16 OR int32
        # depending on source format. Always normalize to float32 [-1.0, 1.0]
        sample_width = audio_seg.sample_width  # bytes per sample: 1=8bit, 2=16bit, 4=32bit
        max_val = float(2 ** (8 * sample_width - 1))
        samples = samples / max_val
        # Safety clamp — should already be in range but guard against edge cases
        samples = np.clip(samples, -1.0, 1.0)

        print(f"[audio_to_text] pydub decoded: sample_width={sample_width}B "
              f"max_val={max_val:.0f} post_rms={float(np.sqrt(np.mean(samples**2))):.4f}")

        return samples, 16_000

    except ImportError:
        print("[audio_to_text] pydub not installed — falling back to soundfile")
        print("                pip install pydub + install ffmpeg")
    except Exception as e:
        print(f"[audio_to_text] pydub failed ({e}) — trying soundfile")

    # ── soundfile (fallback — WAV/FLAC only) ───────────────────────────────────
    try:
        import soundfile as sf
        audio_array, sample_rate = sf.read(file_path, dtype="float32")

        # Collapse multi-channel audio to mono by averaging channels.
        if audio_array.ndim > 1:
            audio_array = audio_array.mean(axis=1)

        # Whisper expects 16 kHz input.
        if sample_rate != 16_000:
            audio_array = _resample(audio_array, sample_rate, 16_000)

        return audio_array, 16_000

    except Exception as e:
        print(f"[audio_to_text] soundfile failed: {e}")
        return None, None
351
+
352
+
353
+ # ─────────────────────────────────────────────────────────────────────────────
354
+ # HF API PATH — production / HF Spaces
355
+ # ─────────────────────────────────────────────────────────────────────────────
356
def _transcribe_via_hf_api(audio_file) -> str:
    """
    Production path — HuggingFace Inference API (whisper-large-v3 on HF GPU).
    Set AUDIO_BACKEND=hf_api and HF_TOKEN=hf_xxx in HF Space Secrets.

    Why large-v3 via API instead of loading locally:
      - large-v3 = 3GB — too large to load on free HF Spaces
      - HF API runs it on GPU — faster than local CPU anyway (~15-30s vs 3min)
      - Free tier: 1000 requests/day — enough for a civic portal

    large-v3 auto-detect is accurate enough for EN/TE/HI — no need for
    the 3-attempt language loop used in local path.

    Returns "" on any failure (timeout, HTTP error, hallucination) — never raises.
    """
    import requests

    try:
        audio_bytes = audio_file.read()
        if not audio_bytes:
            return ""

        print(f"[audio_to_text] HF API: sending {len(audio_bytes)} bytes to whisper-large-v3...")

        # First attempt: auto-detect language (large-v3 is accurate enough)
        res = requests.post(
            "https://api-inference.huggingface.co/models/openai/whisper-large-v3",
            headers = {"Authorization": f"Bearer {_HF_TOKEN}"},
            data = audio_bytes,
            timeout = 120,  # HF free tier can queue up to 60s before processing
        )

        # Handle model loading (HF cold start) — 503 means "warming up";
        # wait once and retry a single time.
        if res.status_code == 503:
            import time
            print("[audio_to_text] HF API: model loading — waiting 20s...")
            time.sleep(20)
            res = requests.post(
                "https://api-inference.huggingface.co/models/openai/whisper-large-v3",
                headers = {"Authorization": f"Bearer {_HF_TOKEN}"},
                data = audio_bytes,
                timeout = 120,
            )

        if res.ok:
            data = res.json()
            # HF API returns {"text": "..."} or [{"generated_text": "..."}]
            if isinstance(data, dict):
                text = data.get("text", "").strip()
            elif isinstance(data, list) and data:
                text = data[0].get("generated_text", "").strip()
            else:
                text = ""

            # Same hallucination filter as the local path.
            if _is_valid_transcription(text):
                print(f"[audio_to_text] HF API OK: {len(text)} chars: {text[:100]}")
                return text
            else:
                print(f"[audio_to_text] HF API hallucination discarded: {text[:60]!r}")
                return ""
        else:
            print(f"[audio_to_text] HF API error {res.status_code}: {res.text[:300]}")
            return ""

    except requests.exceptions.Timeout:
        print("[audio_to_text] HF API timeout — model may be overloaded")
        return ""
    except Exception as e:
        print(f"[audio_to_text] HF API exception: {e}")
        return ""
424
+
425
+
426
+ # ─────────────────────────────────────────────────────────────────────────────
427
+ # HELPERS
428
+ # ─────────────────────────────────────────────────────────────────────────────
429
+ def _get_suffix(audio_file) -> str:
430
+ """Determine file extension from FileStorage. Defaults to .webm."""
431
+ filename = getattr(audio_file, "filename", "") or ""
432
+ mime = getattr(audio_file, "mimetype", "") or ""
433
+
434
+ _MIME_TO_EXT = {
435
+ "audio/wav": ".wav", "audio/x-wav": ".wav", "audio/wave": ".wav",
436
+ "audio/mpeg": ".mp3", "audio/mp3": ".mp3",
437
+ "audio/ogg": ".ogg",
438
+ "audio/flac": ".flac", "audio/x-flac": ".flac",
439
+ "audio/mp4": ".m4a", "audio/x-m4a": ".m4a",
440
+ "audio/webm": ".webm", "video/webm": ".webm",
441
+ }
442
+
443
+ if "." in filename:
444
+ return "." + filename.rsplit(".", 1)[-1].lower()
445
+
446
+ # Default to .webm — Chrome/Edge MediaRecorder always sends webm
447
+ return _MIME_TO_EXT.get(mime.lower(), ".webm")
448
+
449
+
450
+ def _resample(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
451
+ """Resample audio array from orig_sr to target_sr."""
452
+ try:
453
+ from scipy.signal import resample_poly
454
+ from math import gcd
455
+ g = gcd(orig_sr, target_sr)
456
+ return resample_poly(audio, target_sr // g, orig_sr // g).astype(np.float32)
457
+ except ImportError:
458
+ target_length = int(len(audio) * target_sr / orig_sr)
459
+ return np.interp(
460
+ np.linspace(0, len(audio) - 1, target_length),
461
+ np.arange(len(audio)),
462
+ audio,
463
+ ).astype(np.float32)
multi_modal/image_to_text.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # multi_modal/image_to_text.py
3
+ #
4
+ # Converts an uploaded image into grievance text for BERT.
5
+ #
6
+ # Labels: Electricity | Garbage | Pollution | Public Transport |
7
+ # Roads | Sanitation | Stray Animals | Water
8
+ #
9
+ # OUTPUT RULE:
10
+ # Only raw observed text from OCR + BLIP caption.
11
+ # No predefined phrases, no templates, no appended context.
12
+ # BERT classifier must receive unbiased descriptive text.
13
+ #
14
+ # PIPELINE:
15
+ # Step 1 — Preprocess (sharpen, contrast, resize)
16
+ # Step 2 — EasyOCR (visible text in EN/HI/TE)
17
+ # Step 3 — BLIP-base, 5 civic prompts, best-of-5 by keyword score
18
+ # Step 4 — Clean fusion: OCR + caption, no added words
19
+ #
20
+ # FOR RENDER:
21
+ # Set IMAGE_BACKEND=hf_api + HF_TOKEN=hf_xxx in .env
22
+ # =========================================================
23
+
24
+ import io
25
+ import os
26
+ import re
27
+ import torch
28
+ import numpy as np
29
+ from PIL import Image, ImageFilter, ImageEnhance
30
+
31
+ # ── Environment ────────────────────────────────────────────────────────────────
32
+ _BACKEND = os.environ.get("IMAGE_BACKEND", "local")
33
+ _HF_TOKEN = os.environ.get("HF_TOKEN", "")
34
+ _DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
35
+
36
+ # ── Lazy model handles ─────────────────────────────────────────────────────────
37
+ _ocr_reader = None
38
+ _blip_processor = None
39
+ _blip_model = None
40
+
41
+ # ── Five civic prompts for BLIP ────────────────────────────────────────────────
42
+ # Steers BLIP toward civic observation language.
43
+ # Best-scoring caption across all 5 is selected.
44
+ _CIVIC_PROMPTS = [
45
+ "a civic grievance showing",
46
+ "a public infrastructure problem showing",
47
+ "a sanitation or garbage problem showing",
48
+ "a water or drainage problem showing",
49
+ "a road or footpath damage showing",
50
+ ]
51
+
52
+ # ── Civic keyword set — for scoring captions only, never appended to output ───
53
+ _CIVIC_KEYWORDS = {
54
+ "garbage", "waste", "trash", "litter", "dumped", "overflowing", "filth",
55
+ "sewage", "drain", "clog", "smell", "foul", "unhygienic", "sanitation",
56
+ "pothole", "road", "damaged", "broken", "crack", "footpath", "pavement",
57
+ "accident", "vehicle", "commuter", "traffic",
58
+ "water", "flood", "waterlog", "overflow", "leak", "pipe", "supply",
59
+ "stagnant", "puddle", "inundated",
60
+ "electricity", "wire", "pole", "streetlight", "cable", "spark", "fallen",
61
+ "pollution", "smoke", "dust", "emission", "contamination",
62
+ "animal", "stray", "dog", "cattle", "menace",
63
+ "transport", "bus", "auto", "road", "signal",
64
+ "hazard", "risk", "danger", "health", "resident", "street", "public",
65
+ "municipal", "colony", "area", "locality", "civic", "problem",
66
+ "issue", "blocked", "accumulated", "piled", "scattered",
67
+ }
68
+
69
+
70
+ # ─────────────────────────────────────────────────────────────────────────────
71
+ # PUBLIC API
72
+ # ─────────────────────────────────────────────────────────────────────────────
73
def extract_text_from_image(image_bytes: bytes) -> str:
    """
    Turn raw image bytes into descriptive grievance text.

    The output is purely what the image contains — no predefined phrases —
    so the downstream BERT classifier receives unbiased input.

    Parameters
    ----------
    image_bytes : bytes
        Raw bytes from Flask request.files["image"].read()

    Returns
    -------
    str
        Observed description of the image, e.g.
        "garbage dumped on the road near residential area".
        "" on empty input or total failure (never raises).
    """
    if not image_bytes:
        return ""

    use_remote = _BACKEND == "hf_api" and bool(_HF_TOKEN)
    if use_remote:
        return _extract_via_hf_api(image_bytes)
    return _extract_local(image_bytes)
102
+
103
+
104
+ # ─────────────────────────────────────────────────────────────────────────────
105
+ # STEP 1 — PREPROCESSING
106
+ # ─────────────────────────────────────────────────────────────────────────────
107
def _preprocess_image(image_bytes: bytes) -> Image.Image:
    """
    Decode and enhance an image for best BLIP + OCR accuracy:
    RGB conversion, long edge capped at 1024px, unsharp mask to
    recover blurry phone shots, and a 20% contrast boost.
    """
    img = Image.open(io.BytesIO(image_bytes)).convert("RGB")

    width, height = img.size
    longest = max(width, height)
    if longest > 1024:
        ratio = 1024 / longest
        img = img.resize((int(width * ratio), int(height * ratio)), Image.LANCZOS)

    sharpened = img.filter(ImageFilter.UnsharpMask(radius=1.5, percent=120, threshold=3))
    return ImageEnhance.Contrast(sharpened).enhance(1.2)
125
+
126
+
127
+ # ─────────────────────────────────────────────────────────────────────────────
128
+ # STEP 2 — OCR
129
+ # ─────────────────────────────────────────────────────────────────────────────
130
def _load_ocr():
    """Lazily create the shared EasyOCR reader (EN + HI + TE).

    Returns the reader instance, or None when easyocr is missing or its
    initialisation failed. The module-level ``_ocr_reader`` caches either
    the reader or the sentinel string "unavailable", so a failed load is
    never retried on every request.
    """
    global _ocr_reader
    if _ocr_reader is not None:
        # Already attempted — map the failure sentinel back to None.
        return None if _ocr_reader == "unavailable" else _ocr_reader
    try:
        import easyocr
        print("🔄 Loading EasyOCR (EN + HI + TE)…")
        _ocr_reader = easyocr.Reader(
            ["en", "hi", "te"],
            gpu=torch.cuda.is_available(),
            verbose=False,
        )
        print("✅ EasyOCR loaded.")
    except ImportError:
        print("⚠️ easyocr not installed — run: pip install easyocr")
        _ocr_reader = "unavailable"
    except Exception as e:
        # e.g. model download failure — degrade gracefully, OCR step is skipped.
        print(f"⚠️ EasyOCR load error: {e}")
        _ocr_reader = "unavailable"
    return None if _ocr_reader == "unavailable" else _ocr_reader
150
+
151
+
152
def _run_ocr(img: Image.Image) -> str:
    """OCR the image; return '' when OCR is unavailable, fails, or the
    recognised text is too short (< 6 chars) to be meaningful."""
    try:
        ocr = _load_ocr()
        if ocr is None:
            return ""
        fragments = ocr.readtext(np.array(img), detail=0, paragraph=True)
        joined = " ".join(fragments).strip()
        if len(joined) < 6:
            return ""
        return joined
    except Exception as e:
        print(f"[image_to_text] OCR error: {e}")
        return ""
165
+
166
+
167
+ # ─────────────────────────────────────────────────────────────────────────────
168
+ # STEP 3 — BLIP MULTI-PROMPT CAPTIONING
169
+ # ─────────────────────────────────────────────────────────────────────────────
170
def _load_blip():
    """Lazily load the BLIP-base processor + model onto ``_DEVICE``.

    Returns ``(processor, model)``, or ``(None, None)`` when loading
    failed. The module-level handles cache either the objects or the
    sentinel string "unavailable" so a failed download is not retried
    on every request.
    """
    global _blip_processor, _blip_model
    if _blip_model is not None:
        # Already attempted — translate the failure sentinel to (None, None).
        return (None, None) if _blip_model == "unavailable" else (_blip_processor, _blip_model)
    try:
        from transformers import BlipProcessor, BlipForConditionalGeneration
        print("🔄 Loading BLIP-base captioning model (~450 MB)…")
        _blip_processor = BlipProcessor.from_pretrained(
            "Salesforce/blip-image-captioning-base"
        )
        _blip_model = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-base"
        ).to(_DEVICE)
        _blip_model.eval()  # inference only — disables dropout
        print("✅ BLIP-base loaded.")
    except Exception as e:
        print(f"⚠️ BLIP load error: {e}")
        _blip_model = "unavailable"
    return (None, None) if _blip_model == "unavailable" else (_blip_processor, _blip_model)
189
+
190
+
191
def _score_caption(caption: str) -> int:
    """Count distinct civic keywords in a caption.

    Used only to rank candidate captions — keywords are never appended
    to the output.
    """
    tokens = re.findall(r'\b\w+\b', caption.lower())
    return sum(1 for tok in set(tokens) if tok in _CIVIC_KEYWORDS)
195
+
196
+
197
def _run_blip_multi_prompt(img: Image.Image) -> str:
    """
    Run BLIP with 5 civic prompts.
    Returns the caption with the highest civic keyword score.
    Raw caption only — no extra words added.

    Falls back to unconditional captioning when every prompted attempt
    fails or only echoes its prompt. Returns "" if BLIP is unavailable.
    """
    processor, model = _load_blip()
    if model is None:
        return ""

    best_caption = ""
    best_score = -1  # -1 so even a zero-keyword caption can win

    for prompt in _CIVIC_PROMPTS:
        try:
            inputs = processor(
                img,
                text = prompt,
                return_tensors = "pt",
            ).to(_DEVICE)

            with torch.no_grad():
                output = model.generate(
                    **inputs,
                    max_new_tokens = 60,
                    num_beams = 5,
                    early_stopping = True,
                    no_repeat_ngram_size = 3,
                )

            caption = processor.decode(output[0], skip_special_tokens=True).strip()

            # Skip if model just echoed the prompt with no new content
            if len(caption) <= len(prompt) + 5:
                continue

            score = _score_caption(caption)
            if score > best_score:
                best_score = score
                best_caption = caption

        except Exception as e:
            # One bad prompt must not kill the remaining attempts.
            print(f"[image_to_text] Prompt failed: {e}")
            continue

    # Unconditional fallback — plain caption with no prompt steering.
    if not best_caption:
        try:
            inputs = processor(img, return_tensors="pt").to(_DEVICE)
            with torch.no_grad():
                output = model.generate(
                    **inputs,
                    max_new_tokens = 60,
                    num_beams = 5,
                    no_repeat_ngram_size = 3,
                )
            best_caption = processor.decode(output[0], skip_special_tokens=True).strip()
        except Exception as e:
            print(f"[image_to_text] Unconditional fallback failed: {e}")

    return best_caption
258
+
259
+
260
+ # ─────────────────────────────────────────────────────────────────────────────
261
+ # STEP 4 — CLEAN FUSION (no predefined phrases added)
262
+ # ─────────────────────────────────────────────────────────────────────────────
263
+ def _is_redundant(text_a: str, text_b: str) -> bool:
264
+ """True if text_a is >60% word-overlap with text_b (already covered)."""
265
+ words_a = set(text_a.lower().split())
266
+ words_b = set(text_b.lower().split())
267
+ if not words_a:
268
+ return True
269
+ return len(words_a & words_b) / len(words_a) > 0.6
270
+
271
+
272
def _fuse(ocr_text: str, caption: str) -> str:
    """
    Merge OCR text and BLIP caption into one clean string without
    injecting any template words.

    Long OCR (> 20 chars) wins — it is the literal complaint text — and
    the caption is appended only when it contributes new words.
    Otherwise the caption leads, with short OCR appended when it is not
    redundant. Returns "" when both inputs are empty.
    """
    ocr = ocr_text.strip()
    cap = caption.strip()

    if len(ocr) > 20:
        # OCR carries the real complaint text — it is the primary output.
        if cap and not _is_redundant(cap, ocr):
            return f"{ocr}. {cap}"
        return ocr

    if cap:
        # Pure photo — the BLIP caption is emitted as-is.
        if ocr and not _is_redundant(ocr, cap):
            return f"{cap}. {ocr}"
        return cap

    return ocr or cap
298
+
299
+
300
+ # ─────────────────────────────────────────────────────────────────────────────
301
+ # LOCAL PIPELINE
302
+ # ─────────────────────────────────────────────────────────────────────────────
303
def _extract_local(image_bytes: bytes) -> str:
    """Full local pipeline: preprocess → OCR → BLIP caption → fuse.

    Never raises — any failure is logged and "" is returned.
    """
    try:
        picture = _preprocess_image(image_bytes)
        ocr_text = _run_ocr(picture)
        caption = _run_blip_multi_prompt(picture)

        if ocr_text:
            print(f"[image_to_text] OCR: {ocr_text[:100]}")
        if caption:
            print(f"[image_to_text] Caption (score={_score_caption(caption)}): {caption[:100]}")

        fused = _fuse(ocr_text, caption)
        print(f"[image_to_text] ✅ Output: {fused[:160]}")
        return fused
    except Exception as e:
        print(f"[image_to_text] ❌ Pipeline failed: {e}")
        return ""
321
+
322
+
323
+ # ─────────────────────────────────────────────────────────────────────────────
324
+ # HF API PATH — Render / production
325
+ # ─────────────────────────────────────────────────────────────────────────────
326
def _extract_via_hf_api(image_bytes: bytes) -> str:
    """
    Production path — HuggingFace Inference API (BLIP-large captioning).

    Raw caption returned as-is; no predefined text added.
    Returns "" on any failure (never raises).
    """
    import requests
    try:
        res = requests.post(
            "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large",
            headers = {"Authorization": f"Bearer {_HF_TOKEN}"},
            data = image_bytes,
            timeout = 30,
        )
        if res.ok:
            data = res.json()
            caption = ""
            # Guard against an empty list so a bare response cannot raise.
            if isinstance(data, list) and data:
                caption = data[0].get("generated_text", "").strip()
            print(f"[image_to_text] ✅ HF API output: {caption[:160]}")
            return caption
        # Previously non-2xx responses fell through silently; log them
        # (same style as the audio module's HF API path).
        print(f"[image_to_text] HF API error {res.status_code}: {res.text[:300]}")
    except Exception as e:
        print(f"[image_to_text] HF API error: {e}")
    return ""
requirements.txt ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Core ML / DL ──────────────────────────────────────────────
2
+ torch
3
+ transformers==5.2.0
4
+ tokenizers==0.22.2
5
+ accelerate>=1.1.0
6
+ safetensors>=0.4.3
7
+ huggingface-hub>=1.3.0
8
+
9
+ # ── Audio ──────────────────────────────────────────────────────
10
+ pydub
11
+ soundfile
12
+ scipy
13
+
14
+ # ── Image ──────────────────────────────────────────────────────
15
+ Pillow
16
+ easyocr
17
+ opencv-python-headless
18
+
19
+ # ── NLP / Text ─────────────────────────────────────────────────
20
+ sentencepiece
21
+ tiktoken
22
+ protobuf>=5.28.0
23
+ regex
24
+ nltk
25
+ indic-nlp-library
26
+ stopwordsiso
27
+
28
+ # ── Explainability ─────────────────────────────────────────────
29
+ captum
30
+ shap>=0.44
31
+
32
+ # ── Forecasting ────────────────────────────────────────────────
33
+ prophet
34
+
35
+ # ── Data / ML ──────────────────────────────────────────────────
36
+ pandas
37
+ numpy
38
+ scikit-learn
39
+ matplotlib
40
+ seaborn
41
+
42
+ # ── Backend ────────────────────────────────────────────────────
43
+ # Flask only — MongoDB + Cloudinary are handled by Express/Node
44
+ flask
45
+ flask-cors
46
+ gunicorn
47
+ werkzeug
48
+ python-dotenv
49
+ requests==2.32.3
50
+ python-dotenv==1.0.1
sentiment_analysis/artifacts/indic_urgency_model/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "AlbertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0,
6
+ "bos_token_id": 2,
7
+ "classifier_dropout_prob": 0.1,
8
+ "down_scale_factor": 1,
9
+ "dtype": "float32",
10
+ "embedding_size": 128,
11
+ "eos_token_id": 3,
12
+ "gap_size": 0,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0,
15
+ "hidden_size": 768,
16
+ "id2label": {
17
+ "0": "LABEL_0",
18
+ "1": "LABEL_1",
19
+ "2": "LABEL_2",
20
+ "3": "LABEL_3"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "inner_group_num": 1,
24
+ "intermediate_size": 3072,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1,
28
+ "LABEL_2": 2,
29
+ "LABEL_3": 3
30
+ },
31
+ "layer_norm_eps": 1e-12,
32
+ "max_position_embeddings": 512,
33
+ "model_type": "albert",
34
+ "net_structure_type": 0,
35
+ "num_attention_heads": 12,
36
+ "num_hidden_groups": 1,
37
+ "num_hidden_layers": 12,
38
+ "num_memory_blocks": 0,
39
+ "pad_token_id": 0,
40
+ "problem_type": "single_label_classification",
41
+ "tie_word_embeddings": true,
42
+ "transformers_version": "5.1.0",
43
+ "type_vocab_size": 2,
44
+ "use_cache": false,
45
+ "vocab_size": 200000
46
+ }
sentiment_analysis/artifacts/indic_urgency_model/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:693a9eaa2ea5336e580da8eb27a85d30bb0e2100184c6a491f5d60f5df14abf7
3
+ size 275
sentiment_analysis/artifacts/indic_urgency_model/label_map.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d80d3aacf4a3ee5cabfa43c871d944fce68e1e4602d2154417d1c4ca3899edf7
3
+ size 194
sentiment_analysis/artifacts/indic_urgency_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5eb8a6dd044987f78c003f910440efb162c76d3b780ff2c0026c19158fac2df
3
+ size 14969267
sentiment_analysis/artifacts/indic_urgency_model/tokenizer_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": true,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "<pad>",
10
+ "[CLS]",
11
+ "[SEP]",
12
+ "[MASK]"
13
+ ],
14
+ "is_local": false,
15
+ "keep_accents": false,
16
+ "mask_token": "[MASK]",
17
+ "model_max_length": 1000000000000000019884624838656,
18
+ "pad_token": "<pad>",
19
+ "sep_token": "[SEP]",
20
+ "tokenizer_class": "AlbertTokenizer",
21
+ "trim_offsets": true,
22
+ "unk_id": 1,
23
+ "unk_token": "<unk>"
24
+ }
sentiment_analysis/artifacts/urgency_bert_model/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": null,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "LABEL_0",
17
+ "1": "LABEL_1",
18
+ "2": "LABEL_2",
19
+ "3": "LABEL_3"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "is_decoder": false,
24
+ "label2id": {
25
+ "LABEL_0": 0,
26
+ "LABEL_1": 1,
27
+ "LABEL_2": 2,
28
+ "LABEL_3": 3
29
+ },
30
+ "layer_norm_eps": 1e-12,
31
+ "max_position_embeddings": 512,
32
+ "model_type": "bert",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
+ "pad_token_id": 0,
36
+ "position_embedding_type": "absolute",
37
+ "problem_type": "single_label_classification",
38
+ "tie_word_embeddings": true,
39
+ "transformers_version": "5.1.0",
40
+ "type_vocab_size": 2,
41
+ "use_cache": false,
42
+ "vocab_size": 30522
43
+ }
sentiment_analysis/artifacts/urgency_bert_model/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:693a9eaa2ea5336e580da8eb27a85d30bb0e2100184c6a491f5d60f5df14abf7
3
+ size 275
sentiment_analysis/artifacts/urgency_bert_model/label_map.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d80d3aacf4a3ee5cabfa43c871d944fce68e1e4602d2154417d1c4ca3899edf7
3
+ size 194
sentiment_analysis/artifacts/urgency_bert_model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
sentiment_analysis/artifacts/urgency_bert_model/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "is_local": false,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 512,
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "BertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
sentiment_analysis/bert_model.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# =========================================================
# BERT URGENCY MODEL TRAINING
# File: bert_model.py
# Purpose: Train urgency prediction (Low, Medium, High, Critical)
# =========================================================

import os
import re
import pickle
import pandas as pd
import numpy as np
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, balanced_accuracy_score, matthews_corrcoef

from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    Trainer,
    TrainingArguments
)

from torch.utils.data import Dataset


# =========================================================
# PATH CONFIGURATION
# =========================================================
# All paths are resolved relative to this file so the script works from
# any working directory.

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

DATA_PATH = os.path.join(BASE_DIR, "urgency_train.csv")

ARTIFACT_DIR = os.path.join(BASE_DIR, "artifacts")

MODEL_DIR = os.path.join(ARTIFACT_DIR, "urgency_bert_model")

os.makedirs(MODEL_DIR, exist_ok=True)


# =========================================================
# PARAMETERS
# =========================================================

MAX_LENGTH = 128      # token truncation length — the inference side uses 128 too
EPOCHS = 4
BATCH_SIZE = 16
LEARNING_RATE = 2e-5


# =========================================================
# LOAD DATASET
# =========================================================

print(f"\nLoading dataset from: {DATA_PATH}")

df = pd.read_csv(DATA_PATH)

# Keep only the two columns the model needs; drop incomplete and
# duplicate rows before splitting.
df = df[["text", "urgency"]]

df.dropna(inplace=True)

df.drop_duplicates(inplace=True)
66
+
67
+
68
+ # =========================================================
69
+ # CLEAN TEXT
70
+ # =========================================================
71
+
72
+ def clean_text(text):
73
+
74
+ text = str(text)
75
+
76
+ text = re.sub(r"<.*?>", " ", text)
77
+
78
+ text = re.sub(r"\s+", " ", text).strip()
79
+
80
+ return text
81
+
82
+
83
+ df["text"] = df["text"].apply(clean_text)
84
+
85
+
86
+ # =========================================================
87
+ # LABEL ENCODING
88
+ # =========================================================
89
+
90
+ label_encoder = LabelEncoder()
91
+
92
+ df["label_id"] = label_encoder.fit_transform(df["urgency"])
93
+
94
+ label_map = dict(zip(
95
+ label_encoder.classes_,
96
+ label_encoder.transform(label_encoder.classes_)
97
+ ))
98
+
99
+
100
+ # SAVE LABEL ARTIFACTS
101
+
102
+ with open(os.path.join(MODEL_DIR, "label_encoder.pkl"), "wb") as f:
103
+ pickle.dump(label_encoder, f)
104
+
105
+ with open(os.path.join(MODEL_DIR, "label_map.pkl"), "wb") as f:
106
+ pickle.dump(label_map, f)
107
+
108
+
109
+ NUM_LABELS = len(label_map)
110
+
111
+ print("Classes:", label_map)
112
+
113
+
114
+ # =========================================================
115
+ # SPLIT DATA
116
+ # =========================================================
117
+
118
+ train_df, temp_df = train_test_split(
119
+
120
+ df,
121
+ test_size=0.30,
122
+ stratify=df["label_id"],
123
+ random_state=42
124
+ )
125
+
126
+ val_df, test_df = train_test_split(
127
+
128
+ temp_df,
129
+ test_size=0.50,
130
+ stratify=temp_df["label_id"],
131
+ random_state=42
132
+ )
133
+
134
+
135
+ # =========================================================
136
+ # TOKENIZER
137
+ # =========================================================
138
+
139
+ tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
140
+
141
+ tokenizer.save_pretrained(MODEL_DIR)
142
+
143
+
144
+ # =========================================================
145
+ # DATASET CLASS
146
+ # =========================================================
147
+
148
class UrgencyDataset(Dataset):
    """Torch dataset wrapping tokenised grievance texts plus integer labels.

    Tokenisation happens eagerly in __init__ using the module-level
    ``tokenizer`` and ``MAX_LENGTH``.
    """

    def __init__(self, texts, labels):
        self.encodings = tokenizer(
            list(texts),
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH
        )
        self.labels = list(labels)

    def __getitem__(self, idx):
        sample = {key: torch.tensor(values[idx]) for key, values in self.encodings.items()}
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample

    def __len__(self):
        return len(self.labels)
172
+
173
+
174
+ train_dataset = UrgencyDataset(train_df["text"], train_df["label_id"])
175
+
176
+ val_dataset = UrgencyDataset(val_df["text"], val_df["label_id"])
177
+
178
+ test_dataset = UrgencyDataset(test_df["text"], test_df["label_id"])
179
+
180
+
181
+ # =========================================================
182
+ # LOAD MODEL
183
+ # =========================================================
184
+
185
+ model = BertForSequenceClassification.from_pretrained(
186
+
187
+ "bert-base-uncased",
188
+
189
+ num_labels=NUM_LABELS
190
+ )
191
+
192
+
193
+ # =========================================================
194
+ # METRICS
195
+ # =========================================================
196
+
197
def compute_metrics(eval_pred):
    """Trainer metrics hook: accuracy, balanced accuracy, weighted F1, MCC."""
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    metrics = {
        "accuracy": accuracy_score(labels, preds),
        "balanced_accuracy": balanced_accuracy_score(labels, preds),
        "f1_weighted": f1_score(labels, preds, average="weighted"),
        "mcc": matthews_corrcoef(labels, preds),
    }
    return metrics
213
+
214
+
215
# =========================================================
# TRAINING CONFIG
# =========================================================

training_args = TrainingArguments(
    output_dir=os.path.join(ARTIFACT_DIR, "results"),
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    logging_steps=100,
    save_strategy="no",   # no checkpoints — the final model is saved explicitly below
    report_to="none"
)

# NOTE(review): eval_dataset and compute_metrics are wired up but no eval
# strategy is configured here, so validation metrics may never be reported
# during training — confirm this is intended.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)
print("\nTraining urgency BERT model...")

trainer.train()


# =========================================================
# FINAL TEST EVALUATION
# =========================================================
# Held-out test split — evaluated once after training.

predictions = trainer.predict(test_dataset)

y_true = predictions.label_ids

y_pred = np.argmax(predictions.predictions, axis=1)

print("\nFINAL TEST RESULTS")

print("Accuracy:", accuracy_score(y_true, y_pred))

print("F1:", f1_score(y_true, y_pred, average="weighted"))


# =========================================================
# SAVE MODEL
# =========================================================

trainer.save_model(MODEL_DIR)

print("\nUrgency BERT model saved successfully.")
267
+
268
+
sentiment_analysis/bert_predict.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# =========================================================
# BERT URGENCY PREDICTION — ENGLISH
# =========================================================

import os
import torch
import pickle
from transformers import BertTokenizer, BertForSequenceClassification

# ── Load artifacts ────────────────────────────────────────
# Model, tokenizer and label encoder are loaded once at import time so
# every prediction reuses the same in-memory objects.
BASE_DIR = os.path.dirname(__file__)
MODEL_DIR = os.path.join(BASE_DIR, "artifacts", "urgency_bert_model")

tokenizer = BertTokenizer.from_pretrained(MODEL_DIR)
model = BertForSequenceClassification.from_pretrained(MODEL_DIR)

# NOTE(review): the file handle given to pickle.load is never closed —
# harmless at import time, but a `with open(...)` block would be tidier.
label_encoder = pickle.load(
    open(os.path.join(MODEL_DIR, "label_encoder.pkl"), "rb")
)

model.eval()  # inference only — disables dropout

MAX_LENGTH = 128  # matches the training-time truncation length
24
+
25
+
26
+ # ── Predict ───────────────────────────────────────────────
27
+ def predict_urgency(
28
+ text: str,
29
+ input_ids=None, # O3: pre-tokenised tensor from main.py
30
+ attention_mask=None, # O3: pre-tokenised tensor from main.py
31
+ ) -> dict:
32
+ """
33
+ Predict urgency level for English grievance text.
34
+
35
+ Args:
36
+ text : Raw input string.
37
+ input_ids : Optional pre-tokenised tensor (1, seq_len).
38
+ attention_mask : Required when input_ids is provided.
39
+
40
+ Returns dict with keys: urgency, confidence, class_index.
41
+ """
42
+ # O3: use pre-tokenised tensors if supplied; otherwise tokenise now.
43
+ if input_ids is None:
44
+ enc = tokenizer(
45
+ text,
46
+ return_tensors="pt",
47
+ truncation=True,
48
+ padding=False,
49
+ max_length=MAX_LENGTH,
50
+ )
51
+ input_ids = enc["input_ids"]
52
+ attention_mask = enc["attention_mask"]
53
+
54
+ with torch.no_grad():
55
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask)
56
+
57
+ probs = torch.softmax(outputs.logits, dim=1)
58
+ conf, pred = torch.max(probs, dim=1)
59
+ confidence = conf.item()
60
+ predicted_index = pred.item()
61
+
62
+ urgency = label_encoder.inverse_transform([predicted_index])[0]
63
+
64
+ return {
65
+ "urgency": urgency,
66
+ "confidence": round(confidence, 4),
67
+ "class_index": predicted_index,
68
+ }
69
+
70
+
71
+ def get_model_and_tokenizer():
72
+ return model, tokenizer
73
+
74
+
75
+ # ── Standalone test ───────────────────────────────────────
76
+ if __name__ == "__main__":
77
+ print("\nBERT Urgency Prediction Test")
78
+ while True:
79
+ text = input("\nEnter grievance (or 'exit'): ")
80
+ if text.lower() == "exit":
81
+ break
82
+ print(predict_urgency(text))
sentiment_analysis/indic_bert_model.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # INDICBERT URGENCY MODEL TRAINING
3
+ # File: indic_bert_model.py
4
+ # Supports: Hindi + Telugu urgency prediction
5
+ # Labels: Low, Medium, High, Critical
6
+ # =========================================================
7
+
8
+ import os
9
+ import re
10
+ import pickle
11
+ import pandas as pd
12
+ import numpy as np
13
+ import torch
14
+
15
+ from sklearn.model_selection import train_test_split
16
+ from sklearn.preprocessing import LabelEncoder
17
+ from sklearn.metrics import accuracy_score, f1_score, balanced_accuracy_score, matthews_corrcoef
18
+
19
+ from transformers import (
20
+ AutoTokenizer,
21
+ AutoModelForSequenceClassification,
22
+ Trainer,
23
+ TrainingArguments
24
+ )
25
+
26
+ from torch.utils.data import Dataset
27
+
28
+
29
# =========================================================
# PATH CONFIG
# =========================================================

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(BASE_DIR, "urgency_indic.csv")
ARTIFACT_DIR = os.path.join(BASE_DIR, "artifacts")
MODEL_DIR = os.path.join(ARTIFACT_DIR, "indic_urgency_model")

# Make sure the output directory exists before any artifact is written.
os.makedirs(MODEL_DIR, exist_ok=True)


# =========================================================
# PARAMETERS
# =========================================================

MODEL_NAME = "ai4bharat/indic-bert"  # multilingual backbone covering Hindi/Telugu

MAX_LENGTH = 128      # tokens per example
EPOCHS = 4
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
54
+
55
+
56
# =========================================================
# LOAD DATASET
# =========================================================

print(f"\nLoading Indic urgency dataset from: {DATA_PATH}")

# Keep only the two columns the model needs, then drop gaps and repeats.
df = pd.read_csv(DATA_PATH)
df = df[["text", "urgency"]]
df = df.dropna()
df = df.drop_duplicates()
69
+
70
+
71
# =========================================================
# CLEAN TEXT
# =========================================================

def clean_text(text):
    """Strip HTML-like tags and collapse whitespace runs into single spaces."""
    without_tags = re.sub(r"<.*?>", " ", str(text))
    return re.sub(r"\s+", " ", without_tags).strip()
84
+
85
+
86
+ df["text"] = df["text"].apply(clean_text)
87
+
88
+
89
# =========================================================
# LABEL ENCODING
# =========================================================

label_encoder = LabelEncoder()
df["label_id"] = label_encoder.fit_transform(df["urgency"])

# Human-readable class → integer id mapping, persisted with the model.
label_map = {
    cls: idx
    for cls, idx in zip(
        label_encoder.classes_,
        label_encoder.transform(label_encoder.classes_),
    )
}

# SAVE LABEL ARTIFACTS
with open(os.path.join(MODEL_DIR, "label_encoder.pkl"), "wb") as f:
    pickle.dump(label_encoder, f)

with open(os.path.join(MODEL_DIR, "label_map.pkl"), "wb") as f:
    pickle.dump(label_map, f)

NUM_LABELS = len(label_map)

print("Classes:", label_map)
115
+
116
+
117
# =========================================================
# TRAIN / VAL / TEST SPLIT
# =========================================================

# 70 % train; the remaining 30 % is halved into val / test.
# Both splits are stratified so each keeps the urgency class balance.
train_df, temp_df = train_test_split(
    df,
    test_size=0.30,
    stratify=df["label_id"],
    random_state=42,
)

val_df, test_df = train_test_split(
    temp_df,
    test_size=0.50,
    stratify=temp_df["label_id"],
    random_state=42,
)
136
+
137
+
138
# =========================================================
# TOKENIZER
# =========================================================

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Persist the tokenizer next to the model so inference loads both from MODEL_DIR.
tokenizer.save_pretrained(MODEL_DIR)
145
+
146
+
147
# =========================================================
# DATASET CLASS
# =========================================================

class IndicUrgencyDataset(Dataset):
    """Torch Dataset pairing pre-tokenised encodings with urgency label ids."""

    def __init__(self, texts, labels):
        # Tokenise the whole split up-front; padding=True pads every example
        # to the longest sequence in the split.
        self.encodings = tokenizer(
            list(texts),
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH,
        )
        self.labels = list(labels)

    def __getitem__(self, idx):
        sample = {
            name: torch.tensor(values[idx])
            for name, values in self.encodings.items()
        }
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample

    def __len__(self):
        return len(self.labels)
182
+
183
+
184
+ train_dataset = IndicUrgencyDataset(train_df["text"], train_df["label_id"])
185
+
186
+ val_dataset = IndicUrgencyDataset(val_df["text"], val_df["label_id"])
187
+
188
+ test_dataset = IndicUrgencyDataset(test_df["text"], test_df["label_id"])
189
+
190
+
191
# =========================================================
# MODEL
# =========================================================

# Pretrained backbone with a fresh classification head sized to the
# number of urgency classes.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_LABELS,
)
201
+
202
+
203
# =========================================================
# METRICS
# =========================================================

def compute_metrics(eval_pred):
    """Return accuracy / balanced accuracy / weighted F1 / MCC for the Trainer."""
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)

    return {
        "accuracy": accuracy_score(labels, predictions),
        "balanced_accuracy": balanced_accuracy_score(labels, predictions),
        "f1_weighted": f1_score(labels, predictions, average="weighted"),
        "mcc": matthews_corrcoef(labels, predictions),
    }
223
+
224
+
225
# =========================================================
# TRAINING CONFIG
# =========================================================
training_args = TrainingArguments(
    output_dir=f"{ARTIFACT_DIR}/indic_results",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    logging_steps=100,
    save_strategy="no",   # no mid-training checkpoints; final model saved below
    report_to="none",     # disable wandb/tensorboard reporting
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

print("\nTraining IndicBERT urgency model...")
trainer.train()


# =========================================================
# SAVE MODEL
# =========================================================

trainer.save_model(MODEL_DIR)

print("\nIndicBERT urgency model saved successfully.")
sentiment_analysis/indic_bert_predict.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================================
2
+ # INDICBERT URGENCY PREDICTION — HINDI + TELUGU
3
+ # =========================================================
4
+
5
+ import os
6
+ import re
7
+ import torch
8
+ import pickle
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
+
11
# ── Load artifacts ────────────────────────────────────────
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, "artifacts", "indic_urgency_model")

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
model.eval()  # inference-only mode

with open(os.path.join(MODEL_DIR, "label_encoder.pkl"), "rb") as f:
    label_encoder = pickle.load(f)

MAX_LENGTH = 128  # must match the training-time token budget
23
+
24
+
25
# ── Text cleaning ─────────────────────────────────────────
def clean_text(text: str) -> str:
    """Drop HTML-like tags and squeeze whitespace down to single spaces."""
    stripped = re.sub(r"<.*?>", " ", str(text))
    return re.sub(r"\s+", " ", stripped).strip()
31
+
32
+
33
# ── Predict ───────────────────────────────────────────────
def predict(
    text: str,
    input_ids=None,       # O3: pre-tokenised tensor from main.py
    attention_mask=None,  # O3: pre-tokenised tensor from main.py
) -> dict:
    """
    Predict urgency level for Hindi / Telugu grievance text.

    Args:
        text : Raw input string (ignored when input_ids is given).
        input_ids : Optional pre-tokenised tensor (1, seq_len).
        attention_mask : Required when input_ids is provided.

    Returns dict with keys: urgency, confidence, class_index.

    Raises:
        ValueError: if input_ids is supplied without attention_mask.
    """
    # O3: use pre-tokenised tensors if supplied; otherwise tokenise now.
    if input_ids is None:
        cleaned = clean_text(text)
        enc = tokenizer(
            cleaned,
            return_tensors="pt",
            truncation=True,
            padding=False,
            max_length=MAX_LENGTH,
        )
        input_ids = enc["input_ids"]
        attention_mask = enc["attention_mask"]
    elif attention_mask is None:
        # Fail fast with a clear message instead of an opaque model error.
        raise ValueError("attention_mask is required when input_ids is provided")

    with torch.no_grad():  # inference only — skip gradient bookkeeping
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    probs = torch.softmax(outputs.logits, dim=1)
    conf, pred = torch.max(probs, dim=1)
    confidence = conf.item()
    predicted_index = pred.item()

    # Map the class index back to its human-readable urgency label.
    urgency = label_encoder.inverse_transform([predicted_index])[0]

    return {
        "urgency": urgency,
        "confidence": round(confidence, 4),
        "class_index": predicted_index,
    }
77
+
78
+
79
def get_model_and_tokenizer():
    """Return the loaded (model, tokenizer) pair so callers avoid re-loading."""
    return (model, tokenizer)
81
+
82
+
83
# ── Standalone test ───────────────────────────────────────
if __name__ == "__main__":
    # Simple REPL: keep predicting until the user types 'exit'.
    while True:
        user_text = input("\nEnter Hindi/Telugu grievance (or 'exit'): ")
        if user_text.lower() == "exit":
            break
        print(predict(user_text))