Spaces:
Sleeping
Sleeping
Mohan Rao Boddu committed on
Commit ·
1daba48
1
Parent(s): 736bc2b
Fix folder structure for HuggingFace
Browse files- .DS_Store +0 -0
- ml-backend-with-image/.DS_Store +0 -0
- ml-backend-with-image/Dockerfile +16 -0
- ml-backend-with-image/README.md +24 -0
- ml-backend-with-image/app/__init__.py +14 -0
- ml-backend-with-image/app/__pycache__/__init__.cpython-311.pyc +0 -0
- ml-backend-with-image/app/__pycache__/__init__.cpython-312.pyc +0 -0
- ml-backend-with-image/app/__pycache__/__init__.cpython-313.pyc +0 -0
- ml-backend-with-image/app/__pycache__/__init__.cpython-314.pyc +0 -0
- ml-backend-with-image/app/__pycache__/dataset.cpython-311.pyc +0 -0
- ml-backend-with-image/app/__pycache__/dataset.cpython-312.pyc +0 -0
- ml-backend-with-image/app/__pycache__/dataset.cpython-313.pyc +0 -0
- ml-backend-with-image/app/__pycache__/dataset.cpython-314.pyc +0 -0
- ml-backend-with-image/app/__pycache__/image_classifier.cpython-311.pyc +0 -0
- ml-backend-with-image/app/__pycache__/image_classifier.cpython-312.pyc +0 -0
- ml-backend-with-image/app/__pycache__/image_classifier.cpython-313.pyc +0 -0
- ml-backend-with-image/app/__pycache__/image_classifier.cpython-314.pyc +0 -0
- ml-backend-with-image/app/__pycache__/main.cpython-311.pyc +0 -0
- ml-backend-with-image/app/__pycache__/main.cpython-312.pyc +0 -0
- ml-backend-with-image/app/__pycache__/main.cpython-313.pyc +0 -0
- ml-backend-with-image/app/__pycache__/main.cpython-314.pyc +0 -0
- ml-backend-with-image/app/__pycache__/models.cpython-311.pyc +0 -0
- ml-backend-with-image/app/__pycache__/models.cpython-312.pyc +0 -0
- ml-backend-with-image/app/__pycache__/models.cpython-313.pyc +0 -0
- ml-backend-with-image/app/__pycache__/models.cpython-314.pyc +0 -0
- ml-backend-with-image/app/__pycache__/pipeline.cpython-311.pyc +0 -0
- ml-backend-with-image/app/__pycache__/pipeline.cpython-312.pyc +0 -0
- ml-backend-with-image/app/__pycache__/pipeline.cpython-313.pyc +0 -0
- ml-backend-with-image/app/__pycache__/pipeline.cpython-314.pyc +0 -0
- ml-backend-with-image/app/__pycache__/storage.cpython-311.pyc +0 -0
- ml-backend-with-image/app/__pycache__/storage.cpython-312.pyc +0 -0
- ml-backend-with-image/app/__pycache__/storage.cpython-313.pyc +0 -0
- ml-backend-with-image/app/__pycache__/storage.cpython-314.pyc +0 -0
- ml-backend-with-image/app/dataset.py +14 -0
- ml-backend-with-image/app/image_classifier.py +91 -0
- ml-backend-with-image/app/main.py +21 -0
- ml-backend-with-image/app/models.py +20 -0
- ml-backend-with-image/app/pipeline.py +282 -0
- ml-backend-with-image/app/storage.py +64 -0
- ml-backend-with-image/data/dataset.jsonl +106 -0
- ml-backend-with-image/debug_profanity.py +114 -0
- ml-backend-with-image/requirements.txt +9 -0
- ml-backend-with-image/test_classification.py +93 -0
- ml-backend-with-image/test_profanity.py +84 -0
.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
ml-backend-with-image/.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
ml-backend-with-image/Dockerfile
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
| 2 |
+
# you will also find guides on how best to write your Dockerfile
|
| 3 |
+
|
| 4 |
+
FROM python:3.9
|
| 5 |
+
|
| 6 |
+
RUN useradd -m -u 1000 user
|
| 7 |
+
USER user
|
| 8 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
| 9 |
+
|
| 10 |
+
WORKDIR /app
|
| 11 |
+
|
| 12 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
| 13 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 14 |
+
|
| 15 |
+
COPY --chown=user . /app
|
| 16 |
+
# Serve the routed application from app/main.py. The bare "app" package also
# exposes a FastAPI instance, but it has no endpoints registered on it.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
ml-backend-with-image/README.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Civic ML Backend (Full: pHash + Location)
|
| 2 |
+
|
| 3 |
+
Overview:
|
| 4 |
+
|
| 5 |
+
- FastAPI backend that accepts citizen reports and performs:
|
| 6 |
+
- rule-based abuse detection
|
| 7 |
+
- text duplicate detection
|
| 8 |
+
- image duplicate detection using pHash (imagehash)
|
| 9 |
+
- location-based duplicate detection using Haversine formula
|
| 10 |
+
- image classification using CLIP (if available) with URL fallback
|
| 11 |
+
- priority detection (keyword-based)
|
| 12 |
+
|
| 13 |
+
Run:
|
| 14 |
+
|
| 15 |
+
1. python -m venv .venv
|
| 16 |
+
2. source .venv/bin/activate # Windows: .venv\Scripts\Activate
|
| 17 |
+
3. pip install -r requirements.txt
|
| 18 |
+
4. uvicorn app.main:app --reload --port 8000
|
| 19 |
+
|
| 20 |
+
Notes:
|
| 21 |
+
|
| 22 |
+
- The in-memory stores (seen_reports, seen_image_hashes, seen_locations) are ephemeral and reset on server restart.
|
| 23 |
+
- CLIP model download requires internet and may take time; if unavailable, the system uses URL keyword fallback for image labels.
|
| 24 |
+
- data/dataset.jsonl collects all incoming reports and results for later training/audit.
|
ml-backend-with-image/app/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
|
| 4 |
+
# NOTE(review): no routes are registered on this instance in this module; the
# endpoints in this change are attached to the separate FastAPI instance
# created in app/main.py (the README starts the server with app.main:app).
app = FastAPI(title="Civic ML Backend (with Image)")

# CORS policy for browser clients.
# NOTE(review): the "*" entry already covers the two localhost origins listed
# before it - confirm whether the wildcard is intended together with
# allow_credentials=True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000", "http://localhost:3001", "*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"]
)
|
| 13 |
+
|
| 14 |
+
|
ml-backend-with-image/app/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (655 Bytes). View file
|
|
|
ml-backend-with-image/app/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (173 Bytes). View file
|
|
|
ml-backend-with-image/app/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (567 Bytes). View file
|
|
|
ml-backend-with-image/app/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (581 Bytes). View file
|
|
|
ml-backend-with-image/app/__pycache__/dataset.cpython-311.pyc
ADDED
|
Binary file (1.13 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/dataset.cpython-312.pyc
ADDED
|
Binary file (935 Bytes). View file
|
|
|
ml-backend-with-image/app/__pycache__/dataset.cpython-313.pyc
ADDED
|
Binary file (916 Bytes). View file
|
|
|
ml-backend-with-image/app/__pycache__/dataset.cpython-314.pyc
ADDED
|
Binary file (1.28 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/image_classifier.cpython-311.pyc
ADDED
|
Binary file (3.87 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/image_classifier.cpython-312.pyc
ADDED
|
Binary file (2.28 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/image_classifier.cpython-313.pyc
ADDED
|
Binary file (3.58 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/image_classifier.cpython-314.pyc
ADDED
|
Binary file (5.36 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/main.cpython-311.pyc
ADDED
|
Binary file (1.53 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/main.cpython-312.pyc
ADDED
|
Binary file (975 Bytes). View file
|
|
|
ml-backend-with-image/app/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (1.3 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/main.cpython-314.pyc
ADDED
|
Binary file (1.46 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/models.cpython-311.pyc
ADDED
|
Binary file (1.48 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/models.cpython-312.pyc
ADDED
|
Binary file (1.1 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/models.cpython-313.pyc
ADDED
|
Binary file (1.29 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/models.cpython-314.pyc
ADDED
|
Binary file (1.89 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/pipeline.cpython-311.pyc
ADDED
|
Binary file (10.2 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/pipeline.cpython-312.pyc
ADDED
|
Binary file (2.53 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/pipeline.cpython-313.pyc
ADDED
|
Binary file (9.26 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/pipeline.cpython-314.pyc
ADDED
|
Binary file (12.9 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/storage.cpython-311.pyc
ADDED
|
Binary file (4.16 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/storage.cpython-312.pyc
ADDED
|
Binary file (692 Bytes). View file
|
|
|
ml-backend-with-image/app/__pycache__/storage.cpython-313.pyc
ADDED
|
Binary file (3.65 kB). View file
|
|
|
ml-backend-with-image/app/__pycache__/storage.cpython-314.pyc
ADDED
|
Binary file (4.29 kB). View file
|
|
|
ml-backend-with-image/app/dataset.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
# Always resolve the dataset path relative to this file so that it works
# no matter where the application is started from (repo root, service dir, etc.)
BASE_DIR = Path(__file__).resolve().parent.parent  # points to ml-backend-with-image/
# JSON Lines file that save_report() appends to.
DATA_FILE = BASE_DIR / "data" / "dataset.jsonl"
# Import-time side effect: make sure the data/ directory exists before the
# first append.
DATA_FILE.parent.mkdir(parents=True, exist_ok=True)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def save_report(report_dict: dict):
    """Persist one report as a single JSON line at the end of dataset.jsonl.

    The record is serialised with non-ASCII characters kept as-is and is
    terminated by a newline, so the file stays in JSON Lines format.
    """
    with DATA_FILE.open("a", encoding="utf8") as sink:
        serialized = json.dumps(report_dict, ensure_ascii=False)
        sink.write(serialized + "\n")
|
ml-backend-with-image/app/image_classifier.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Lightweight CLIP-based image classifier with safe fallbacks.
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import requests
|
| 4 |
+
import io
|
| 5 |
+
import threading
|
| 6 |
+
|
| 7 |
+
# Module-level CLIP state, written by initialize_clip() and read by
# classify_image().
_clip_lock = threading.Lock()  # NOTE(review): never acquired by the functions in this module
_clip_model = None  # CLIPModel once loaded, otherwise None
_clip_processor = None  # CLIPProcessor once loaded, otherwise None
_available = False  # True only after initialize_clip() loaded both objects
|
| 11 |
+
|
| 12 |
+
def initialize_clip():
    """Load the CLIP model and processor used by ``classify_image``.

    On success the module-level ``_clip_model`` and ``_clip_processor`` are
    set and ``_available`` becomes True. On any failure (packages missing, no
    network, ...) the error is logged, all three globals are reset to their
    "unavailable" values, and callers continue with the URL-keyword fallback.
    Never raises.
    """
    global _clip_model, _clip_processor, _available
    import logging  # local import keeps this block self-contained

    checkpoint = "openai/clip-vit-base-patch32"
    try:
        from transformers import CLIPProcessor, CLIPModel
        # Load into locals first so a failure on the second load cannot leave
        # a half-initialised pair behind in the module globals.
        model = CLIPModel.from_pretrained(checkpoint)
        processor = CLIPProcessor.from_pretrained(checkpoint)
    except Exception:
        logging.getLogger(__name__).warning(
            "CLIP unavailable; falling back to URL keyword matching",
            exc_info=True,
        )
        _clip_model = None
        _clip_processor = None
        _available = False
        return

    _clip_model = model
    _clip_processor = processor
    _available = True
|
| 22 |
+
|
| 23 |
+
def _default_candidate_labels():
    """Return a fresh list holding the built-in civic-issue label vocabulary."""
    return [
        # Roads & Transport
        "pothole", "damaged road", "illegal parking", "broken footpath",
        "traffic signal not working", "road accident", "road", "street", "traffic",
        "speed breaker", "crosswalk", "footpath", "pavement",

        # Sanitation & Waste
        "garbage dump", "overflowing dustbin", "open drain", "sewage overflow",
        "dead animal", "toilet issue", "garbage", "trash", "waste", "bin",
        "sanitation", "dirty", "sewage", "cleanliness", "dustbin",

        # Electricity & Lighting
        "streetlight not working", "fallen electric pole", "loose wire", "power outage",
        "streetlight", "lamp", "bulb", "pole", "light", "electric pole",
        "street lamp", "lighting", "dark area", "electricity", "power",
        "broken streetlight", "non-working light", "flickering light", "dim light",
        "street lighting", "outdoor lighting", "public lighting", "night lighting",

        # Water Supply & Flood
        "waterlogging", "pipe burst", "no water supply", "drainage issue", "flood",
        "drain", "drainage", "sewage", "sewer", "leak", "leaking", "leakage",
        "pipe", "water", "overflow", "water supply", "drainage system",

        # Environment & Public Spaces
        "tree fallen", "illegal construction", "park maintenance", "encroachment",
        "park", "garden", "playground", "tree", "bench", "grass", "lawn",
        "recreation", "green space", "park area", "garden area", "flooded park",
        "water in park", "park with water", "playground equipment", "walking path",
        "fountain", "pond", "lake", "outdoor space", "public space",

        # Safety & Emergency
        "fire", "gas leak", "building collapse", "accident site",
        "crime", "robbery", "theft", "violence", "hazard", "danger",
        "safety", "harassment", "emergency", "accident",

        # Noise & Pollution
        "noise pollution", "air pollution", "industrial waste",

        # General
        "other"
    ]


def classify_image(image_url: str, candidate_labels=None) -> str:
    """Pick the label that best describes the image behind ``image_url``.

    ``candidate_labels`` defaults to the built-in civic-issue vocabulary.
    With CLIP loaded, the image is downloaded (5 second timeout) and scored
    against every label; the top-scoring label is returned. Without CLIP, the
    first label that occurs as a substring of the lower-cased URL is returned.
    An empty URL, no keyword hit, or any error while downloading or scoring
    yields 'other'.
    """
    labels = _default_candidate_labels() if candidate_labels is None else candidate_labels

    # Nothing to look at.
    if not image_url:
        return "other"

    if _available:
        try:
            response = requests.get(image_url, timeout=5)
            response.raise_for_status()
            picture = Image.open(io.BytesIO(response.content)).convert("RGB")
            model_inputs = _clip_processor(text=labels, images=picture, return_tensors="pt", padding=True)
            scores = _clip_model(**model_inputs).logits_per_image  # shape (1, num_labels)
            winner = int(scores.softmax(dim=1).argmax().item())
            return labels[winner]
        except Exception:
            return "other"

    # CLIP not loaded: heuristic keyword lookup in the URL text.
    lowered_url = image_url.lower()
    return next((label for label in labels if label in lowered_url), "other")
|
ml-backend-with-image/app/main.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from app.pipeline import classify_report
|
| 4 |
+
|
| 5 |
+
# FastAPI application that carries the routes defined below.
# NOTE(review): nothing in this module calls app.pipeline.initialize_models(),
# so CLIP is not loaded from here - confirm whether that is intended.
app = FastAPI(title="Civic ML Backend API")

# CORS: every origin, method and header is allowed, with credentials enabled.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 14 |
+
|
| 15 |
+
@app.get("/")
def health():
    """Health-check endpoint: return a fixed status payload."""
    return {"status": "ML API running"}
|
| 18 |
+
|
| 19 |
+
@app.post("/submit")
def submit_report(data: dict):
    """Run the moderation pipeline on a submitted report.

    ``data`` is the request body as a plain dict; it is handed to
    ``classify_report`` as-is and that function's result dict is returned.
    """
    return classify_report(data)
|
ml-backend-with-image/app/models.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
class ReportIn(BaseModel):
    """Schema of an incoming citizen report."""

    # Required fields.
    report_id: str
    description: str
    category: str
    # Optional fields; each defaults to None when omitted.
    user_id: Optional[str] = None
    image_url: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
|
| 12 |
+
|
| 13 |
+
class ReportStatus(BaseModel):
    """Schema of a pipeline verdict for one report."""

    # Required fields.
    report_id: str
    status: str
    # Optional fields; each defaults to None when omitted.
    reason: Optional[str] = None
    priority: Optional[str] = None
    category: Optional[str] = None
    text_category: Optional[str] = None
    image_category: Optional[str] = None
|
ml-backend-with-image/app/pipeline.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app import storage, dataset
|
| 2 |
+
from app import image_classifier as ic
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
# Profanity detection
import warnings
# Suppress pkg_resources deprecation warnings globally (they're just warnings, not errors)
warnings.filterwarnings("ignore", category=UserWarning, message=".*pkg_resources.*")

# Optional ML detector. PROFANITY_AVAILABLE becomes True only when
# profanity_check imports and a smoke-test prediction returns a non-None value;
# otherwise the pipeline relies on keyword matching alone.
PROFANITY_AVAILABLE = False
_profanity_predict = None
try:
    from profanity_check import predict as _profanity_predict
    # Test that it works with a known profane word
    _test_result = _profanity_predict(["fuck"])
    # Only checks that a result came back; the predicted value itself is not inspected
    if _test_result is not None:
        PROFANITY_AVAILABLE = True
except ImportError:
    # Library truly not installed
    pass  # Silently fall back to keyword-based detection
except Exception as e:
    # Any other import/runtime error - silently fall back
    PROFANITY_AVAILABLE = False
    _profanity_predict = None
|
| 26 |
+
|
| 27 |
+
# Category-scoped urgent keywords (same methodology as CATEGORY_KEYWORDS)
# Keep a Global bucket to preserve previous behavior while enabling per-category tuning
# classify_report() combines the report's category list with the "Global" list
# and marks a report urgent when any entry occurs in the lower-cased description.
URGENT_KEYWORDS = {
    "Global": [
        # Road & Transport
        "accident", "collision", "roadblock", "traffic jam", "hit and run",
        "bridge collapse", "pothole accident", "road caved in",

        # Fire & Hazard
        "fire", "burning", "smoke", "blast", "explosion", "short circuit",
        "gas leak", "electric spark", "transformer burst",

        # Water & Flood
        "flood", "waterlogging", "sewage overflow", "pipe burst",
        "drain blocked", "heavy rain", "overflowing drain", "contaminated water",

        # Health & Sanitation
        "garbage overflow", "dead animal", "toxic smell", "mosquito breeding",
        "epidemic", "dengue outbreak", "cholera", "sanitation hazard",

        # Safety & Security
        "violence", "crime", "theft", "fight", "robbery", "public hazard",
        "building collapse", "wall collapse", "tree fallen", "landslide",

        # Other Civic Emergencies
        "power outage", "electric shock", "streetlight sparks",
        "ambulance needed", "emergency help"
    ],
    # Per-category overrides/extensions (can be expanded as needed)
    "Road & Traffic": ["accident", "collision", "roadblock", "pothole"],
    "Water & Drainage": ["flood", "waterlogging", "sewage overflow", "pipe burst"],
    "Electricity": ["power outage", "short circuit", "electric shock"],
    "Garbage & Sanitation": ["garbage overflow", "toxic smell"],
    "Street Lighting": ["streetlight sparks", "dark area"],
    "Public Safety": ["violence", "robbery", "fire"],
    "Parks & Recreation": ["tree fallen"],
    "Other": []
}
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# optional model availability flags are handled inside image_classifier
|
| 68 |
+
|
| 69 |
+
def initialize_models():
    """Best-effort model warm-up: try to load CLIP and ignore any failure."""
    # initialize image classifier (CLIP) if available
    try:
        ic.initialize_clip()
    except Exception:
        # Deliberately swallowed so that a failed warm-up never propagates.
        pass
|
| 75 |
+
|
| 76 |
+
# Abusive-language vocabulary for the keyword detector (matched as whole words).
_BAD_WORDS = frozenset({
    "fuck", "fucking", "fucked", "fucker", "fuckin", "fucks",
    "shit", "shitting", "shitted", "shitty", "shits",
    "bitch", "bitches", "bitching", "bitched",
    "bastard", "bastards",
    "asshole", "ass", "asses", "assholes",
    "dick", "dicks", "dickhead",
    "cunt", "cunts",
    "prick", "pricks",
    "slut", "sluts", "bloody",
    "whore", "whores", "rascal",
    "damn", "damned", "damnit", "dammit",
    "hell", "hells",
    "crap", "crappy",
    "piss", "pissed", "pissing",
    "idiot", "idiots", "idiotic",
    "stupid", "stupidity", "stupidly",
    "moron", "morons",
    "retard", "retarded",
    "gay", "gays",  # Context-dependent, but often used abusively
    "hate", "hateful", "hating",
})

# Tokeniser for the keyword detector: maximal runs of ASCII letters.
_WORD_RE = re.compile(r"[a-z]+")

# Description keywords accepted per category (substring match on the description).
_CATEGORY_KEYWORDS = {
    "Road & Traffic": [
        "pothole", "road", "street", "traffic", "signal", "accident", "jam", "footpath", "speed breaker", "crosswalk", "pavement", "highway", "bridge", "intersection"
    ],
    "Water & Drainage": [
        "drain", "drainage", "sewage", "sewer", "leak", "leaking", "leakage", "waterlogging", "pipe", "flood", "overflow", "water", "supply", "system", "blocked", "clogged"
    ],
    "Electricity": [
        "electric", "electricity", "power", "outage", "wire", "transformer", "short circuit", "shock", "cable", "meter", "electrical", "voltage", "current"
    ],
    "Garbage & Sanitation": [
        "garbage", "trash", "waste", "bin", "dump", "sanitation", "dirty", "sewage", "overflow", "cleanliness", "collection", "disposal", "rubbish", "litter"
    ],
    "Street Lighting": [
        "street light", "streetlight", "lamp", "bulb", "pole", "light not working", "dark", "lighting", "illumination", "street lamp", "electric pole", "night light", "dim", "flickering"
    ],
    "Public Safety": [
        "crime", "robbery", "theft", "violence", "fire", "hazard", "danger", "safety", "harassment", "emergency", "accident", "security", "threat", "risk"
    ],
    "Parks & Recreation": [
        "park", "garden", "playground", "tree", "bench", "grass", "lawn", "recreation", "green space", "playground equipment", "walking path", "fountain", "pond", "lake", "water", "flooded", "maintenance", "amenities"
    ],
    "Other": []
}

# Image labels allowed per category (same methodology: category -> labels)
_IMAGE_LABELS = {
    "Road & Traffic": ["pothole", "road", "traffic", "street", "footpath", "pavement", "speed breaker", "crosswalk", "damaged road", "illegal parking", "broken footpath", "traffic signal not working", "road accident"],
    "Garbage & Sanitation": ["garbage", "trash", "dump", "waste", "bin", "sanitation", "dirty", "sewage", "cleanliness", "dustbin", "garbage dump", "overflowing dustbin", "open drain", "sewage overflow", "dead animal", "toilet issue"],
    "Street Lighting": ["streetlight", "lamp", "bulb", "pole", "light", "electric pole", "street lamp", "lighting", "dark area", "electricity", "power", "streetlight not working", "fallen electric pole", "loose wire", "power outage"],
    "Water & Drainage": ["drain", "drainage", "sewage", "sewer", "leak", "leaking", "leakage", "pipe", "water", "overflow", "water supply", "drainage system", "waterlogging", "pipe burst", "no water supply", "drainage issue", "flood"],
    "Electricity": ["transformer", "wire", "meter", "electricity", "power", "electric", "cable", "short circuit", "shock"],
    "Public Safety": ["fire", "hazard", "danger", "safety", "emergency", "accident", "crime", "robbery", "theft", "violence", "harassment", "gas leak", "building collapse", "accident site"],
    "Parks & Recreation": ["park", "garden", "playground", "tree", "bench", "grass", "lawn", "recreation", "green space", "park area", "garden area", "tree fallen", "illegal construction", "park maintenance", "encroachment"],
    "Other": ["other"]
}

# Terms accepted inside an image label for the two special-cased categories.
# Parks also accepts water-related terms (for flooded parks).
_PARK_IMAGE_TERMS = ("park", "garden", "playground", "tree", "bench", "grass", "lawn", "recreation", "green", "water", "flood", "pond", "lake")
_LIGHTING_IMAGE_TERMS = ("streetlight", "lamp", "bulb", "pole", "light", "electric", "power", "dark", "illumination", "street", "night")


def _has_bad_word(text_lower):
    """Return True when any whole word of ``text_lower`` is in ``_BAD_WORDS``."""
    return not _BAD_WORDS.isdisjoint(_WORD_RE.findall(text_lower))


def _ml_flags_profanity(text):
    """Return True when the optional profanity_check model labels ``text`` profane."""
    if not PROFANITY_AVAILABLE or _profanity_predict is None:
        return False
    try:
        # profanity_check.predict returns a numpy array with 1 for profane, 0 for clean
        predictions = _profanity_predict([text])
        # Handle numpy array, list, or scalar returns
        if hasattr(predictions, '__getitem__') and len(predictions) > 0:
            return int(predictions[0]) == 1
        if hasattr(predictions, 'item'):
            return int(predictions.item()) == 1
        return int(predictions) == 1
    except Exception:
        # If profanity_check fails at runtime, rely on the keyword check only
        return False


def _image_matches_category(cat, image_cat):
    """Return True when the classifier label ``image_cat`` is acceptable for ``cat``."""
    image_label = str(image_cat).lower() if image_cat is not None else "other"
    if cat == "Parks & Recreation":
        return any(term in image_label for term in _PARK_IMAGE_TERMS)
    if cat == "Street Lighting":
        return any(term in image_label for term in _LIGHTING_IMAGE_TERMS)
    if cat == "Other":
        return True
    # Flexible matching: either string may contain the other.
    labels_for_cat = [s.lower() for s in _IMAGE_LABELS.get(cat, [])]
    return any(label in image_label or image_label in label for label in labels_for_cat)


def _record(report, **fields):
    """Build the verdict dict, append report+verdict to the dataset, return the verdict."""
    result = {"report_id": report.get("report_id"), **fields}
    dataset.save_report({**report, **result})
    return result


def classify_report(report: dict):
    """Moderate one citizen report and return a verdict dict.

    ``report`` is a plain dict; the keys read are ``report_id``,
    ``description``, ``category``, ``user_id``, ``image_url``, ``latitude``
    and ``longitude`` (missing keys are treated as absent values).

    Checks run in this order, and the first failing one rejects the report:
      1. abusive language (whole-word keyword match, plus the optional ML model)
      2. image/description consistency with the selected category
      3. duplicate text, then duplicate image
      4. duplicate location
    Surviving reports are accepted with a keyword-based priority.

    Returns a dict with ``report_id`` and ``status``; rejected reports add
    ``reason``, accepted ones add ``priority``, ``category``,
    ``text_category`` and ``image_category``. Every verdict is also appended
    to the dataset together with the original report.
    """
    desc = (report.get("description") or "").strip()
    desc_lower = desc.lower()
    cat = (report.get("category") or "").strip()

    # 1. Reject if abusive. Keywords are compared against whole words so that
    # clean text such as "grass", "underpass" or "hello" is not flagged merely
    # for containing a listed word as a substring.
    if _has_bad_word(desc_lower) or _ml_flags_profanity(desc):
        return _record(report, status="rejected", reason="Abusive language detected")

    # 2. Category consistency validation (description/image must relate to selected category)
    desc_match = cat == "Other" or any(k in desc_lower for k in _CATEGORY_KEYWORDS.get(cat, []))

    image_url = report.get("image_url")
    has_image = bool(image_url)
    image_cat = None
    if has_image:
        image_cat = ic.classify_image(image_url)
        # If an image is provided and does NOT relate to the category, reject early
        if not _image_matches_category(cat, image_cat):
            return _record(report, status="rejected", reason="image not related to category")

    # A matching image is sufficient on its own; the description is only
    # decisive when no image was supplied.
    if not has_image and not desc_match:
        return _record(report, status="rejected", reason="description not related to category")

    # 3. Reject if duplicate (spam)
    if storage.is_duplicate(report.get("user_id"), desc, cat):
        return _record(report, status="rejected", reason="Duplicate spam")

    # 3b. Reject image duplicate using pHash
    if has_image and storage.is_duplicate_image(image_url):
        return _record(report, status="rejected", reason="Duplicate spam (image)")

    # 4. Reject location duplicate (same text+category within threshold meters)
    lat = report.get("latitude")
    lon = report.get("longitude")
    if lat is not None and lon is not None and storage.is_duplicate_location(lat, lon, desc, cat):
        return _record(report, status="rejected", reason="Duplicate spam (location)")

    # 5. Priority (urgent vs normal) using keywords (category-scoped + global)
    urgent_words = [
        *URGENT_KEYWORDS.get(cat, []),
        *URGENT_KEYWORDS.get("Global", []),
    ]
    priority = "urgent" if any(word in desc_lower for word in urgent_words) else "normal"

    # image_cat was already computed in step 2 whenever an image was supplied.
    return _record(
        report,
        status="accepted",
        priority=priority,
        category=cat,
        text_category=cat,
        image_category=image_cat,
    )
|
ml-backend-with-image/app/storage.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PIL import Image
|
| 2 |
+
import imagehash
|
| 3 |
+
import requests
|
| 4 |
+
import io
|
| 5 |
+
from math import radians, cos, sin, asin, sqrt
|
| 6 |
+
|
| 7 |
+
# In-memory stores shared by the duplicate checks below (ephemeral - reset on server restart)
|
| 8 |
+
seen_reports = set() # keys added by is_duplicate: (user_id or "anon", stripped lower-cased description, lower-cased category)
|
| 9 |
+
seen_image_hashes = [] # imagehash pHash objects appended by is_duplicate_image
|
| 10 |
+
seen_locations = [] # tuples (lat, lon, description, category) appended by is_duplicate_location
|
| 11 |
+
|
| 12 |
+
def is_duplicate(user_id: str, description: str, category: str) -> bool:
    """Report whether the same user already sent this text in this category.

    The lookup key is ``(user_id or "anon", description stripped and
    lower-cased, category lower-cased)``.  A key that has not been seen
    before is recorded in ``seen_reports`` as a side effect, so a second
    call with equivalent arguments returns True.
    """
    fingerprint = (
        user_id or "anon",
        description.strip().lower(),
        category.lower(),
    )
    already_seen = fingerprint in seen_reports
    if not already_seen:
        # First sighting: remember it so the next identical report is flagged.
        seen_reports.add(fingerprint)
    return already_seen
|
| 18 |
+
|
| 19 |
+
def is_duplicate_image(image_url: str, threshold: int = 5) -> bool:
    """Check whether the image at ``image_url`` was already seen, using pHash.

    The image is downloaded, converted to RGB and hashed with
    ``imagehash.phash``; the hash is then compared with every hash held in
    ``seen_image_hashes``.

    Args:
        image_url: URL the image is fetched from (5 second timeout).
        threshold: Maximum Hamming distance at which two hashes are
            considered to be the same image.

    Returns:
        True if a stored hash is within ``threshold`` of this image's hash.
        Otherwise the new hash is stored and False is returned.  False is
        also returned, and nothing is stored, when the image cannot be
        fetched or processed.
    """
    import logging

    try:
        resp = requests.get(image_url, timeout=5)
        resp.raise_for_status()
        # convert() loads the pixel data, so the source image can be closed
        # as soon as the RGB copy exists.
        with Image.open(io.BytesIO(resp.content)) as raw:
            img = raw.convert('RGB')
        img_hash = imagehash.phash(img)
    except Exception:
        # Deliberate best effort: a broken or unreachable image must not
        # reject the report, but leave a trace instead of failing silently.
        logging.getLogger(__name__).warning(
            "Could not fetch/hash image %r; treating it as non-duplicate",
            image_url,
            exc_info=True,
        )
        return False

    # Subtracting two ImageHash objects yields their Hamming distance, which
    # is never negative, so no abs() is needed.
    if any(img_hash - known <= threshold for known in seen_image_hashes):
        return True

    seen_image_hashes.append(img_hash)
    return False
|
| 41 |
+
|
| 42 |
+
def haversine(lat1, lon1, lat2, lon2):
    """Calculate great-circle distance between two lat/lon points in meters.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance along a sphere of radius 6,371,000 m, as a float.
    """
    lat1, lon1, lat2, lon2 = map(radians, (lat1, lon1, lat2, lon2))
    delta_lat = lat2 - lat1
    delta_lon = lon2 - lon1
    a = sin(delta_lat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(delta_lon / 2) ** 2
    # Floating-point rounding can leave `a` marginally outside [0, 1] for
    # (near-)antipodal points; sqrt/asin would then raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    r = 6371000  # Earth radius in meters
    return c * r
|
| 51 |
+
|
| 52 |
+
def is_duplicate_location(lat: float, lon: float, description: str, category: str, threshold: int = 20) -> bool:
    """Return True if an existing report with same text+category exists within threshold meters.

    Text is compared after stripping and lower-casing the description and
    lower-casing the category.  When no stored report matches, this report
    is appended to ``seen_locations`` for future checks.

    Args:
        lat: Latitude of the new report, in degrees.
        lon: Longitude of the new report, in degrees.
        description: Free-text description of the report.
        category: Category of the report.
        threshold: Maximum distance in meters at which two reports with the
            same text and category count as duplicates.

    Returns:
        True when a matching stored report lies within ``threshold`` meters,
        otherwise False.  Any failure (for example a non-numeric coordinate
        or a non-string description) also yields False.
    """
    try:
        # Validate and normalise BEFORE touching the store: a malformed
        # report must never be appended, otherwise every later call would
        # raise while iterating over it and be reported as non-duplicate.
        lat, lon = float(lat), float(lon)
        wanted_desc = description.strip().lower()
        wanted_cat = category.lower()

        for (lat0, lon0, desc0, cat0) in seen_locations:
            if desc0.strip().lower() == wanted_desc and cat0.lower() == wanted_cat:
                dist = haversine(lat, lon, lat0, lon0)
                if dist <= threshold:
                    return True
        # not duplicate - store this location for future checks
        seen_locations.append((lat, lon, description, category))
        return False
    except Exception:
        # Best effort: a failed check must not reject the report.
        return False
|
ml-backend-with-image/data/dataset.jsonl
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"report_id": "r1", "description": "Huge pothole urgent fix", "category": "Road", "user_id": "u1", "image_url": "https://ultralytics.com/images/bus.jpg", "status": "accepted", "priority": "urgent", "text_category": "Road", "image_category": "other"}
|
| 2 |
+
{"report_id": "r1", "description": "Huge pothole urgent fix", "category": "Road", "user_id": "u1", "image_url": "https://ultralytics.com/images/bus.jpg", "status": "rejected", "reason": "Duplicate spam"}
|
| 3 |
+
{"report_id": "r2", "description": "This road is nonsense idiot", "category": "Road", "user_id": "u2", "image_url": null, "status": "rejected", "reason": "Abusive language"}
|
| 4 |
+
{"report_id": "r10", "description": "Streetlight not working near school", "category": "Streetlight", "user_id": "u15", "image_url": "https://example.com/streetlight.jpg", "status": "accepted", "priority": "normal", "text_category": "Streetlight", "image_category": "other"}
|
| 5 |
+
{"report_id": "s1000", "description": "fuck off", "category": "garbage", "user_id": "u100", "image_url": "string", "status": "rejected", "reason": "Abusive language"}
|
| 6 |
+
{"report_id": "1758718240735", "description": "drainage is flowing on road", "category": "Water & Drainage", "user_id": "68d3e8e4eae029d19869bc20", "image_url": null, "status": "accepted", "priority": "normal", "text_category": "Water & Drainage", "image_category": "none"}
|
| 7 |
+
{"report_id": "1758718396632", "description": "drainage is flowing on road", "category": "Water & Drainage", "user_id": "68d3e8e4eae029d19869bc20", "image_url": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 8 |
+
{"report_id": "1758718426795", "description": "drainage is flowing on fuck", "category": "Water & Drainage", "user_id": "68d3e8e4eae029d19869bc20", "image_url": null, "status": "rejected", "reason": "Abusive language"}
|
| 9 |
+
{"report_id": "1758718445366", "description": "drainage is on road", "category": "Water & Drainage", "user_id": "68d3e8e4eae029d19869bc20", "image_url": null, "status": "accepted", "priority": "normal", "text_category": "Water & Drainage", "image_category": "none"}
|
| 10 |
+
{"report_id": "1758718581585", "description": "Fire accident is going", "category": "Public Safety", "user_id": "68d3e8e4eae029d19869bc20", "image_url": null, "status": "accepted", "priority": "urgent", "text_category": "Public Safety", "image_category": "none"}
|
| 11 |
+
{"report_id": "1758726513196", "description": "drainage is flowing on road", "category": "Water & Drainage", "user_id": "68d406f931b81f9e82196b6b", "image_url": null, "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Water & Drainage", "image_category": null}
|
| 12 |
+
{"report_id": "1758726580862", "description": "drainage is flowing on road", "category": "Water & Drainage", "user_id": "68d406f931b81f9e82196b6b", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 13 |
+
{"report_id": "1758726597051", "description": "drainage is on road", "category": "Water & Drainage", "user_id": "68d406f931b81f9e82196b6b", "image_url": null, "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Water & Drainage", "image_category": null}
|
| 14 |
+
{"report_id": "1758727741888", "description": "drainage is nonsense", "category": "Water & Drainage", "user_id": "68d406f931b81f9e82196b6b", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language"}
|
| 15 |
+
{"report_id": "1758727753045", "description": "drainage is fuck\n", "category": "Water & Drainage", "user_id": "68d406f931b81f9e82196b6b", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language"}
|
| 16 |
+
{"report_id": "1758727993140", "description": "drainage is fuck\n", "category": "Water & Drainage", "user_id": "68d406f931b81f9e82196b6b", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language"}
|
| 17 |
+
{"report_id": "1758728078194", "description": "drainage is fuck\n", "category": "Water & Drainage", "user_id": "68d406f931b81f9e82196b6b", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language"}
|
| 18 |
+
{"report_id": "1758728333506", "description": "drainage is dick", "category": "Water & Drainage", "user_id": "68d4106f31b81f9e82196bb4", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language"}
|
| 19 |
+
{"report_id": "1758728372941", "description": "fire is occuring", "category": "Water & Drainage", "user_id": "68d4106f31b81f9e82196bb4", "image_url": null, "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Water & Drainage", "image_category": null}
|
| 20 |
+
{"report_id": "1758728777677", "description": "one home got caught fire in my location", "category": "Public Safety", "user_id": "68d4120b31b81f9e82196c67", "image_url": null, "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Public Safety", "image_category": null}
|
| 21 |
+
{"report_id": "1758729656495", "description": "drainage is flowing on road", "category": "Water & Drainage", "user_id": "68d41589c72a935fab2181a5", "image_url": null, "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Water & Drainage", "image_category": null}
|
| 22 |
+
{"report_id": "1758730246763", "description": "drainage is bloody ", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language"}
|
| 23 |
+
{"report_id": "1758730261736", "description": "drainage is bloody ", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language"}
|
| 24 |
+
{"report_id": "1758730271464", "description": "drainage is bloody", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language"}
|
| 25 |
+
{"report_id": "1758731182959", "description": "drainage is not bloody fuck", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 26 |
+
{"report_id": "1758732151611", "description": "our area is surrounded with smoke from dumping yard", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Water & Drainage", "image_category": null}
|
| 27 |
+
{"report_id": "1758734534221", "description": "drainage is flowing on road", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758734230/civicconnect/images/c7qwy98p7iw3vzxpjj7v.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Water & Drainage", "image_category": "other"}
|
| 28 |
+
{"report_id": "1758734723324", "description": "drainage is blocking road", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758734419/civicconnect/images/fnbpd5j2x5cmhwohu6tr.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam (image)"}
|
| 29 |
+
{"report_id": "1758734793540", "description": "drainage is blocking road", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758734489/civicconnect/images/zsgbjhecpflj01eh92bg.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 30 |
+
{"report_id": "1758736167137", "description": "fire caught for our home", "category": "Other", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Other", "image_category": null}
|
| 31 |
+
{"report_id": "1758736749788", "description": "grabage are not cleaned", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Garbage & Sanitation", "image_category": null}
|
| 32 |
+
{"report_id": "1758737927727", "description": "path hole is present in our locality", "category": "Road & Traffic", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758737623/civicconnect/images/f02evua8n5tf4hrtxggx.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Road & Traffic", "image_category": "pothole"}
|
| 33 |
+
{"report_id": "1758738038661", "description": "tash is not collected in our area", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758737734/civicconnect/images/mrsaue9fisfwev06pmqn.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Garbage & Sanitation", "image_category": "garbage dump"}
|
| 34 |
+
{"report_id": "1758738196955", "description": "trash bin is full", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758737893/civicconnect/images/vcjzc9rymekypiyjadse.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Garbage & Sanitation", "image_category": "overflowing dustbin"}
|
| 35 |
+
{"report_id": "1758739578535", "description": "trash bin not cleaned", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758739274/civicconnect/images/b7m4scex3edkgr7hxcxm.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam (image)"}
|
| 36 |
+
{"report_id": "1758739664334", "description": "trash bin not cleaned", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758739360/civicconnect/images/o2ntywlfnqrg4pohbawm.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 37 |
+
{"report_id": "1758739674270", "description": "trash bin not cleaned", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758739370/civicconnect/images/vatbsnytuzewywietgn7.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 38 |
+
{"report_id": "1758739687838", "description": "trash bin not cleaned", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758739384/civicconnect/images/yy76wr0v8nogtkm2ayfb.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 39 |
+
{"report_id": "1758739698330", "description": "trash bin not cleaned", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758739394/civicconnect/images/g07gsmw3hedpohoepbgr.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 40 |
+
{"report_id": "1758739716008", "description": "trash bin not cleaned", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758739412/civicconnect/images/azykwgknswwdkhnrkfhi.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 41 |
+
{"report_id": "1758739757088", "description": "one house caught fire", "category": "Public Safety", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758739452/civicconnect/images/ljn1cptwvnm6yww5xb5x.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Public Safety", "image_category": "building collapse"}
|
| 42 |
+
{"report_id": "1758784586140", "description": "trash is not being collected", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758784281/civicconnect/images/qe5l02jtbcakqx9w2mzp.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Garbage & Sanitation", "image_category": "overflowing dustbin"}
|
| 43 |
+
{"report_id": "1758784836958", "description": "fire is fuck", "category": "Public Safety", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 44 |
+
{"report_id": "1758784852611", "description": "fire is bloody\n", "category": "Public Safety", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Public Safety", "image_category": null}
|
| 45 |
+
{"report_id": "1758784856423", "description": "fire is bloody\n", "category": "Public Safety", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 46 |
+
{"report_id": "1758784864844", "description": "fire is bloody\n", "category": "Public Safety", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 47 |
+
{"report_id": "1758784871698", "description": "fire is bloody fuck\n", "category": "Public Safety", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 48 |
+
{"report_id": "1758804278432", "description": "bloody joshu fuck", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": null, "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 49 |
+
{"report_id": "1758814805925", "description": "trash is not collected ", "category": "Road & Traffic", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758814501/civicconnect/images/y052qokoswbjmndgtnpz.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "description not related to category, image not related to category"}
|
| 50 |
+
{"report_id": "1758814837430", "description": "trash is not collected ", "category": "Road & Traffic", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758814532/civicconnect/images/nbdv5o6r8odrwnbbqhnb.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "description not related to category, image not related to category"}
|
| 51 |
+
{"report_id": "1758814863184", "description": "trash is not collected ", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758814558/civicconnect/images/rodu3ukajgaztnqsajht.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Garbage & Sanitation", "image_category": "garbage dump"}
|
| 52 |
+
{"report_id": "1758814944937", "description": "fire accident is happening", "category": "Public Safety", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758814640/civicconnect/images/gpdvec5bmpf8c7r94zib.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Public Safety", "image_category": "building collapse"}
|
| 53 |
+
{"report_id": "1758815805811", "description": "trash is not collected ", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758815500/civicconnect/images/nbeiotde26j7lmkgyeik.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 54 |
+
{"report_id": "1758815830860", "description": "trash is not collected ", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758815526/civicconnect/images/ei5wycrh89dn9cofayms.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 55 |
+
{"report_id": "1758815846432", "description": "trash is not collected ", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758815542/civicconnect/images/ol0av71dpy905vjdcpaz.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Duplicate spam"}
|
| 56 |
+
{"report_id": "1758815866799", "description": "trash is not collected ", "category": "Road & Traffic", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758815562/civicconnect/images/g3pyj98aeku1tzkh4dyt.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Road & Traffic", "image_category": "pothole"}
|
| 57 |
+
{"report_id": "1758816016324", "description": "trash is not fuck", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758815711/civicconnect/images/vkkhq0oeqywqyqtwhooj.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 58 |
+
{"report_id": "1758816360844", "description": "trash is on the road not cleaned", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758816056/civicconnect/images/mt6f0tzntplegawnpyaf.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 59 |
+
{"report_id": "1758820625296", "description": "trash s not collected from so many days", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758820320/civicconnect/images/jp3bvfzwkv0vazsk2nmk.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 60 |
+
{"report_id": "1758820650418", "description": "trash s not collected from so many days", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758820346/civicconnect/images/ozikgqwef67duxftthln.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 61 |
+
{"report_id": "1758820661734", "description": "trash s not collected from so many days", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758820357/civicconnect/images/ayfaz9qg0y1k8p86slmu.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 62 |
+
{"report_id": "1758820689481", "description": "trash s not collected from so many days", "category": "Other", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758820384/civicconnect/images/fzexguiuntjnyfxztdqu.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Other", "image_category": "overflowing dustbin"}
|
| 63 |
+
{"report_id": "1758889488834", "description": "drainage is leaking here", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758889489/civicconnect/images/ysn4iv41oocrsgsrcjl5.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 64 |
+
{"report_id": "1758889505009", "description": "drainage is leaking here fuck", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758889504/civicconnect/images/o9exokxsfokixm8yzins.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 65 |
+
{"report_id": "1758889531653", "description": "drainage is leaking here bloody\n", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758889531/civicconnect/images/scrniw5mhfebgjosmkef.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 66 |
+
{"report_id": "1758889552261", "description": "drainage is leaking here bloody bitch\n", "category": "Water & Drainage", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758889552/civicconnect/images/jqipf1eny9owfidcwv0l.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 67 |
+
{"report_id": "1758895738978", "description": "fire accident took place", "category": "Public Safety", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758895739/civicconnect/images/j257yh9npkfxqcbrvurd.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 68 |
+
{"report_id": "1758896696704", "description": "path hole is bloody stupid", "category": "Road & Traffic", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758896697/civicconnect/images/fsez2ag01jubeowq1ujz.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Road & Traffic", "image_category": "pothole"}
|
| 69 |
+
{"report_id": "1758896778759", "description": "garbabge is fuck", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758896778/civicconnect/images/emoevgldc12aa0ocixaf.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 70 |
+
{"report_id": "1758901794580", "description": "Garbage is on road is full", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758901794/civicconnect/images/ke0o4aew7ydzpcov6xpl.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 71 |
+
{"report_id": "1758901820680", "description": "Garbage is on road is full", "category": "Other", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758901820/civicconnect/images/riob8f5c1oz3m0deumyp.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Other", "image_category": "overflowing dustbin"}
|
| 72 |
+
{"report_id": "1758907723799", "description": "strret light is not working in this location", "category": "Street Lighting", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758907724/civicconnect/images/g6d64y5sd4murycnu80o.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 73 |
+
{"report_id": "1758907818963", "description": "strret light is not working in this location", "category": "Street Lighting", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758907819/civicconnect/images/dwefzcmauoazbshn35tv.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 74 |
+
{"report_id": "1758907835205", "description": "strret light is not working in this location", "category": "Electricity", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758907835/civicconnect/images/u7qbgykieffgkjxwqa0w.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 75 |
+
{"report_id": "1758907860526", "description": "strret light is not working in this location", "category": "Parks & Recreation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758907860/civicconnect/images/bf3f8ttwuxc9enbcpqhs.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 76 |
+
{"report_id": "1758907981434", "description": "park is filled with water in this location", "category": "Parks & Recreation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758907981/civicconnect/images/hd8h4jx8u6r4qmilrzon.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Parks & Recreation", "image_category": "park"}
|
| 77 |
+
{"report_id": "1758908109234", "description": "park is filled with water in this location", "category": "Parks & Recreation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758908109/civicconnect/images/kl01gjqyezvd1yhezc60.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Parks & Recreation", "image_category": "park"}
|
| 78 |
+
{"report_id": "1758907723799", "description": "strret light is not working in this location", "category": "Street Lighting", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758907724/civicconnect/images/g6d64y5sd4murycnu80o.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "streetlight"}
|
| 79 |
+
{"report_id": "1758907818963", "description": "strret light is not working in this location", "category": "Street Lighting", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758907819/civicconnect/images/dwefzcmauoazbshn35tv.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "streetlight"}
|
| 80 |
+
{"report_id": "1758907835205", "description": "strret light is not working in this location", "category": "Street Lighting", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758907835/civicconnect/images/u7qbgykieffgkjxwqa0w.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "streetlight"}
|
| 81 |
+
{"report_id": "1758907860526", "description": "strret light is not working in this location", "category": "Street Lighting", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758907860/civicconnect/images/bf3f8ttwuxc9enbcpqhs.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "streetlight"}
|
| 82 |
+
{"report_id": "train_park_001", "description": "park flooded with water after heavy rain", "category": "Parks & Recreation", "user_id": "train_user", "image_url": "https://example.com/park_flooded.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Parks & Recreation", "image_category": "park"}
|
| 83 |
+
{"report_id": "train_park_002", "description": "playground equipment damaged in park", "category": "Parks & Recreation", "user_id": "train_user", "image_url": "https://example.com/playground_damaged.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Parks & Recreation", "image_category": "park"}
|
| 84 |
+
{"report_id": "train_park_003", "description": "garden area needs maintenance", "category": "Parks & Recreation", "user_id": "train_user", "image_url": "https://example.com/garden_maintenance.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Parks & Recreation", "image_category": "park"}
|
| 85 |
+
{"report_id": "train_park_004", "description": "tree fallen in park area", "category": "Parks & Recreation", "user_id": "train_user", "image_url": "https://example.com/tree_fallen_park.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Parks & Recreation", "image_category": "tree fallen"}
|
| 86 |
+
{"report_id": "train_park_005", "description": "park bench broken and needs repair", "category": "Parks & Recreation", "user_id": "train_user", "image_url": "https://example.com/broken_bench.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Parks & Recreation", "image_category": "park"}
|
| 87 |
+
{"report_id": "train_light_001", "description": "street light not working properly", "category": "Street Lighting", "user_id": "train_user", "image_url": "https://example.com/streetlight_broken.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "streetlight"}
|
| 88 |
+
{"report_id": "train_light_002", "description": "lamp post damaged and needs fixing", "category": "Street Lighting", "user_id": "train_user", "image_url": "https://example.com/lamp_post_damaged.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "streetlight"}
|
| 89 |
+
{"report_id": "train_light_003", "description": "electric pole light not functioning", "category": "Street Lighting", "user_id": "train_user", "image_url": "https://example.com/electric_pole_light.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "streetlight"}
|
| 90 |
+
{"report_id": "train_light_004", "description": "street lamp flickering and dim", "category": "Street Lighting", "user_id": "train_user", "image_url": "https://example.com/flickering_lamp.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "streetlight"}
|
| 91 |
+
{"report_id": "train_light_005", "description": "dark area due to non-working streetlight", "category": "Street Lighting", "user_id": "train_user", "image_url": "https://example.com/dark_street.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "streetlight"}
|
| 92 |
+
{"report_id": "1758908444579", "description": "street light is not working near our locality", "category": "Street Lighting", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758908445/civicconnect/images/gy5oznfklrrsh3u2wzl3.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "street lighting"}
|
| 93 |
+
{"report_id": "1758908512274", "description": "street light is not working in our locality", "category": "Street Lighting", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758908512/civicconnect/images/qum6cuhgkdsxsjtnjwhh.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 94 |
+
{"report_id": "1758908588848", "description": "park full with water entirely", "category": "Other", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758908589/civicconnect/images/zfhfxklqxbcimvmtzvmu.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Other", "image_category": "flooded park"}
|
| 95 |
+
{"report_id": "1758911266239", "description": "streetlight is not working in our locality", "category": "Street Lighting", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758911266/civicconnect/images/hul0fmnlmbrrpytno61f.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Street Lighting", "image_category": "street lighting"}
|
| 96 |
+
{"report_id": "1758911953881", "description": "park id totally filled with the water", "category": "Parks & Recreation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1758911954/civicconnect/images/egz3lwnf1kstbehsrrrg.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Parks & Recreation", "image_category": "flooded park"}
|
| 97 |
+
{"report_id": "1759913465036", "description": "trash is not collected fuck", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1759913464/civicconnect/images/lpwi3zyb1adhqvgmknt7.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 98 |
+
{"report_id": "1759913483383", "description": "trash is not collected in this location", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1759913482/civicconnect/images/psb9jgo0jtyzh71b2esf.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Garbage & Sanitation", "image_category": "overflowing dustbin"}
|
| 99 |
+
{"report_id": "1765894932155", "description": "bloody rascal", "category": "Garbage & Sanitation", "user_id": "68d4172602906a425b987332", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1765894930/civicconnect/images/ffid8y15akucvy3z7ard.png", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Garbage & Sanitation", "image_category": "dustbin"}
|
| 100 |
+
{"report_id": "1765905306799", "description": "Bloody is bloody blody", "category": "Water & Drainage", "user_id": "692029e2aeec8c1dd08cd88a", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1765905305/civicconnect/images/q5146lqbpj0qdzt7hgvi.png", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 101 |
+
{"report_id": "1765905360447", "description": "Bloody is bloody blody", "category": "Water & Drainage", "user_id": "692029e2aeec8c1dd08cd88a", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1765905359/civicconnect/images/aqb85fum8kgzcrnbkj7e.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "image not related to category"}
|
| 102 |
+
{"report_id": "1765905377190", "description": "Bloody is bloody blody", "category": "Garbage & Sanitation", "user_id": "692029e2aeec8c1dd08cd88a", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1765905376/civicconnect/images/h7g4zczzljwrvn59wqr3.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Garbage & Sanitation", "image_category": "bin"}
|
| 103 |
+
{"report_id": "1765905946346", "description": "Bloody rascal Bloody rascal", "category": "Garbage & Sanitation", "user_id": "692029e2aeec8c1dd08cd88a", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1765905945/civicconnect/images/pifodkreri5jil7nukrb.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "normal", "text_category": "Garbage & Sanitation", "image_category": "bin"}
|
| 104 |
+
{"report_id": "1765906207021", "description": "bloody stupid bloody stupid", "category": "Garbage & Sanitation", "user_id": "692029e2aeec8c1dd08cd88a", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1765906206/civicconnect/images/mwvyijxrgnybyxv7yiqr.jpg", "latitude": null, "longitude": null, "status": "rejected", "reason": "Abusive language detected"}
|
| 105 |
+
{"report_id": "1765906298424", "description": "fire acciedent fire acciedent", "category": "Public Safety", "user_id": "692029e2aeec8c1dd08cd88a", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1765906297/civicconnect/images/ufzollwqqjwwnha7xgfc.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Public Safety", "image_category": "building collapse"}
|
| 106 |
+
{"report_id": "1765942883964", "description": "fire occured nearby in this lication", "category": "Public Safety", "user_id": "692029e2aeec8c1dd08cd88a", "image_url": "https://res.cloudinary.com/dhidj8n1c/image/upload/v1765942883/civicconnect/images/nkatgbrti1ydirokmrgt.jpg", "latitude": null, "longitude": null, "status": "accepted", "priority": "urgent", "text_category": "Public Safety", "image_category": "building collapse"}
|
ml-backend-with-image/debug_profanity.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Debug script to test profanity detection
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 8 |
+
|
| 9 |
+
# Stage 1: exercise the optional `profanity_check` package directly.
# Prints what `predict` returns for a clean and a profane sentence, then runs
# a small table of words/sentences and labels each one PROFANE or CLEAN.
print("=" * 60)
print("Testing Profanity-Check Import")
print("=" * 60)

try:
    from profanity_check import predict
    print("✅ Successfully imported profanity_check.predict")

    # Probe one known-clean and one known-profane sentence, dumping the raw
    # result and its type so the return shape of `predict` can be inspected.
    probes = (
        ("clean", "This is a clean sentence"),
        ("profane", "This is a fucking pothole"),
    )
    for label, sample in probes:
        print(f"\nTesting with {label} text: '{sample}'")
        probe_result = predict([sample])
        print(f"Result: {probe_result}")
        print(f"Type: {type(probe_result)}")
        if hasattr(probe_result, '__getitem__'):
            print(f"Value: {probe_result[0]}")
            # A label of 1 is treated as "profane" throughout this script.
            print(f"Is profane: {int(probe_result[0]) == 1}")

    # Test with more profane words
    test_cases = [
        "fuck",
        "shit",
        "bitch",
        "damn",
        "hell",
        "This is a clean description of a pothole",
        "fuck this pothole",
        "what the hell is wrong",
    ]

    print("\n" + "=" * 60)
    print("Testing Multiple Cases")
    print("=" * 60)

    for test_text in test_cases:
        result = predict([test_text])
        # Fall back to treating the result as a scalar when it is not indexable.
        label_value = result[0] if hasattr(result, '__getitem__') else result
        is_profane = int(label_value) == 1
        status = "PROFANE" if is_profane else "CLEAN"
        print(f"{status:8} | '{test_text}'")

except ImportError as e:
    # The package is optional; report the failure and continue with stage 2.
    print(f"❌ Failed to import profanity_check: {e}")
except Exception as e:
    # Debug script: show the full traceback rather than aborting.
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()
|
| 64 |
+
|
| 65 |
+
# Stage 2: run the same kind of text through the application pipeline and
# check whether profane descriptions come back rejected as abusive.
print("\n" + "=" * 60)
print("Testing Pipeline Function")
print("=" * 60)

try:
    from app.pipeline import classify_report, PROFANITY_AVAILABLE, _profanity_predict

    # Show whether the pipeline managed to load its profanity predictor.
    print(f"PROFANITY_AVAILABLE: {PROFANITY_AVAILABLE}")
    print(f"_profanity_predict is None: {_profanity_predict is None}")

    # Test cases
    test_reports = [
        {
            "report_id": "test1",
            "description": "This is a fucking pothole",
            "category": "Road & Traffic",
            "user_id": "test",
        },
        {
            "report_id": "test2",
            "description": "There is a large pothole on Main Street",
            "category": "Road & Traffic",
            "user_id": "test",
        },
        {
            "report_id": "test3",
            "description": "What the hell is wrong with this road",
            "category": "Road & Traffic",
            "user_id": "test",
        },
    ]

    for report in test_reports:
        result = classify_report(report)
        status = result.get("status")
        # `or ""` also covers an explicit `"reason": None` in the result,
        # which would otherwise break the `.lower()` call below.
        reason = result.get("reason") or ""
        print(f"\nDescription: '{report['description']}'")
        print(f"Status: {status}")
        if reason:
            print(f"Reason: {reason}")
        if status == "rejected" and "abusive" in reason.lower():
            print("✅ Profanity detected correctly!")
        elif status == "accepted" and any(
            word in report['description'].lower() for word in ["fuck", "hell"]
        ):
            # Accepted although the description contains a flagged word.
            print("❌ Profanity NOT detected!")

except Exception as e:
    # Debug script: report import or pipeline failures with a traceback.
    print(f"❌ Error testing pipeline: {e}")
    import traceback
    traceback.print_exc()
|
ml-backend-with-image/requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
pydantic
|
| 4 |
+
transformers
|
| 5 |
+
torch
|
| 6 |
+
pillow
|
| 7 |
+
requests
|
| 8 |
+
imagehash
|
| 9 |
+
profanity-check
|
ml-backend-with-image/test_classification.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script to verify the improved image classification
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 8 |
+
|
| 9 |
+
from app.pipeline import classify_report
|
| 10 |
+
|
| 11 |
+
def test_park_water_classification():
    """Check that a flooded-park report is accepted by classify_report.

    Submits a "Parks & Recreation" report whose description mentions a park
    filled with water and whose image_url is an example.com address, prints
    the raw result, and prints a PASSED/FAILED line.

    Returns:
        True when the result's status is "accepted", otherwise False.
    """
    print("Testing park filled with water classification...")

    report = {
        "report_id": "test_park_001",
        "description": "park is filled with water in this location",
        "category": "Parks & Recreation",
        "user_id": "test_user",
        "image_url": "https://example.com/park_water.jpg",
    }

    result = classify_report(report)
    print(f"Result: {result}")

    # .get() so a result without a "status" key counts as a failure
    # instead of raising KeyError.
    accepted = result.get("status") == "accepted"
    if accepted:
        print("✅ Park water classification PASSED")
    else:
        print(f"❌ Park water classification FAILED: {result.get('reason', 'Unknown reason')}")

    return accepted
|
| 32 |
+
|
| 33 |
+
def test_streetlight_classification():
    """Check that a broken-streetlight report is accepted by classify_report.

    Submits a "Street Lighting" report with an example.com image_url, prints
    the raw result, and prints a PASSED/FAILED line.

    Returns:
        True when the result's status is "accepted", otherwise False.
    """
    print("\nTesting streetlight classification...")

    report = {
        "report_id": "test_light_001",
        "description": "street light is not working in this location",
        "category": "Street Lighting",
        "user_id": "test_user",
        "image_url": "https://example.com/streetlight.jpg",
    }

    result = classify_report(report)
    print(f"Result: {result}")

    # .get() so a result without a "status" key counts as a failure
    # instead of raising KeyError.
    accepted = result.get("status") == "accepted"
    if accepted:
        print("✅ Streetlight classification PASSED")
    else:
        print(f"❌ Streetlight classification FAILED: {result.get('reason', 'Unknown reason')}")

    return accepted
|
| 54 |
+
|
| 55 |
+
def test_without_image():
    """Check that a report with no image is accepted by classify_report.

    Submits a "Parks & Recreation" report with image_url set to None, prints
    the raw result, and prints a PASSED/FAILED line.

    Returns:
        True when the result's status is "accepted", otherwise False.
    """
    print("\nTesting without image (text-only classification)...")

    report = {
        "report_id": "test_text_001",
        "description": "park is filled with water in this location",
        "category": "Parks & Recreation",
        "user_id": "test_user",
        "image_url": None,
    }

    result = classify_report(report)
    print(f"Result: {result}")

    # .get() so a result without a "status" key counts as a failure
    # instead of raising KeyError.
    accepted = result.get("status") == "accepted"
    if accepted:
        print("✅ Text-only classification PASSED")
    else:
        print(f"❌ Text-only classification FAILED: {result.get('reason', 'Unknown reason')}")

    return accepted
|
| 76 |
+
|
| 77 |
+
if __name__ == "__main__":
    # Script entry point: run the three classification checks in order,
    # print a per-check summary, and exit non-zero when any check failed.
    print("🧪 Testing improved image classification...")
    print("=" * 50)

    # Run tests
    park_test = test_park_water_classification()
    light_test = test_streetlight_classification()
    text_test = test_without_image()

    print("\n" + "=" * 50)
    print("📊 Test Results:")
    print(f"Park water classification: {'✅ PASSED' if park_test else '❌ FAILED'}")
    print(f"Streetlight classification: {'✅ PASSED' if light_test else '❌ FAILED'}")
    print(f"Text-only classification: {'✅ PASSED' if text_test else '❌ FAILED'}")

    all_passed = park_test and light_test and text_test
    print(f"\nOverall: {'✅ ALL TESTS PASSED' if all_passed else '❌ SOME TESTS FAILED'}")

    # Propagate the outcome through the exit status so shell/CI callers can
    # detect failures (same convention as the profanity test script).
    sys.exit(0 if all_passed else 1)
|
ml-backend-with-image/test_profanity.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script to verify profanity detection is working correctly
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 8 |
+
|
| 9 |
+
from app.pipeline import classify_report
|
| 10 |
+
|
| 11 |
+
def test_profanity_detection():
    """Run classify_report over abusive and clean descriptions and report results.

    Every case is submitted as a "Road & Traffic" report with no image and
    fixed coordinates. A case passes when the rejected/accepted outcome
    matches the expectation stored in the case table; the rejection reason is
    printed but not checked.

    Returns:
        True when every case matched its expectation, otherwise False.
    """
    print("🧪 Testing Profanity Detection")
    print("=" * 60)

    # Test cases: (description, should_be_rejected, test_name)
    test_cases = [
        # Abusive language - should be rejected
        ("This is a fucking pothole that needs fixing", True, "Explicit profanity"),
        ("What the hell is wrong with this road", True, "Mild profanity"),
        ("This shit needs to be fixed immediately", True, "Strong profanity"),
        ("You're an idiot for not fixing this", True, "Insult"),
        ("This is crap and needs attention", True, "Mild profanity"),

        # Clean language - should be accepted
        ("There is a large pothole on Main Street that needs repair", False, "Clean description"),
        ("The street light is not working properly", False, "Clean technical issue"),
        ("Water is leaking from the pipe", False, "Clean water issue"),
        ("Garbage is overflowing from the bin", False, "Clean sanitation issue"),
        ("Park maintenance is needed", False, "Clean maintenance request"),

        # Edge cases
        ("FUCK this pothole", True, "All caps profanity"),
        ("fuck this pothole", True, "Lowercase profanity"),
        ("F*ck this pothole", False, "Censored profanity (might pass)"),
        ("This is a damn good road", True, "Profanity in positive context"),
    ]

    passed = 0
    failed = 0

    for desc, should_reject, test_name in test_cases:
        report = {
            "report_id": f"test_{test_name.replace(' ', '_').lower()}",
            "description": desc,
            "category": "Road & Traffic",
            "user_id": "test_user",
            "image_url": None,
            "latitude": 12.9,
            "longitude": 77.6,
        }

        result = classify_report(report)
        was_rejected = result.get("status") == "rejected"
        reason = result.get("reason", "")

        # Check if result matches expectation
        if was_rejected == should_reject:
            status = "✅ PASS"
            passed += 1
        else:
            status = "❌ FAIL"
            failed += 1

        print(f"\n{status} - {test_name}")
        print(f" Description: \"{desc}\"")
        print(f" Expected: {'REJECTED' if should_reject else 'ACCEPTED'}")
        print(f" Got: {'REJECTED' if was_rejected else 'ACCEPTED'}")
        if was_rejected:
            print(f" Reason: {reason}")

    print("\n" + "=" * 60)
    print(f"📊 Test Results: {passed} passed, {failed} failed out of {len(test_cases)} tests")

    if failed == 0:
        print("✅ All profanity detection tests PASSED!")
    else:
        print(f"❌ {failed} test(s) FAILED - profanity detection needs improvement")

    return failed == 0
|
| 81 |
+
|
| 82 |
+
if __name__ == "__main__":
    # Script entry point: exit status 0 when every case passed, 1 otherwise.
    sys.exit(0 if test_profanity_detection() else 1)
|