Amii2410 committed on
Commit
ab8e415
·
verified ·
1 Parent(s): 465e861

Upload 3 files

Browse files
Files changed (3) hide show
  1. Spacy.txt +1 -0
  2. app (1).py +366 -0
  3. requirements.txt +8 -0
Spacy.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ spacy[transformers]
app (1).py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/drive/1Bli_bGuux1CJr22uJYxsoLSQkr5LjXvD
"""

import random
import pandas as pd

# Complaint categories with 10–12 synonym-rich templates each (no {} placeholders now)
categories = {
    "Garbage": [
        "Garbage not collected",
        "Trash piled up",
        "Waste scattered everywhere",
        "Debris dumped carelessly",
        "Rubbish overflowing",
        "Litter causing bad smell",
        "Uncollected scrap lying around",
        "Filth spread all over",
        "Junk thrown carelessly",
        "Refuse dumped openly",
        "Garbage heap blocking the way",
        "Dumping ground overflowing"
    ],
    "Water": [
        "Water pipeline leaking",
        "No water supply",
        "Contaminated tap water",
        "Low water pressure",
        "Water tanker not arrived",
        "Sewage water overflow",
        "Drainage issue",
        "Sewer blockage reported",
        "Flooding due to heavy rain",
        "Water logging problem",
        "Dirty water flowing",
        "Burst pipeline issue"
    ],
    "Roads": [
        "Big pothole on the road",
        "Damaged road surface",
        "Cracks on the road",
        "Uneven surface making driving difficult",
        "Broken speed breaker",
        "Debris blocking the road",
        "Manhole cover missing",
        "Broken pavement",
        "Damaged footpath",
        "Road erosion reported",
        "Construction waste dumped on road",
        "Street blocked due to cave-in"
    ],
    "Electricity": [
        # General electricity
        "Frequent power cuts",
        "Load shedding problem",
        "Voltage fluctuation issue",
        "Transformer not working",
        "Wire hanging dangerously",
        "No electricity supply",
        "Complete blackout",
        "Short circuit issue reported",
        "Electrical failure in houses",
        "Electric spark observed",
        # Streetlight related
        "Streetlight not working",
        "Streetlight bulb fused",
        "Dark area due to broken streetlight",
        "Streetlight flickering",
        "Streetlight pole damaged",
        "Entire lane dark without lights"
    ]
}

# Number of complaints per category (balanced dataset)
num_samples = 300  # per category

# Draw `num_samples` random templates per category so every class is equally
# represented (4 categories * 300 = 1200 rows total).
data = []
for category, templates in categories.items():
    for _ in range(num_samples):
        data.append({
            "Complaint Text": random.choice(templates),
            "Category": category
        })

# Convert to DataFrame
df = pd.DataFrame(data)

# Shuffle rows so categories are interleaved instead of grouped
df = df.sample(frac=1).reset_index(drop=True)

# Save CSV
df.to_csv("synthetic_civic_complaints_no_location.csv", index=False, encoding="utf-8")

print("✅ Final synonym-rich dataset created: synthetic_civic_complaints_no_location.csv")
# BUG FIX: `display` only exists inside IPython/Colab; running this as a plain
# Python script raised NameError. print() works everywhere.
print(df.head())
102
+
103
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, learning_curve
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# 1. Load dataset
# BUG FIX: read the CSV that the generator above actually writes
# ("synthetic_civic_complaints_no_location.csv"). The previous name
# ("synthetic_civic_complaints_rich.csv") is never created anywhere in this
# script, so this line always failed with FileNotFoundError.
df = pd.read_csv("synthetic_civic_complaints_no_location.csv")

# 🔹 Make all complaint text lowercase (case-insensitive)
df["Complaint Text"] = df["Complaint Text"].str.lower()

# 2. Train-test split (stratified so each category keeps its proportion)
X = df["Complaint Text"]
y = df["Category"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 3. Vectorizer + classifier
vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

clf = LogisticRegression(max_iter=500)
clf.fit(X_train_vec, y_train)

# 4. Evaluate on the held-out split
y_pred = clf.predict(X_test_vec)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# 5. Confusion Matrix
labels = clf.classes_
cm = confusion_matrix(y_test, y_pred, labels=labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
fig, ax = plt.subplots(figsize=(6, 5))
disp.plot(ax=ax, cmap="Blues", values_format="d")
plt.show()

# 6. Cross-validation (pipeline so the vectorizer is refit per fold,
# avoiding train/test leakage)
from sklearn.pipeline import Pipeline
pipe = Pipeline([
    ("tfidf", TfidfVectorizer(stop_words="english", max_features=5000)),
    ("clf", LogisticRegression(max_iter=500))
])

scores = cross_val_score(pipe, X, y, cv=5, scoring="accuracy")
print("Cross-validation scores:", scores)
print("Mean CV Accuracy:", scores.mean())

# 7. Learning Curve
train_sizes, train_scores, val_scores = learning_curve(
    pipe, X, y, cv=5, scoring="accuracy",
    train_sizes=np.linspace(0.1, 1.0, 5)
)

train_mean = train_scores.mean(axis=1)
val_mean = val_scores.mean(axis=1)

plt.plot(train_sizes, train_mean, label="Training score")
plt.plot(train_sizes, val_mean, label="Validation score")
plt.xlabel("Training Set Size")
plt.ylabel("Accuracy")
plt.title("Learning Curve")
plt.legend()
plt.grid(True)
plt.show()
174
+
175
import spacy
from spacy.training.example import Example

# Create blank English pipeline
nlp = spacy.blank("en")

# Add text categorizer instead of NER
textcat = nlp.add_pipe("textcat")
textcat.add_label("Garbage")
textcat.add_label("Water")
textcat.add_label("Roads")
textcat.add_label("Electricity")

# Prepare training data: one (text, {"cats": one-hot}) pair per complaint row
TRAIN_DATA = []
for _, row in df.iterrows():
    text = row["Complaint Text"]
    label = row["Category"]
    cats = {cat: 0.0 for cat in textcat.labels}
    cats[label] = 1.0
    TRAIN_DATA.append((text, {"cats": cats}))

# Train the text classifier.
# FIX: `nlp.begin_training()` is deprecated in spaCy v3 — `nlp.initialize()`
# is the v3 API and likewise returns the optimizer.
optimizer = nlp.initialize()
for i in range(20):  # epochs
    losses = {}
    for text, annotations in TRAIN_DATA:
        doc = nlp.make_doc(text)
        example = Example.from_dict(doc, annotations)
        nlp.update([example], sgd=optimizer, losses=losses)
    print(f"Epoch {i+1}, Losses: {losses}")

# Save model
nlp.to_disk("complaint_textcat_model")
print("✅ Text classification model saved: complaint_textcat_model")
210
+
211
import spacy
from spacy.training.example import Example
import random

# 🔹 Build text classification training data: each complaint becomes a
# (text, {"cats": one-hot dict}) training pair.
category_labels = ["Garbage", "Water", "Roads", "Electricity"]

TRAIN_DATA = []
for _, row in df.iterrows():
    one_hot = {name: 0.0 for name in category_labels}
    one_hot[row["Category"]] = 1.0
    TRAIN_DATA.append((row["Complaint Text"], {"cats": one_hot}))

# 🔹 Create blank pipeline with text categorizer
nlp = spacy.blank("en")
textcat = nlp.add_pipe("textcat")
for name in category_labels:
    textcat.add_label(name)

nlp.initialize()

# 🔹 Train model: reshuffle every epoch, accumulate losses for logging
for itn in range(10):  # epochs
    random.shuffle(TRAIN_DATA)
    losses = {}
    for text, ann in TRAIN_DATA:
        example = Example.from_dict(nlp.make_doc(text), ann)
        nlp.update([example], losses=losses)
    print(f"Epoch {itn+1}, Losses: {losses}")
246
+
247
# 🔹 Complaint prediction function
def predict_complaint(text):
    """Classify one complaint string and attach a keyword-based priority.

    Returns a dict with the original complaint, the highest-scoring
    category from the trained textcat pipeline, and a High/Medium/Low
    priority derived from keyword matching.
    """
    # Step 1 → Category prediction: take the top-scoring category
    scores = nlp(text).cats
    best_category = max(scores, key=scores.get)

    # Step 2 → Priority detection (case-insensitive keyword scan;
    # urgent keywords win over medium ones)
    lowered = text.lower()
    high_triggers = ["urgent", "dangerous", "immediately", "accident", "severe"]
    medium_triggers = ["not working", "overflow", "leak", "delay", "low pressure"]

    if any(word in lowered for word in high_triggers):
        level = "High"
    elif any(word in lowered for word in medium_triggers):
        level = "Medium"
    else:
        level = "Low"

    return {
        "Complaint": text,
        "Predicted Category": best_category,
        "Priority": level
    }

# 🔹 Test it
print(predict_complaint("Debris dumped behind chandni chowk"))
print(predict_complaint("Streetlight not working near ChANdni chowk, its very dangerous"))
275
+
276
import pickle

# Wrapper so spaCy model can be pickled
class ComplaintClassifier:
    """Pickle-friendly wrapper pairing a spaCy text classifier with a
    keyword-based priority heuristic."""

    def __init__(self, nlp_model):
        # Underlying spaCy pipeline; callable, returns a Doc with `.cats`.
        self.nlp = nlp_model

    def predict(self, text):
        """Return category, priority, and the original complaint text."""
        scores = self.nlp(text).cats
        top_category = max(scores, key=scores.get)

        # Priority detection: first matching tier wins; default is "Low".
        lowered = text.lower()
        tiers = (
            ("High", ("urgent", "dangerous", "immediately", "accident", "severe")),
            ("Medium", ("not working", "overflow", "leak", "delay", "low pressure")),
        )
        priority = "Low"
        for level, markers in tiers:
            if any(marker in lowered for marker in markers):
                priority = level
                break

        return {
            "Complaint": text,
            "Predicted Category": top_category,
            "Priority": priority
        }
304
+
305
# Wrap trained spaCy model
# NOTE: the ComplaintClassifier wrapper (not the raw spaCy pipeline) is what
# gets pickled below, so whatever unpickles complaint_model.pkl receives a
# ComplaintClassifier and must use .predict(text) / .nlp on it.
classifier = ComplaintClassifier(nlp)

# Save with pickle
# NOTE(review): unpickling requires ComplaintClassifier to be importable from
# the loading module; here it is defined in this same script.
with open("complaint_model.pkl", "wb") as f:
    pickle.dump(classifier, f)

print("✅ complaint_model.pkl saved successfully")
313
+
314
+ from fastapi import FastAPI
315
+ from pydantic import BaseModel
316
+ import uvicorn
317
+ import nest_asyncio
318
+ import pickle
319
+ import spacy
320
+
321
# ========== Load trained model ==========
# Make sure you have already trained & saved it as complaint_model.pkl
# NOTE: complaint_model.pkl was written with pickle.dump(ComplaintClassifier(...))
# earlier in this script, so `nlp` here is the ComplaintClassifier wrapper,
# NOT a raw spaCy pipeline — it exposes .predict(text) and the underlying
# pipeline as .nlp.
# SECURITY: pickle.load executes arbitrary code from the file; only load
# pickle files you produced yourself.
with open("complaint_model.pkl", "rb") as f:
    nlp = pickle.load(f)
325
+
326
# ========== Priority detection ==========
def detect_priority(text: str) -> str:
    """Map a complaint to "High", "Medium", or "Low" priority.

    Case-insensitive keyword scan: urgent keywords take precedence over
    medium ones; anything else is "Low".
    """
    lowered = text.lower()
    high_markers = ("urgent", "dangerous", "immediately", "accident", "severe")
    medium_markers = ("not working", "overflow", "leak", "delay", "low pressure")

    if any(marker in lowered for marker in high_markers):
        return "High"
    if any(marker in lowered for marker in medium_markers):
        return "Medium"
    return "Low"
337
+
338
# ========== FastAPI ==========
app = FastAPI()

class ComplaintInput(BaseModel):
    # Raw complaint text to classify.
    text: str

@app.post("/predict")
async def predict_complaint(input_data: ComplaintInput):
    """Classify a complaint and attach a keyword-based priority.

    Returns the complaint text, the top-scoring category, the detected
    priority, and the raw per-category scores.
    """
    # BUG FIX: `nlp` holds the unpickled ComplaintClassifier wrapper (see the
    # pickle.dump earlier in this script), which is NOT callable — calling
    # nlp(...) raised TypeError on every request. Run the underlying spaCy
    # pipeline stored on its `.nlp` attribute to get the per-category scores.
    doc = nlp.nlp(input_data.text)
    cats = doc.cats
    category = max(cats, key=cats.get)
    priority = detect_priority(input_data.text)

    return {
        "Complaint": input_data.text,
        "Predicted Category": category,
        "Priority": priority,
        "Raw Scores": cats
    }
357
+
358
# ========== Run in Colab only ==========
if __name__ == "__main__":
    try:
        # nest_asyncio allows uvicorn's event loop to start inside an
        # already-running loop (e.g. a Colab/Jupyter kernel).
        nest_asyncio.apply()
        uvicorn.run(app, host="0.0.0.0", port=7860)
    except RuntimeError:
        # In Hugging Face or when uvicorn is auto-run, we skip this
        pass
366
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ scikit-learn
4
+ pandas
5
+ numpy
6
+ matplotlib
7
+ spacy
8
+ textblob
+ nest_asyncio