kanhacoderx committed on
Commit
c893f14
·
verified ·
1 Parent(s): 3eb7a64

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +17 -0
  2. app.py +219 -0
  3. readme.md +12 -0
  4. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# --no-install-recommends keeps the image small; the apt lists are removed in
# the same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# up front so it has a real, writable home directory at runtime.
RUN useradd --create-home --uid 1000 user

WORKDIR /app

COPY requirements.txt .

# Single layer: upgrade pip, then install the application dependencies.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

COPY --chown=user:user app.py .
COPY --chown=user:user models ./models

# app.py instantiates SentenceTransformer at import time, which downloads the
# embedding model into the Hugging Face cache. Point the cache at a directory
# the runtime user can write to, otherwise startup fails with a permission error.
ENV HOME=/home/user \
    HF_HOME=/home/user/.cache/huggingface

USER user

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from typing import Dict, List, Optional
3
+
4
+ import json
5
+ import joblib
6
+ import numpy as np
7
+
8
+ from fastapi import FastAPI
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from pydantic import BaseModel, Field
11
+ from sentence_transformers import SentenceTransformer
12
+
13
+
14
+ # Paths
15
+
16
+
17
# Directory containing this module; model artifacts live in ./models beside it.
BASE_DIR = Path(__file__).resolve().parent

# Serialized classifier and the JSON metadata that describes it.
MODEL_PATH = BASE_DIR.joinpath("models", "best_logistic_embedding_model.joblib")
METADATA_PATH = BASE_DIR.joinpath("models", "best_model_metadata.json")
21
+
22
+
23
+
24
+ # FastAPI app
25
+
26
+
27
app = FastAPI(
    title="Grievance Department Classifier API",
    description="Classifies citizen complaints into government departments using MiniLM embeddings + Logistic Regression.",
    version="1.0.0",
)

# CORS policy.
# A wildcard origin must not be combined with allow_credentials=True: browsers
# refuse "Access-Control-Allow-Origin: *" on credentialed requests, and
# echoing back the caller's origin instead would grant every website
# credentialed access. No route in this file relies on cookies or auth
# headers, so credentials are disabled while the origin list is still open.
# Re-enable credentials only together with an explicit origin list.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # later replace with your frontend URL
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
40
+
41
+
42
+
43
+ # Request / Response Schemas
44
+
45
+
46
class DepartmentPredictionRequest(BaseModel):
    """Request body for classifying a single citizen complaint.

    Attributes are documented through their ``Field`` descriptions, which
    also appear in the generated OpenAPI schema.
    """

    # ``examples=[...]`` replaces the former ``example=...`` keyword: arbitrary
    # extra keywords on Field are deprecated in Pydantic v2, whereas
    # ``examples`` is a supported argument there.
    complaint_text: str = Field(
        ...,
        min_length=3,
        description="Citizen complaint text",
        examples=["Garbage is dumped in an empty plot and bad smell is coming."],
    )
    # Accepted for API compatibility; the routes visible in this file do not
    # pass it to the classifier.
    location: Optional[str] = Field(
        default="unknown",
        description="Optional location, ward, zone, city, or area",
        examples=["Ward 12"],
    )
58
+
59
+
60
# One (department, probability) pair in a prediction's probability list.
class ClassProbability(BaseModel):
    # Department label as reported by the classifier.
    department: str
    # Probability assigned to that department.
    probability: float
63
+
64
+
65
# Response body for a single classified complaint.
class DepartmentPredictionResponse(BaseModel):
    # The complaint text exactly as received in the request.
    complaint_text: str
    # Department chosen for the complaint.
    predicted_department: str
    # Score attached to the chosen department.
    confidence: float
    # Per-department probabilities returned by the classifier service.
    probabilities: List[ClassProbability]
    # Name of the embedding model used to encode the text.
    model: str
    # Human-readable description of the classification method.
    method: str
72
+
73
+
74
# Request body carrying several complaints to classify in one call.
class BatchDepartmentPredictionRequest(BaseModel):
    # Each entry is validated like a single-complaint request.
    complaints: List[DepartmentPredictionRequest]
76
+
77
+
78
# Response body for a batch call: one prediction per submitted complaint.
class BatchDepartmentPredictionResponse(BaseModel):
    # Predictions in the same order as the request's complaints.
    predictions: List[DepartmentPredictionResponse]
80
+
81
+
82
+
83
+ # Model service
84
+
85
+
86
class DepartmentClassifierService:
    """Classify complaint text into a department.

    Holds the sentence-embedding model, the trained classifier loaded from
    disk, and the metadata dictionary that accompanies the classifier.
    """

    def __init__(self, model_path: Path, metadata_path: Path):
        """Load the metadata, the classifier and the embedding model.

        Args:
            model_path: Location of the joblib-serialized classifier.
            metadata_path: Location of the JSON metadata file.

        Raises:
            FileNotFoundError: If either file does not exist.
        """
        if not model_path.exists():
            raise FileNotFoundError(f"Model file not found: {model_path}")

        if not metadata_path.exists():
            raise FileNotFoundError(f"Metadata file not found: {metadata_path}")

        with open(metadata_path, "r", encoding="utf-8") as file:
            self.metadata = json.load(file)

        # The metadata may name the embedding model; otherwise fall back to
        # the multilingual MiniLM default.
        self.embedding_model_name = self.metadata.get(
            "embedding_model_name",
            "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        )

        # NOTE(security): joblib.load unpickles the file and can therefore
        # execute arbitrary code. Only load model files from a trusted source.
        self.classifier = joblib.load(model_path)

        self.embedding_model = SentenceTransformer(self.embedding_model_name)

    def predict(self, complaint_text: str) -> Dict:
        """Predict the department for one complaint.

        Args:
            complaint_text: Raw complaint text; surrounding whitespace is
                stripped before encoding.

        Returns:
            A dict with ``predicted_department`` (str), ``confidence``
            (float) and ``probabilities`` (list of
            ``{"department", "probability"}`` dicts, highest first).
        """
        text = str(complaint_text).strip()

        embedding = self.embedding_model.encode(
            [text],
            convert_to_numpy=True,
            normalize_embeddings=True,
        )

        if not hasattr(self.classifier, "predict_proba"):
            # No probability estimates available: report the hard prediction
            # with a nominal confidence of 1.0.
            predicted_department = str(self.classifier.predict(embedding)[0])
            return {
                "predicted_department": predicted_department,
                "confidence": 1.0,
                "probabilities": [
                    {"department": predicted_department, "probability": 1.0}
                ],
            }

        probabilities = self.classifier.predict_proba(embedding)[0]

        # sorted() is stable even with reverse=True, so tied classes keep
        # their classes_ order and the first maximum stays on top.
        probability_items = sorted(
            (
                {"department": str(cls), "probability": float(prob)}
                for cls, prob in zip(self.classifier.classes_, probabilities)
            ),
            key=lambda item: item["probability"],
            reverse=True,
        )

        # Take the label and its confidence from the same probability vector
        # so they always describe the same class. This also avoids a second
        # classifier pass through predict().
        top = probability_items[0]

        return {
            "predicted_department": top["department"],
            "confidence": top["probability"],
            "probabilities": probability_items,
        }
151
+
152
+
153
# Single shared service instance, built once at import time and used by every route.
classifier_service = DepartmentClassifierService(MODEL_PATH, METADATA_PATH)
157
+
158
+
159
+
160
+ # Routes
161
+
162
+
163
@app.get("/")
def home():
    # Landing endpoint: a status message plus the names of the loaded models.
    service = classifier_service
    payload = {"message": "Grievance Department Classifier API is running"}
    payload["embedding_model"] = service.embedding_model_name
    payload["classifier"] = service.metadata.get("classifier", "unknown")
    return payload
170
+
171
+
172
@app.get("/health")
def health():
    # Health probe: fixed "ok" status plus the names of the loaded models.
    classifier_name = classifier_service.metadata.get("classifier", "unknown")
    return dict(
        status="ok",
        model_loaded=True,
        embedding_model=classifier_service.embedding_model_name,
        classifier=classifier_name,
    )
180
+
181
+
182
@app.get("/model-info")
def model_info():
    # Return the metadata dictionary loaded alongside the classifier, unchanged.
    metadata = classifier_service.metadata
    return metadata
185
+
186
+
187
@app.post("/predict-department", response_model=DepartmentPredictionResponse)
def predict_department(request: DepartmentPredictionRequest):
    """Classify a single complaint and return the department prediction.

    Raises:
        HTTPException: 422 when the complaint text contains only whitespace.
    """
    # Function-scope import so this handler does not depend on a change to
    # the module's import block.
    from fastapi import HTTPException

    # The schema's min_length counts whitespace, but the classifier service
    # strips the text before embedding it. Reject blank input here rather
    # than returning a prediction for an empty string.
    if not request.complaint_text.strip():
        raise HTTPException(
            status_code=422,
            detail="complaint_text must not be blank",
        )

    result = classifier_service.predict(request.complaint_text)

    return {
        "complaint_text": request.complaint_text,
        "predicted_department": result["predicted_department"],
        "confidence": result["confidence"],
        "probabilities": result["probabilities"],
        "model": classifier_service.embedding_model_name,
        "method": "MiniLM embeddings + Logistic Regression",
    }
199
+
200
+
201
@app.post("/batch-predict-department", response_model=BatchDepartmentPredictionResponse)
def batch_predict_department(request: BatchDepartmentPredictionRequest):
    """Classify every complaint in the request, preserving input order.

    Raises:
        HTTPException: 422 when any complaint text contains only whitespace;
            the detail message names the offending index.
    """
    # Function-scope import so this handler does not depend on a change to
    # the module's import block.
    from fastapi import HTTPException

    # Validate the whole batch before any model work. The schema's min_length
    # counts whitespace, but the classifier service strips the text before
    # embedding it, so a blank entry would be classified as an empty string.
    for index, item in enumerate(request.complaints):
        if not item.complaint_text.strip():
            raise HTTPException(
                status_code=422,
                detail=f"complaints[{index}].complaint_text must not be blank",
            )

    predictions = []

    for item in request.complaints:
        result = classifier_service.predict(item.complaint_text)

        predictions.append({
            "complaint_text": item.complaint_text,
            "predicted_department": result["predicted_department"],
            "confidence": result["confidence"],
            "probabilities": result["probabilities"],
            "model": classifier_service.embedding_model_name,
            "method": "MiniLM embeddings + Logistic Regression",
        })

    return {
        "predictions": predictions
    }
readme.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Grievance Department Classifier API
3
+ emoji: 🏛️
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
9
+
10
+ # Grievance Department Classifier API
11
+
12
+ FastAPI backend for classifying citizen complaints into departments.
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ pydantic
4
+ python-multipart
5
+ joblib
6
+ numpy
7
+ scikit-learn
8
+ sentence-transformers
9
+ torch
10
+ transformers