Spaces:
Sleeping
Sleeping
Sai809701 committed on
Commit Β·
60a3fde
1
Parent(s): 5bb5bcf
added 3 files
Browse files- Dockerfile +15 -0
- main.py +105 -0
- requirements.txt +7 -0
Dockerfile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use the official Python image as a base
FROM python:3.10

# Set the working directory inside the container
WORKDIR /app

# Copy the requirements file and install the dependencies.
# requirements.txt is copied separately from the app code so Docker can
# cache this layer and skip reinstalling packages when only source changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy all the other files into the container
COPY . .

# Run the FastAPI app using Uvicorn.
# Port 7860 is the port Hugging Face Spaces expects the app to listen on.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from typing import Dict, List, Optional

import pandas as pd
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from pymongo import MongoClient
from pymongo.collection import Collection
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# --- Configuration using Environment Variables from Hugging Face Secrets ---
# MODEL_NAME will be your fine-tuned model on the Hub (e.g., "your-username/your-model");
# falls back to the local name "bert-model" when the secret is unset.
MODEL_NAME = os.getenv("MODEL_NAME", "bert-model")

# MONGO_URI will be your MongoDB Atlas connection string (required at startup).
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = "legal_aid-chatbot"
COLLECTION_NAME = "categories"

# --- Global Resources (loaded once at startup by startup_event) ---
model: Optional[AutoModelForSequenceClassification] = None
tokenizer: Optional[AutoTokenizer] = None
intent_map: Dict[int, str] = {}
# Bugfix: this global holds a pymongo Collection handle
# (client[DB_NAME][COLLECTION_NAME]), not a MongoClient, so annotate it as such.
collection: Optional[Collection] = None
# --- Helper function to create the intent map ---
def create_intent_map(csv_files: List[str]) -> Dict[int, str]:
    """Creates a consistent intent-to-ID mapping from the training CSVs."""
    labels = set()
    try:
        # The first CSV stores its labels in the 'intent' column,
        # the second one in 'intent_type'.
        labels.update(pd.read_csv(csv_files[0])['intent'].unique())
        labels.update(pd.read_csv(csv_files[1])['intent_type'].unique())
    except FileNotFoundError as e:
        print(f"β Critical Error: CSV for intent mapping not found: {e}")
        return {}
    # Sorting makes the ID assignment deterministic across runs.
    return dict(enumerate(sorted(labels)))
# --- Application Startup Event ---
app = FastAPI(title="Legal Aid Chatbot API", version="1.0.0")

# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in
# favor of lifespan handlers; kept as-is to avoid behavior changes.
@app.on_event("startup")
def startup_event() -> None:
    """Loads all necessary resources when the FastAPI application starts.

    Populates the module-level ``model``, ``tokenizer``, ``intent_map`` and
    ``collection`` globals. On failure it prints an error and (for the first
    two steps) returns early, leaving the affected globals unset; the
    /get-solution endpoint answers 503 until all resources are available.
    """
    global model, tokenizer, intent_map, collection

    print("--- Loading resources on application startup ---")

    # The MongoDB URI must be provided as a Hugging Face Space secret.
    if not MONGO_URI:
        print("β Critical Error: MONGO_URI secret is not set in Hugging Face Space settings.")
        return

    # Build the label-id -> intent-name mapping from the same CSVs used for
    # training so prediction ids line up with the training labels.
    intent_map = create_intent_map(['womens_legal_questions_20k.csv', 'legal_aid_chatbot_dataset_20k.csv'])
    if not intent_map:
        print("β Could not create intent map. API will not function correctly.")
        return

    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Load model with the correct number of labels based on our data
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(intent_map))
        print(f"β… Model '{MODEL_NAME}' and tokenizer loaded.")
    except Exception as e:
        print(f"β Critical Error loading Hugging Face model: {e}")

    # Mongo connection is attempted even if the model failed to load; the
    # endpoint's readiness check gates on all resources being present.
    try:
        client = MongoClient(MONGO_URI)
        collection = client[DB_NAME][COLLECTION_NAME]
        client.server_info()  # Test connection
        print("π Successfully connected to MongoDB.")
    except Exception as e:
        print(f"β Critical Error connecting to MongoDB: {e}")
# --- API Data Models (based on your main.py) ---
class QueryRequest(BaseModel):
    """Request body for /get-solution."""
    # Free-text legal question from the user.
    question: str

class SolutionResponse(BaseModel):
    """Response body for /get-solution."""
    # Human-readable intent label predicted by the classifier.
    predicted_intent: str
    # Solution text looked up in MongoDB, or a fallback message.
    solution: str
# --- API Endpoint (based on your main.py) ---
@app.post("/get-solution", response_model=SolutionResponse)
def get_legal_solution(request: QueryRequest):
    """Receives a question, predicts intent, and retrieves the solution from MongoDB.

    Raises:
        HTTPException: 503 when startup failed to load the model, the
            tokenizer, or the MongoDB collection.
    """
    # Bugfix: pymongo Collection objects do not implement truth-value testing
    # (bool() raises NotImplementedError), so the original
    # `if not all([model, tokenizer, collection])` crashed with a 500 whenever
    # the collection WAS loaded. Compare each resource against None explicitly.
    if model is None or tokenizer is None or collection is None:
        raise HTTPException(status_code=503, detail="Server resources are not ready. Check startup logs for errors.")

    # Tokenize and classify the question; inference only, so no gradients.
    inputs = tokenizer(request.question, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Map the winning class id back to its human-readable intent label.
    prediction_id = torch.argmax(logits, dim=1).item()
    predicted_intent = intent_map.get(prediction_id, "Unknown Intent")

    # Look up the canned answer for the predicted intent.
    document = collection.find_one({"intent": predicted_intent})

    solution = document["answer"] if document and "answer" in document else "No specific solution was found for this topic."

    return SolutionResponse(predicted_intent=predicted_intent, solution=solution)
@app.get("/")
def root():
    """Health-check endpoint confirming the service is up."""
    status_message = "Legal Aid Chatbot API is active and running."
    return {"message": status_message}
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Python dependencies for the Legal Aid Chatbot API (installed by the Dockerfile).
fastapi
uvicorn[standard]
torch
transformers
pandas
pymongo
scikit-learn