Sai809701 committed on
Commit
60a3fde
Β·
1 Parent(s): 5bb5bcf

added 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +15 -0
  2. main.py +105 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python image as a base
FROM python:3.10

# Set the working directory inside the container
WORKDIR /app

# Copy the requirements file first and install the dependencies.
# Copying only requirements.txt before the rest of the source keeps
# the pip-install layer cached when application code changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy all the other files into the container
COPY . .

# Run the FastAPI app using Uvicorn.
# Port 7860 is the port Hugging Face Spaces expects a Docker Space to listen on.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import torch
4
+ from fastapi import FastAPI, HTTPException
5
+ from pydantic import BaseModel
6
+ from pymongo import MongoClient
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+ from typing import Dict, List, Optional
9
+
10
# --- Configuration using Environment Variables from Hugging Face Secrets ---
# MODEL_NAME will be your fine-tuned model on the Hub (e.g., "your-username/your-model")
MODEL_NAME = os.getenv("MODEL_NAME", "bert-model")

# MONGO_URI will be your MongoDB Atlas connection string
MONGO_URI = os.getenv("MONGO_URI")
# Database and collection holding one document of answers per intent.
DB_NAME = "legal_aid-chatbot"
COLLECTION_NAME = "categories"

# --- Global Resources (loaded once at startup) ---
# All four are populated by startup_event(); until then /get-solution
# responds 503 because the readiness check fails.
model: Optional[AutoModelForSequenceClassification] = None
tokenizer: Optional[AutoTokenizer] = None
intent_map: Dict[int, str] = {}
# NOTE(review): at runtime this holds a pymongo Collection
# (client[DB_NAME][COLLECTION_NAME]), not a MongoClient — the annotation
# is inaccurate but kept since Collection is not imported in this file.
collection: Optional[MongoClient] = None
24
+
25
# --- Helper function to create the intent map ---
def create_intent_map(csv_files: List[str]) -> Dict[int, str]:
    """Creates a consistent intent-to-ID mapping from the training CSVs.

    Each CSV may store its labels in either an ``intent`` column or an
    ``intent_type`` column (the two training sets differ). All labels
    found across every file are collected, de-duplicated, sorted, and
    enumerated so the same data always yields the same mapping.

    Args:
        csv_files: Paths of the CSV files to scan for intent labels.

    Returns:
        A dict mapping label id -> intent name, or {} when any file is
        missing or contains neither expected label column.
    """
    all_intents = set()
    try:
        for path in csv_files:
            df = pd.read_csv(path)
            # Accept whichever of the two known label columns is present.
            label_col = 'intent' if 'intent' in df.columns else 'intent_type'
            if label_col not in df.columns:
                print(f"❌ Critical Error: no 'intent'/'intent_type' column in {path}")
                return {}
            all_intents.update(df[label_col].unique())
    except FileNotFoundError as e:
        print(f"❌ Critical Error: CSV for intent mapping not found: {e}")
        return {}
    # Sort the intents to ensure the mapping is always the same
    return {i: intent for i, intent in enumerate(sorted(all_intents))}
39
+
40
# --- Application Startup Event ---
app = FastAPI(title="Legal Aid Chatbot API", version="1.0.0")

@app.on_event("startup")
def startup_event():
    """Loads all necessary resources when the FastAPI application starts.

    Populates the module-level globals (model, tokenizer, intent_map,
    collection). On any failure it prints the error and returns early,
    leaving the affected globals unset; /get-solution checks them and
    responds 503 until everything is ready.
    """
    global model, tokenizer, intent_map, collection

    print("--- Loading resources on application startup ---")

    # The connection string must be provided via the Space's secrets.
    if not MONGO_URI:
        print("❌ Critical Error: MONGO_URI secret is not set in Hugging Face Space settings.")
        return

    # Build the label-id -> intent-name map from the same CSVs the model
    # was trained on; num_labels below depends on its size.
    intent_map = create_intent_map(['womens_legal_questions_20k.csv', 'legal_aid_chatbot_dataset_20k.csv'])
    if not intent_map:
        print("❌ Could not create intent map. API will not function correctly.")
        return

    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Load model with the correct number of labels based on our data
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(intent_map))
        print(f"βœ… Model '{MODEL_NAME}' and tokenizer loaded.")
    except Exception as e:
        # Model failure is logged but does not abort: the Mongo connection
        # below is still attempted, and the endpoint guard handles the rest.
        print(f"❌ Critical Error loading Hugging Face model: {e}")

    try:
        client = MongoClient(MONGO_URI)
        collection = client[DB_NAME][COLLECTION_NAME]
        client.server_info()  # Test connection
        print(f"πŸš€ Successfully connected to MongoDB.")
    except Exception as e:
        print(f"❌ Critical Error connecting to MongoDB: {e}")
74
+
75
# --- API Data Models (based on your main.py) ---
class QueryRequest(BaseModel):
    """Request body for POST /get-solution."""
    # The user's legal question as plain text.
    question: str
78
+
79
class SolutionResponse(BaseModel):
    """Response body for POST /get-solution."""
    # Intent label predicted by the classifier ("Unknown Intent" if the
    # predicted id is not in the intent map).
    predicted_intent: str
    # Answer text fetched from MongoDB, or a fallback message.
    solution: str
82
+
83
# --- API Endpoint (based on your main.py) ---
@app.post("/get-solution", response_model=SolutionResponse)
def get_legal_solution(request: QueryRequest):
    """Receives a question, predicts intent, and retrieves the solution from MongoDB.

    Raises:
        HTTPException: 503 when the model, tokenizer, or database
            connection were not initialized during startup.
    """
    # BUG FIX: pymongo Collection objects do not implement truth-value
    # testing (bool(collection) raises NotImplementedError), so the old
    # `not all([model, tokenizer, collection])` check crashed once the DB
    # was connected. Compare each resource against None explicitly.
    if model is None or tokenizer is None or collection is None:
        raise HTTPException(status_code=503, detail="Server resources are not ready. Check startup logs for errors.")

    # Tokenize the question and run a single forward pass without gradients.
    inputs = tokenizer(request.question, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Highest-scoring class id -> human-readable intent name.
    prediction_id = torch.argmax(logits, dim=1).item()
    predicted_intent = intent_map.get(prediction_id, "Unknown Intent")

    # Look up the stored answer for this intent in MongoDB.
    document = collection.find_one({"intent": predicted_intent})

    fallback = "No specific solution was found for this topic."
    solution = document.get("answer", fallback) if document else fallback

    return SolutionResponse(predicted_intent=predicted_intent, solution=solution)
102
+
103
@app.get("/")
def root():
    """Health-check endpoint confirming the service is up."""
    payload = {"message": "Legal Aid Chatbot API is active and running."}
    return payload
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ torch
4
+ transformers
5
+ pandas
6
+ pymongo
7
+ scikit-learn