Sai809701 committed on
Commit
60a3fde
Β·
1 Parent(s): 5bb5bcf

added 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +15 -0
  2. main.py +105 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python image as a base
FROM python:3.10

# Set the working directory inside the container
WORKDIR /app

# Copy the requirements file first and install the dependencies.
# Copying only requirements.txt before the rest of the source keeps
# the pip-install layer cached when application code changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy all the other files into the container
COPY . .

# Run the FastAPI app using Uvicorn.
# Port 7860 is the port Hugging Face Spaces expects a Docker Space to listen on.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import torch
4
+ from fastapi import FastAPI, HTTPException
5
+ from pydantic import BaseModel
6
+ from pymongo import MongoClient
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+ from typing import Dict, List, Optional
9
+
10
# --- Configuration using Environment Variables from Hugging Face Secrets ---
# MODEL_NAME will be your fine-tuned model on the Hub (e.g., "your-username/your-model")
MODEL_NAME = os.getenv("MODEL_NAME", "bert-model")

# MONGO_URI will be your MongoDB Atlas connection string
MONGO_URI = os.getenv("MONGO_URI")
# Database and collection holding one document of answers per intent.
DB_NAME = "legal_aid-chatbot"
COLLECTION_NAME = "categories"

# --- Global Resources (loaded once at startup) ---
# All four are populated by startup_event(); until then /get-solution
# responds 503 because the readiness check fails.
model: Optional[AutoModelForSequenceClassification] = None
tokenizer: Optional[AutoTokenizer] = None
intent_map: Dict[int, str] = {}
# NOTE(review): at runtime this holds a pymongo Collection
# (client[DB_NAME][COLLECTION_NAME]), not a MongoClient — the annotation
# is inaccurate but kept since Collection is not imported in this file.
collection: Optional[MongoClient] = None
24
+
25
# --- Helper function to create the intent map ---
def create_intent_map(csv_files: List[str]) -> Dict[int, str]:
    """Creates a consistent intent-to-ID mapping from the training CSVs.

    Each CSV may store its labels in either an ``intent`` column or an
    ``intent_type`` column (the two training sets differ). All labels
    found across every file are collected, de-duplicated, sorted, and
    enumerated so the same data always yields the same mapping.

    Args:
        csv_files: Paths of the CSV files to scan for intent labels.

    Returns:
        A dict mapping label id -> intent name, or {} when any file is
        missing or contains neither expected label column.
    """
    all_intents = set()
    try:
        for path in csv_files:
            df = pd.read_csv(path)
            # Accept whichever of the two known label columns is present.
            label_col = 'intent' if 'intent' in df.columns else 'intent_type'
            if label_col not in df.columns:
                print(f"❌ Critical Error: no 'intent'/'intent_type' column in {path}")
                return {}
            all_intents.update(df[label_col].unique())
    except FileNotFoundError as e:
        print(f"❌ Critical Error: CSV for intent mapping not found: {e}")
        return {}
    # Sort the intents to ensure the mapping is always the same
    return {i: intent for i, intent in enumerate(sorted(all_intents))}
39
+
40
# --- Application Startup Event ---
app = FastAPI(title="Legal Aid Chatbot API", version="1.0.0")

@app.on_event("startup")
def startup_event():
    """Loads all necessary resources when the FastAPI application starts.

    Populates the module-level globals (model, tokenizer, intent_map,
    collection). On any failure it prints the error and returns early,
    leaving the affected globals unset; /get-solution checks them and
    responds 503 until everything is ready.
    """
    global model, tokenizer, intent_map, collection

    print("--- Loading resources on application startup ---")

    # The connection string must be provided via the Space's secrets.
    if not MONGO_URI:
        print("❌ Critical Error: MONGO_URI secret is not set in Hugging Face Space settings.")
        return

    # Build the label-id -> intent-name map from the same CSVs the model
    # was trained on; num_labels below depends on its size.
    intent_map = create_intent_map(['womens_legal_questions_20k.csv', 'legal_aid_chatbot_dataset_20k.csv'])
    if not intent_map:
        print("❌ Could not create intent map. API will not function correctly.")
        return

    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Load model with the correct number of labels based on our data
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(intent_map))
        print(f"βœ… Model '{MODEL_NAME}' and tokenizer loaded.")
    except Exception as e:
        # Model failure is logged but does not abort: the Mongo connection
        # below is still attempted, and the endpoint guard handles the rest.
        print(f"❌ Critical Error loading Hugging Face model: {e}")

    try:
        client = MongoClient(MONGO_URI)
        collection = client[DB_NAME][COLLECTION_NAME]
        client.server_info()  # Test connection
        print(f"πŸš€ Successfully connected to MongoDB.")
    except Exception as e:
        print(f"❌ Critical Error connecting to MongoDB: {e}")
74
+
75
# --- API Data Models (based on your main.py) ---
class QueryRequest(BaseModel):
    """Request body for POST /get-solution."""
    # The user's legal question as plain text.
    question: str
78
+
79
class SolutionResponse(BaseModel):
    """Response body for POST /get-solution."""
    # Intent label predicted by the classifier ("Unknown Intent" if the
    # predicted id is not in the intent map).
    predicted_intent: str
    # Answer text fetched from MongoDB, or a fallback message.
    solution: str
82
+
83
# --- API Endpoint (based on your main.py) ---
@app.post("/get-solution", response_model=SolutionResponse)
def get_legal_solution(request: QueryRequest):
    """Receives a question, predicts intent, and retrieves the solution from MongoDB.

    Raises:
        HTTPException: 503 when the model, tokenizer, or database
            connection were not initialized during startup.
    """
    # BUG FIX: pymongo Collection objects do not implement truth-value
    # testing (bool(collection) raises NotImplementedError), so the old
    # `not all([model, tokenizer, collection])` check crashed once the DB
    # was connected. Compare each resource against None explicitly.
    if model is None or tokenizer is None or collection is None:
        raise HTTPException(status_code=503, detail="Server resources are not ready. Check startup logs for errors.")

    # Tokenize the question and run a single forward pass without gradients.
    inputs = tokenizer(request.question, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Highest-scoring class id -> human-readable intent name.
    prediction_id = torch.argmax(logits, dim=1).item()
    predicted_intent = intent_map.get(prediction_id, "Unknown Intent")

    # Look up the stored answer for this intent in MongoDB.
    document = collection.find_one({"intent": predicted_intent})

    fallback = "No specific solution was found for this topic."
    solution = document.get("answer", fallback) if document else fallback

    return SolutionResponse(predicted_intent=predicted_intent, solution=solution)
102
+
103
@app.get("/")
def root():
    """Health-check endpoint confirming the service is up."""
    payload = {"message": "Legal Aid Chatbot API is active and running."}
    return payload
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ torch
4
+ transformers
5
+ pandas
6
+ pymongo
7
+ scikit-learn