Sp2503 committed on
Commit
c1dfe4e
Β·
verified Β·
1 Parent(s): 91bccf1

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +15 -25
main.py CHANGED
@@ -2,24 +2,26 @@ import os
2
  import pandas as pd
3
  import torch
4
  from fastapi import FastAPI, HTTPException
5
- from pydantic import BaseModel
6
  from pymongo import MongoClient
7
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
  from typing import Dict, List, Optional
9
 
10
- # --- Configuration using Environment Variables from Hugging Face Secrets ---
11
- MODEL_NAME = os.getenv("MODEL_NAME")
12
- MONGO_URI = os.getenv("MONGO_URI")
 
 
 
13
  DB_NAME = "legal_aid-chatbot"
14
  COLLECTION_NAME = "categories"
15
 
16
- # --- Global Resources (loaded once at startup) ---
17
  model: Optional[AutoModelForSequenceClassification] = None
18
  tokenizer: Optional[AutoTokenizer] = None
19
  intent_map: Dict[int, str] = {}
20
  collection: Optional[MongoClient] = None
21
 
22
- # --- Helper function to create the intent map ---
23
  def create_intent_map(csv_files: List[str]) -> Dict[int, str]:
24
  """Creates a consistent intent-to-ID mapping from the training CSVs."""
25
  all_intents = set()
@@ -31,58 +33,49 @@ def create_intent_map(csv_files: List[str]) -> Dict[int, str]:
31
  except FileNotFoundError as e:
32
  print(f"❌ Critical Error: CSV for intent mapping not found: {e}")
33
  return {}
34
- # Sort the intents to ensure the mapping is always the same
35
  return {i: intent for i, intent in enumerate(sorted(list(all_intents)))}
36
 
37
- # --- Application Startup Event ---
38
  app = FastAPI(title="Legal Aid Chatbot API", version="1.0.0")
39
 
40
  @app.on_event("startup")
41
  def startup_event():
42
  """Loads all necessary resources when the FastAPI application starts."""
43
  global model, tokenizer, intent_map, collection
44
-
45
  print("--- Loading resources on application startup ---")
46
 
47
  if not MONGO_URI:
48
  print("❌ Critical Error: MONGO_URI secret is not set in Hugging Face Space settings.")
49
  return
50
- if not MODEL_NAME:
51
- print("❌ Critical Error: MODEL_NAME secret is not set in Hugging Face Space settings.")
52
- return
53
-
54
  intent_map = create_intent_map(['womens_legal_questions_20k.csv', 'legal_aid_chatbot_dataset_20k.csv'])
55
  if not intent_map:
56
  print("❌ Could not create intent map. API will not function correctly.")
57
  return
58
 
59
  try:
60
- # Use a writable cache directory for Hugging Face Spaces
61
  cache_dir = "/tmp"
62
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=cache_dir)
63
-
64
- # Explicitly provide the number of labels for the classifier
65
  num_labels = len(intent_map)
66
  model = AutoModelForSequenceClassification.from_pretrained(
67
- MODEL_NAME,
68
- num_labels=num_labels,
69
  cache_dir=cache_dir
70
  )
71
  print(f"βœ… Model '{MODEL_NAME}' and tokenizer loaded successfully.")
72
  except Exception as e:
73
  print(f"❌ Critical Error loading Hugging Face model: {e}")
74
- model = None
75
 
76
  try:
77
  client = MongoClient(MONGO_URI)
78
  collection = client[DB_NAME][COLLECTION_NAME]
79
- client.server_info() # Test connection
80
  print("πŸš€ Successfully connected to MongoDB.")
81
  except Exception as e:
82
  print(f"❌ Critical Error connecting to MongoDB: {e}")
83
  collection = None
84
 
85
- # --- API Data Models ---
86
  class QueryRequest(BaseModel):
87
  question: str
88
 
@@ -90,22 +83,19 @@ class SolutionResponse(BaseModel):
90
  predicted_intent: str
91
  solution: str
92
 
93
- # --- API Endpoint ---
94
  @app.post("/get-solution", response_model=SolutionResponse)
95
  def get_legal_solution(request: QueryRequest):
96
- """Receives a question, predicts intent, and retrieves the solution from MongoDB."""
97
  if not all([model, tokenizer, collection]):
98
  raise HTTPException(status_code=503, detail="Server resources are not ready. Check startup logs for errors.")
99
 
100
  inputs = tokenizer(request.question, return_tensors="pt", truncation=True, padding=True)
101
  with torch.no_grad():
102
  logits = model(**inputs).logits
103
-
104
  prediction_id = torch.argmax(logits, dim=1).item()
105
  predicted_intent = intent_map.get(prediction_id, "Unknown Intent")
106
 
107
  document = collection.find_one({"intent": predicted_intent})
108
-
109
  solution = document["answer"] if document and "answer" in document else "No specific solution was found for this topic."
110
 
111
  return SolutionResponse(predicted_intent=predicted_intent, solution=solution)
 
2
import pandas as pd
import torch
from fastapi import FastAPI, HTTPException
# FIX: the module is "pydantic" — the committed "pantic" raises ImportError
# the moment this file is imported, so the whole API fails to start.
from pydantic import BaseModel
from pymongo import MongoClient
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from typing import Dict, List, Optional

# --- Configuration ---
# Hardcoding the correct model name from the URL to bypass any issues with secrets.
MODEL_NAME = "Sp2503/Bertmodel"

# The MongoDB URI is loaded from Hugging Face Space secrets for security.
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = "legal_aid-chatbot"
COLLECTION_NAME = "categories"

# --- Global Resources (populated once by the startup event) ---
model: Optional[AutoModelForSequenceClassification] = None
tokenizer: Optional[AutoTokenizer] = None
intent_map: Dict[int, str] = {}
collection: Optional[MongoClient] = None
24
 
 
25
  def create_intent_map(csv_files: List[str]) -> Dict[int, str]:
26
  """Creates a consistent intent-to-ID mapping from the training CSVs."""
27
  all_intents = set()
 
33
  except FileNotFoundError as e:
34
  print(f"❌ Critical Error: CSV for intent mapping not found: {e}")
35
  return {}
 
36
  return {i: intent for i, intent in enumerate(sorted(list(all_intents)))}
37
 
 
38
app = FastAPI(title="Legal Aid Chatbot API", version="1.0.0")

# NOTE(review): @app.on_event is deprecated in newer FastAPI in favor of
# lifespan handlers — confirm the pinned FastAPI version before migrating.
@app.on_event("startup")
def startup_event():
    """Loads all necessary resources when the FastAPI application starts."""
    global model, tokenizer, intent_map, collection

    print("--- Loading resources on application startup ---")

    # Without the Mongo URI secret nothing else is useful; bail out early.
    if not MONGO_URI:
        print("❌ Critical Error: MONGO_URI secret is not set in Hugging Face Space settings.")
        return

    # Build the label-id -> intent-name mapping from the training CSVs.
    intent_map = create_intent_map(['womens_legal_questions_20k.csv', 'legal_aid_chatbot_dataset_20k.csv'])
    if not intent_map:
        print("❌ Could not create intent map. API will not function correctly.")
        return

    # Load tokenizer and classifier; /tmp is the writable cache on Spaces.
    try:
        hf_cache = "/tmp"
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=hf_cache)
        model = AutoModelForSequenceClassification.from_pretrained(
            MODEL_NAME,
            num_labels=len(intent_map),
            cache_dir=hf_cache
        )
        print(f"βœ… Model '{MODEL_NAME}' and tokenizer loaded successfully.")
    except Exception as exc:
        print(f"❌ Critical Error loading Hugging Face model: {exc}")
        model = None

    # Connect to MongoDB; server_info() forces a round-trip to verify it.
    try:
        mongo_client = MongoClient(MONGO_URI)
        collection = mongo_client[DB_NAME][COLLECTION_NAME]
        mongo_client.server_info()
        print("πŸš€ Successfully connected to MongoDB.")
    except Exception as exc:
        print(f"❌ Critical Error connecting to MongoDB: {exc}")
        collection = None
78
 
 
79
class QueryRequest(BaseModel):
    """Request payload for /get-solution: the user's legal question."""

    question: str
81
 
 
83
  predicted_intent: str
84
  solution: str
85
 
 
86
@app.post("/get-solution", response_model=SolutionResponse)
def get_legal_solution(request: QueryRequest):
    """Predict the question's intent and return the matching solution from MongoDB."""
    # All three resources must have loaded during startup; otherwise refuse.
    if not all([model, tokenizer, collection]):
        raise HTTPException(status_code=503, detail="Server resources are not ready. Check startup logs for errors.")

    # Classify the question with the fine-tuned sequence classifier.
    encoded = tokenizer(request.question, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**encoded).logits

    intent_id = torch.argmax(logits, dim=1).item()
    intent_label = intent_map.get(intent_id, "Unknown Intent")

    # Look up the canned answer for the predicted intent.
    record = collection.find_one({"intent": intent_label})
    if record and "answer" in record:
        answer_text = record["answer"]
    else:
        answer_text = "No specific solution was found for this topic."

    return SolutionResponse(predicted_intent=intent_label, solution=answer_text)