Sp2503 committed on
Commit
3d71fc5
Β·
verified Β·
1 Parent(s): 5b88ecf

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +35 -30
main.py CHANGED
@@ -6,28 +6,29 @@ from pydantic import BaseModel
6
  from sentence_transformers import SentenceTransformer, util
7
  from langdetect import detect
8
  from huggingface_hub import hf_hub_download
9
- import threading
10
 
11
- # --- Hugging Face cache settings ---
12
- os.environ["HF_HOME"] = "/tmp/hf_cache"
13
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 
14
 
15
- # --- Configuration ---
16
  MODEL_PATH = './muril_combined_multilingual_model'
17
  CSV_PATH = './muril_multilingual_dataset.csv'
18
  HF_REPO = "Sp2503/muril-dataset"
19
  HF_FILE = "answer_embeddings.pt"
20
 
21
- # --- FastAPI app setup ---
22
  app = FastAPI(title="MuRIL Multilingual QA API")
23
 
 
24
  model = None
25
  df = None
26
  answer_embeddings = None
27
- load_status = {"ready": False, "error": None}
28
 
29
 
30
- # --- Load embeddings from Hugging Face ---
31
  def load_embeddings():
32
  print("πŸ“₯ Downloading embeddings from Hugging Face...")
33
  hf_path = hf_hub_download(
@@ -40,29 +41,28 @@ def load_embeddings():
40
  return torch.load(hf_path, map_location="cpu")
41
 
42
 
43
- # --- Background resource loading ---
44
  def load_resources():
45
- global model, df, answer_embeddings, load_status
46
  try:
47
  print("βš™οΈ Loading model and dataset in background...")
48
  model = SentenceTransformer(MODEL_PATH)
49
  df = pd.read_csv(CSV_PATH).dropna(subset=['question', 'answer'])
50
  answer_embeddings = load_embeddings()
51
- load_status["ready"] = True
52
  print("βœ… Model and embeddings ready.")
53
  except Exception as e:
54
- load_status["error"] = str(e)
55
  print(f"❌ Error loading resources: {e}")
56
 
57
 
 
58
  @app.on_event("startup")
59
- def schedule_background_load():
60
- """Run model load in a background thread to prevent startup timeout"""
61
- thread = threading.Thread(target=load_resources, daemon=True)
62
- thread.start()
63
 
64
 
65
- # --- API Models ---
66
  class QueryRequest(BaseModel):
67
  question: str
68
  lang: str = None
@@ -75,22 +75,21 @@ class QAResponse(BaseModel):
75
  # --- Root Endpoint ---
76
  @app.get("/")
77
  def root():
78
- if load_status["error"]:
79
- return {"status": "❌ Error", "details": load_status["error"]}
80
- return {"status": "βœ… Running", "model_ready": load_status["ready"]}
 
 
81
 
82
 
83
- # --- QA Endpoint ---
84
  @app.post("/get-answer", response_model=QAResponse)
85
  def get_answer_endpoint(request: QueryRequest):
86
- if not load_status["ready"]:
87
- return {"answer": "⏳ Model still loading, please try again in a few seconds."}
88
 
89
  question_text = request.question.strip()
90
- try:
91
- lang_filter = request.lang or detect(question_text)
92
- except Exception:
93
- lang_filter = None
94
 
95
  filtered_df = df
96
  filtered_embeddings = answer_embeddings
@@ -98,12 +97,18 @@ def get_answer_endpoint(request: QueryRequest):
98
  mask = df['lang'] == lang_filter
99
  filtered_df = df[mask].reset_index(drop=True)
100
  filtered_embeddings = answer_embeddings[mask.values]
101
- if filtered_df.empty:
102
- return {"answer": f"⚠️ No answers available for language '{lang_filter}'."}
103
 
104
- # Semantic similarity search
 
 
105
  question_emb = model.encode(question_text, convert_to_tensor=True)
106
  cosine_scores = util.pytorch_cos_sim(question_emb, filtered_embeddings)
107
  best_idx = torch.argmax(cosine_scores).item()
108
  answer = filtered_df.iloc[best_idx]['answer']
109
  return {"answer": answer}
 
 
 
 
 
 
 
6
import asyncio
from typing import Optional

from huggingface_hub import hf_hub_download
from langdetect import detect
from sentence_transformers import SentenceTransformer, util
10
 
11
# --- Cache configuration ---
# Point the Hugging Face caches at a writable app directory and keep
# everything on CPU. NOTE(review): these env vars are set after the HF
# imports above — confirm the libraries read them lazily in this deployment.
os.environ["HF_HOME"] = "/app/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/hf_cache"
os.environ["TORCH_DISABLE_CUDA"] = "1"  # disable GPU

# --- Paths ---
MODEL_PATH = './muril_combined_multilingual_model'
CSV_PATH = './muril_multilingual_dataset.csv'
HF_REPO = "Sp2503/muril-dataset"
HF_FILE = "answer_embeddings.pt"

# --- FastAPI Setup ---
app = FastAPI(title="MuRIL Multilingual QA API")

# Global state, filled in by load_resources() in a background worker.
# All three stay None until loading finishes; the endpoints treat any
# None as "not ready yet".
model = None
df = None
answer_embeddings = None
 
29
 
30
 
31
+ # --- Helper: Load embeddings from Hugging Face ---
32
  def load_embeddings():
33
  print("πŸ“₯ Downloading embeddings from Hugging Face...")
34
  hf_path = hf_hub_download(
 
41
  return torch.load(hf_path, map_location="cpu")
42
 
43
 
44
# --- Resource Loader ---
def load_resources():
    """Populate the module-level model, dataframe and answer embeddings.

    Intended to run in a background worker at startup. On failure the
    error is printed and whichever globals were not yet assigned remain
    None, so the endpoints keep reporting "still loading".
    """
    global model, df, answer_embeddings
    try:
        print("βš™οΈ Loading model and dataset in background...")
        # Order matters for partial-failure state: model, then data,
        # then the precomputed embeddings.
        model = SentenceTransformer(MODEL_PATH)
        df = pd.read_csv(CSV_PATH).dropna(subset=['question', 'answer'])
        answer_embeddings = load_embeddings()
        print("βœ… Model and embeddings ready.")
    except Exception as exc:
        # Broad catch is deliberate: a failed load must not kill the server.
        print(f"❌ Error loading resources: {exc}")
55
 
56
 
57
# --- Async Background Loading ---
@app.on_event("startup")
async def startup_event():
    """Kick off model/dataset loading without blocking startup.

    The blocking load_resources() is handed to the default thread-pool
    executor so the event loop (and the platform's startup health check)
    is not stalled by model loading.
    """
    # get_running_loop() is the supported call inside a coroutine;
    # get_event_loop() is deprecated here since Python 3.10.
    loop = asyncio.get_running_loop()
    loop.run_in_executor(None, load_resources)
    print("πŸš€ Background model loading started.")
63
 
64
 
65
# --- Request Models ---
class QueryRequest(BaseModel):
    """Body of POST /get-answer."""
    # The user's question, in any supported language.
    question: str
    # Optional language filter; when omitted the language is auto-detected.
    # `str = None` was a type defect — None is a legal default, so the
    # annotation must be Optional[str].
    lang: Optional[str] = None
 
75
# --- Root Endpoint ---
@app.get("/")
def root():
    """Health probe: reports whether background loading has finished."""
    loaded = not (model is None or df is None or answer_embeddings is None)
    return {"status": "βœ… Running MuRIL QA API", "model_loaded": loaded}
83
 
84
 
85
# --- Question Answer Endpoint ---
@app.post("/get-answer", response_model=QAResponse)
def get_answer_endpoint(request: QueryRequest):
    """Return the best-matching answer for a question.

    Filters the dataset by language (explicit `request.lang`, else
    auto-detected), embeds the question with the MuRIL model, and returns
    the answer whose precomputed embedding has the highest cosine
    similarity.
    """
    # Resources load in the background at startup; refuse politely until
    # all of them are in place.
    if model is None or df is None or answer_embeddings is None:
        return {"answer": "⏳ Model still loading, please try again shortly."}

    question_text = request.question.strip()

    # langdetect raises on empty/ambiguous text (e.g. digits only); fall
    # back to "no language filter" instead of a 500. This guard existed
    # before this commit and its removal was a regression.
    try:
        lang_filter = request.lang or detect(question_text)
    except Exception:
        lang_filter = None

    # Default to the full corpus; narrow only when a language is known.
    filtered_df = df
    filtered_embeddings = answer_embeddings
    if lang_filter:
        mask = df['lang'] == lang_filter
        filtered_df = df[mask].reset_index(drop=True)
        filtered_embeddings = answer_embeddings[mask.values]

    if len(filtered_df) == 0:
        return {"answer": f"⚠️ No data found for language '{lang_filter}'."}

    # Semantic similarity search against the (possibly filtered) corpus.
    question_emb = model.encode(question_text, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(question_emb, filtered_embeddings)
    best_idx = torch.argmax(cosine_scores).item()
    answer = filtered_df.iloc[best_idx]['answer']
    return {"answer": answer}
109
+
110
+
111
# --- Local entry point ---
if __name__ == "__main__":
    # Imported lazily so the module stays importable without uvicorn.
    import uvicorn
    uvicorn.run("main:app", host="0.0.0.0", port=8080)