Sp2503 committed on
Commit
d38f9c4
·
verified ·
1 Parent(s): 0a41dbe

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +105 -47
main.py CHANGED
@@ -1,66 +1,124 @@
1
- from fastapi import FastAPI
2
- from fastapi.middleware.cors import CORSMiddleware
3
- import torch
4
- from transformers import AutoTokenizer, AutoModel
5
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- app = FastAPI(title="MuRIL QA Demo")
8
-
9
- # Allow cross-origin requests
10
- app.add_middleware(
11
- CORSMiddleware,
12
- allow_origins=["*"],
13
- allow_credentials=True,
14
- allow_methods=["*"],
15
- allow_headers=["*"],
16
- )
17
 
18
- MODEL_NAME = "google/muril-base-cased"
19
- EMBED_PATH = "/tmp/datasets--Sp2503--muril-dataset/snapshots/b768e5a3a401589f25b723c20f9674e88717db1b/answer_embeddings.pt"
 
 
 
 
20
 
21
- model = None
22
- tokenizer = None
23
- answer_embeddings = None
24
 
25
- def load_model():
26
- global model, tokenizer, answer_embeddings
 
27
 
28
- print("⚙️ Loading model and dataset...")
29
 
30
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
31
- model = AutoModel.from_pretrained(MODEL_NAME)
32
 
33
- if os.path.exists(EMBED_PATH):
34
- answer_embeddings = torch.load(EMBED_PATH, map_location="cpu")
35
- print(f"✅ Embeddings loaded from {EMBED_PATH}")
36
- else:
37
- print("⚠️ Embeddings not found! Please check dataset path.")
38
 
39
- print(" Model and embeddings ready.")
 
 
 
40
 
41
- # 🚀 Load everything before starting FastAPI
42
- print("🚀 Starting app...")
43
- load_model()
44
 
45
- @app.get("/")
46
- def health_check():
47
- return {"status": "ok"}
 
 
 
48
 
49
- @app.get("/ask")
50
- def ask(question: str):
51
- if model is None or tokenizer is None or answer_embeddings is None:
52
- return {"error": "Model not loaded yet"}
53
 
54
- inputs = tokenizer(question, return_tensors="pt")
55
- with torch.no_grad():
56
- q_emb = model(**inputs).last_hidden_state.mean(dim=1)
 
 
57
 
58
- similarities = torch.nn.functional.cosine_similarity(q_emb, answer_embeddings)
59
- top_idx = torch.argmax(similarities).item()
60
 
61
- return {"question": question, "answer_id": top_idx, "score": similarities[top_idx].item()}
 
 
 
 
 
 
62
 
63
 
64
  if __name__ == "__main__":
65
  import uvicorn
66
- uvicorn.run("main:app", host="0.0.0.0", port=8080)
 
 
 
 
 
 
 
# Standard library
import os
import threading
import time
from typing import Optional

# Third-party
import pandas as pd
import torch
from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from langdetect import detect
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, util
11
+
12
+ # --- Cache Configuration ---
13
+ os.environ["HF_HOME"] = "/app/hf_cache"
14
+ os.environ["TRANSFORMERS_CACHE"] = "/app/hf_cache"
15
+ os.environ["TORCH_DISABLE_CUDA"] = "1"
16
+
17
+ # --- Paths ---
18
+ MODEL_PATH = './muril_combined_multilingual_model'
19
+ CSV_PATH = './muril_multilingual_dataset.csv'
20
+ HF_REPO = "Sp2503/muril-dataset"
21
+ HF_FILE = "answer_embeddings.pt"
22
+
23
+ # --- FastAPI Setup ---
24
+ app = FastAPI(title="MuRIL Multilingual QA API")
25
+
26
+ # Global variables
27
+ model = None
28
+ df = None
29
+ answer_embeddings = None
30
+ is_model_ready = False
31
+
32
+
33
+ # --- Helper: Load embeddings from Hugging Face ---
34
+ def load_embeddings():
35
+ print("📥 Downloading embeddings from Hugging Face...")
36
+ hf_path = hf_hub_download(
37
+ repo_id=HF_REPO,
38
+ filename=HF_FILE,
39
+ repo_type="dataset",
40
+ cache_dir="/tmp"
41
+ )
42
+ print(f"✅ Embeddings available at {hf_path}")
43
+ return torch.load(hf_path, map_location="cpu")
44
+
45
+
46
+ # --- Resource Loader ---
47
+ def load_resources():
48
+ global model, df, answer_embeddings, is_model_ready
49
+ try:
50
+ print("⚙️ Loading model and dataset...")
51
+ model = SentenceTransformer(MODEL_PATH)
52
+ df = pd.read_csv(CSV_PATH).dropna(subset=['question', 'answer'])
53
+ answer_embeddings = load_embeddings()
54
+ is_model_ready = True
55
+ print("✅ Model and embeddings ready.")
56
+ except Exception as e:
57
+ print(f"❌ Error loading resources: {e}")
58
+ is_model_ready = False
59
+
60
+
61
+ # --- Background Loader Thread ---
62
+ @app.on_event("startup")
63
+ def startup_event():
64
+ print("🚀 Starting background model loader thread...")
65
+ thread = threading.Thread(target=load_resources, daemon=True)
66
+ thread.start()
67
 
 
 
 
 
 
 
 
 
 
 
68
 
69
+ @app.get("/")
70
+ def root():
71
+ return {
72
+ "status": "✅ Running MuRIL QA API",
73
+ "model_loaded": is_model_ready
74
+ }
75
 
 
 
 
76
 
77
+ class QueryRequest(BaseModel):
78
+ question: str
79
+ lang: str = None
80
 
 
81
 
82
+ class QAResponse(BaseModel):
83
+ answer: str
84
 
 
 
 
 
 
85
 
86
+ @app.post("/get-answer", response_model=QAResponse)
87
+ def get_answer_endpoint(request: QueryRequest):
88
+ if not is_model_ready:
89
+ return {"answer": "⏳ Model still loading, please try again shortly."}
90
 
91
+ question_text = request.question.strip()
92
+ lang_filter = request.lang or detect(question_text)
 
93
 
94
+ filtered_df = df
95
+ filtered_embeddings = answer_embeddings
96
+ if 'lang' in df.columns and lang_filter:
97
+ mask = df['lang'] == lang_filter
98
+ filtered_df = df[mask].reset_index(drop=True)
99
+ filtered_embeddings = answer_embeddings[mask.values]
100
 
101
+ if len(filtered_df) == 0:
102
+ return {"answer": f"⚠️ No data found for language '{lang_filter}'."}
 
 
103
 
104
+ question_emb = model.encode(question_text, convert_to_tensor=True)
105
+ cosine_scores = util.pytorch_cos_sim(question_emb, filtered_embeddings)
106
+ best_idx = torch.argmax(cosine_scores).item()
107
+ answer = filtered_df.iloc[best_idx]['answer']
108
+ return {"answer": answer}
109
 
 
 
110
 
111
+ # --- Keep-alive thread for Spaces ---
112
+ def keep_alive():
113
+ while True:
114
+ # This ensures the app doesn’t shut down for inactivity
115
+ time.sleep(60)
116
+ if not is_model_ready:
117
+ print("🕒 Model still loading...")
118
 
119
 
120
  if __name__ == "__main__":
121
  import uvicorn
122
+ threading.Thread(target=keep_alive, daemon=True).start()
123
+ # Run with fewer workers for Spaces (prevents timeout)
124
+ uvicorn.run("main:app", host="0.0.0.0", port=8080, workers=1)