Spaces:

sedtha
/

khmer-summarizer-api-mBART-LoRA

Sleeping

App Files Community

sedtha committed on Dec 26, 2025

Commit

c2110ef

verified ·

1 Parent(s): 6092add

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -40

app.py CHANGED Viewed

@@ -1,70 +1,173 @@
-from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
-import torch
-# --------------------
-# App initialization
-# --------------------
-app = FastAPI(title="Khmer Summarization API")
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # allow frontend from anywhere
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
-# --------------------
-# Model loading
-# --------------------
-MODEL_NAME = "sedtha/mBart-50-large_LoRa_kh_sumerize"
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-print("Loading tokenizer...")
-tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_NAME)
-print("Loading model...")
-model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)
-model.to(DEVICE)
-model.eval()
-print("Model loaded successfully!")
-# --------------------
-# Request schema
-# --------------------
 class SummarizeRequest(BaseModel):
     text: str
-    max_length: int = 150
-    min_length: int = 40
-# --------------------
-# API endpoint
-# --------------------
 @app.post("/summarize")
 def summarize(req: SummarizeRequest):
     inputs = tokenizer(
         req.text,
         return_tensors="pt",
         truncation=True,
         max_length=1024
-    ).to(DEVICE)
     with torch.no_grad():
-        output_ids = model.generate(
             **inputs,
-            max_length=req.max_length,
-            min_length=req.min_length,
-            num_beams=4
         )
-    summary = tokenizer.decode(
-        output_ids[0],
-        skip_special_tokens=True
-    )
     return {
-        "summary": summary
     }

+import torch
+import warnings
+from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+from peft import PeftModel
+from transformers import (
+    MBartForConditionalGeneration, MBart50Tokenizer,
+    MT5ForConditionalGeneration, T5Tokenizer
+)
+warnings.filterwarnings("ignore")
+app = FastAPI(
+    title="Khmer Summarization API",
+    description="mBART-LoRA + mT5 in ONE API",
+    version="1.0.0"
+)
+# ================= CORS Configuration =================
+# Allow all origins for Hugging Face Spaces
+origins = [
+    "https://*.hf.space",  # Allow Hugging Face Spaces
+    "http://localhost",
+    "http://localhost:3000",
+    "http://127.0.0.1",
+    "http://127.0.0.1:3000",
+    "*"  # You can be more restrictive in production
+]
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],  # Allows all methods (GET, POST, etc.)
+    allow_headers=["*"],  # Allows all headers
 )
+# ================= Device =================
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# ================= Models Config =================
+MODELS = {
+    "model1": {
+        "name": "Khmer mBART + LoRA",
+        "type": "mbart",
+        "repo": "sedtha/mBart-50-large_LoRa_kh_sumerize",
+        "model": None,
+        "tokenizer": None
+    },
+    "model2": {
+        "name": "Khmer mT5",
+        "type": "mt5",
+        "repo": "angkor96/khmer-mT5-news-summarization",
+        "model": None,
+        "tokenizer": None
+    }
+}
+# ================= Load Model =================
+def load_model(key: str):
+    info = MODELS[key]
+    if info["model"] is None:
+        print(f"🔹 Loading {info['name']}...")
+        if info["type"] == "mbart":
+            tokenizer = MBart50Tokenizer.from_pretrained(
+                info["repo"],
+                src_lang="km_KH",
+                tgt_lang="km_KH",
+                cache_dir="./cache"
+            )
+            base_model = MBartForConditionalGeneration.from_pretrained(
+                "facebook/mbart-large-50",
+                cache_dir="./cache"
+            ).to(device)
+            model = PeftModel.from_pretrained(
+                base_model,
+                info["repo"],
+                cache_dir="./cache"
+            ).to(device)
+        elif info["type"] == "mt5":
+            tokenizer = T5Tokenizer.from_pretrained(info["repo"], cache_dir="./cache")
+            model = MT5ForConditionalGeneration.from_pretrained(
+                info["repo"], cache_dir="./cache"
+            ).to(device)
+        model.eval()
+        info["model"] = model
+        info["tokenizer"] = tokenizer
+        print(f"✅ Loaded {info['name']}")
+    return info["model"], info["tokenizer"]
+# ================= Request Schema =================
 class SummarizeRequest(BaseModel):
     text: str
+    model: str = "model2"
+# ================= API Endpoint =================
 @app.post("/summarize")
 def summarize(req: SummarizeRequest):
+    if not req.text.strip():
+        raise HTTPException(status_code=400, detail="Text is empty")
+    if req.model not in MODELS:
+        raise HTTPException(status_code=400, detail="Invalid model")
+    model, tokenizer = load_model(req.model)
     inputs = tokenizer(
         req.text,
         return_tensors="pt",
         truncation=True,
         max_length=1024
+    ).to(device)
     with torch.no_grad():
+        summary_ids = model.generate(
             **inputs,
+            do_sample=True,
+            temperature=0.8,
+            top_p=0.9,
+            top_k=50,
+            max_new_tokens=125,
+            repetition_penalty=1.2,
+            no_repeat_ngram_size=3
         )
+    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+    # Khmer sentence cleanup
+    if "។" in summary:
+        summary = summary[:summary.rfind("។") + 1]
     return {
+        "model": MODELS[req.model]["name"],
+        "summary": summary.strip()
     }
+# ================= Health Check =================
+@app.get("/")
+def root():
+    return {"status": "Khmer Summarization API is running 🚀"}
+# ================= Additional endpoint for testing =================
+@app.get("/health")
+def health_check():
+    return {
+        "status": "healthy",
+        "device": str(device),
+        "models_loaded": {
+            key: info["model"] is not None
+            for key, info in MODELS.items()
+        }
+    }
+# ================= Pre-load models on startup (optional) =================
+@app.on_event("startup")
+async def startup_event():
+    # Optionally pre-load both models on startup
+    # This will make first request faster but uses more memory
+    print("🚀 Starting up...")
+    print(f"Using device: {device}")
+    # You can choose to pre-load models or load them on first request
+    # For memory efficiency, we'll load on first request
+    print("Models will be loaded on first request to save memory")