Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,47 +1,28 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import AutoTokenizer,
|
| 3 |
-
from
|
|
|
|
|
|
|
|
|
|
| 4 |
import torch
|
| 5 |
-
|
| 6 |
# ==========================
|
| 7 |
# 1. Load model from Hugging Face
|
| 8 |
# ==========================
|
| 9 |
|
| 10 |
-
MODEL_NAME = "sedtha/mBart-50-large_LoRa_kh_sumerize"
|
| 11 |
|
| 12 |
print("Loading model and tokenizer...")
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
base_model_name = config.base_model_name_or_path
|
| 18 |
-
|
| 19 |
-
# Load tokenizer from the base model (mbart-large-50)
|
| 20 |
-
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
| 21 |
-
|
| 22 |
-
# Load the base model
|
| 23 |
-
base_model = MBartForConditionalGeneration.from_pretrained(base_model_name)
|
| 24 |
-
|
| 25 |
-
# Load the LoRA adapter
|
| 26 |
-
model = PeftModel.from_pretrained(base_model, MODEL_NAME)
|
| 27 |
-
|
| 28 |
-
# Merge LoRA weights with base model for inference (optional but can improve performance)
|
| 29 |
-
model = model.merge_and_unload()
|
| 30 |
-
|
| 31 |
-
except Exception as e:
|
| 32 |
-
print(f"Error loading model: {e}")
|
| 33 |
-
# Fallback: try direct loading
|
| 34 |
-
try:
|
| 35 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 36 |
-
model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)
|
| 37 |
-
except Exception as e2:
|
| 38 |
-
print(f"Fallback loading also failed: {e2}")
|
| 39 |
-
raise
|
| 40 |
|
| 41 |
# Move to GPU if available
|
| 42 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 43 |
model = model.to(device)
|
| 44 |
-
model.eval() # Set to evaluation mode
|
| 45 |
|
| 46 |
print(f"✅ Model loaded successfully on {device}!")
|
| 47 |
|
|
@@ -59,10 +40,6 @@ def summarize_khmer_text(text, max_length=150, min_length=40):
|
|
| 59 |
return "⚠️ អត្ថបទខ្លីពេក / Text is too short to summarize"
|
| 60 |
|
| 61 |
try:
|
| 62 |
-
# Set the source language for mBART (Khmer)
|
| 63 |
-
# For mBART-50, Khmer language code is "km_KR"
|
| 64 |
-
tokenizer.src_lang = "km_KR"
|
| 65 |
-
|
| 66 |
# Tokenize input
|
| 67 |
inputs = tokenizer(
|
| 68 |
text,
|
|
@@ -81,8 +58,7 @@ def summarize_khmer_text(text, max_length=150, min_length=40):
|
|
| 81 |
length_penalty=2.0,
|
| 82 |
num_beams=4,
|
| 83 |
early_stopping=True,
|
| 84 |
-
no_repeat_ngram_size=3
|
| 85 |
-
forced_bos_token_id=tokenizer.lang_code_to_id["km_KR"] # Force Khmer output
|
| 86 |
)
|
| 87 |
|
| 88 |
# Decode output
|
|
@@ -157,4 +133,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 157 |
# 4. Launch
|
| 158 |
# ==========================
|
| 159 |
if __name__ == "__main__":
|
| 160 |
-
demo.launch(share=True)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 3 |
+
from transformers import (
|
| 4 |
+
MBartForConditionalGeneration, MBart50Tokenizer,
|
| 5 |
+
MT5ForConditionalGeneration, T5Tokenizer
|
| 6 |
+
)
|
| 7 |
import torch
|
| 8 |
+
from peft import PeftModel
|
| 9 |
# ==========================
|
| 10 |
# 1. Load model from Hugging Face
|
| 11 |
# ==========================
|
| 12 |
|
| 13 |
+
MODEL_NAME = "sedtha/mBart-50-large_LoRa_kh_sumerize" # e.g., "Sedtha-019/khmer-summarization"
|
| 14 |
|
| 15 |
print("Loading model and tokenizer...")
|
| 16 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 17 |
+
# model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
|
| 18 |
|
| 19 |
+
|
| 20 |
+
base = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50")
|
| 21 |
+
model = PeftModel.from_pretrained(base, MODEL_NAME)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# Move to GPU if available
|
| 24 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 25 |
model = model.to(device)
|
|
|
|
| 26 |
|
| 27 |
print(f"✅ Model loaded successfully on {device}!")
|
| 28 |
|
|
|
|
| 40 |
return "⚠️ អត្ថបទខ្លីពេក / Text is too short to summarize"
|
| 41 |
|
| 42 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
# Tokenize input
|
| 44 |
inputs = tokenizer(
|
| 45 |
text,
|
|
|
|
| 58 |
length_penalty=2.0,
|
| 59 |
num_beams=4,
|
| 60 |
early_stopping=True,
|
| 61 |
+
no_repeat_ngram_size=3
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
# Decode output
|
|
|
|
| 133 |
# 4. Launch
|
| 134 |
# ==========================
|
| 135 |
if __name__ == "__main__":
|
| 136 |
+
demo.launch(share=True)
|