from typing import Optional

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# ==========================
# 1. Load model from Hugging Face
# ==========================
MODEL_NAME = "angkor96/khmer-news-summarization"  # e.g., "Sedtha-019/khmer-summarization"

# Upper bound on the number of input tokens fed to the encoder; longer
# articles are truncated by the tokenizer.
MAX_INPUT_TOKENS = 1024

print("Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Move to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
print(f"✅ Model loaded successfully on {device}!")


# ==========================
# 2. Summarization function
# ==========================
def summarize_khmer_text(
    text: str,
    max_length: int = 150,
    min_length: Optional[int] = None,
) -> str:
    """Summarize Khmer text with the module-level seq2seq model.

    Args:
        text: The article to summarize. Empty or whitespace-only input
            yields a bilingual "please enter text" message.
        max_length: Upper bound passed to ``model.generate`` for the
            generated sequence length. Cast to ``int`` because UI sliders
            may deliver numeric values that are not plain ints.
        min_length: Optional lower bound for the generated sequence length.
            ``None`` (the default) leaves the model's own generation default
            in place, which matches the behavior of two-argument callers.
            When given, it is clamped into ``[0, max_length - 1]`` so that a
            conflicting slider combination cannot ask for a minimum that
            exceeds the maximum.

    Returns:
        The decoded summary, or a user-facing message string when the input
        is empty or when tokenization/generation raises.
    """
    if not text or not text.strip():
        return "⚠️ សូមបញ្ចូលអត្ថបទ / Please enter text"

    try:
        max_length = int(max_length)
        generation_kwargs = {
            "max_length": max_length,
            "length_penalty": 2.0,
            "num_beams": 5,
            "early_stopping": True,
        }
        if min_length is not None:
            generation_kwargs["min_length"] = max(
                0, min(int(min_length), max_length - 1)
            )

        # A single sequence needs no padding; padding to the full
        # MAX_INPUT_TOKENS would only make the encoder process pad tokens.
        inputs = tokenizer(
            text,
            max_length=MAX_INPUT_TOKENS,
            truncation=True,
            return_tensors="pt",
        ).to(device)

        # Pass the attention mask explicitly rather than relying on
        # generate() to infer it from the input ids.
        with torch.no_grad():
            summary_ids = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs.get("attention_mask"),
                **generation_kwargs,
            )

        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: surface the failure in the output box instead of
        # crashing the request handler.
        return f"❌ Error: {str(e)}"
# ==========================
# 3. Gradio UI
# ==========================
# Page header shown at the top of the app (Khmer + English).
HEADER_MARKDOWN = """
# 🇰🇭 Khmer Text Summarization
### បញ្ចូលអត្ថបទខ្មែរ ហើយទទួលបានការសង្ខេបដោយស្វ័យប្រវត្តិ
Enter Khmer text and get an automatic summary
"""

# Each row is [article text, maximum summary length, minimum summary length],
# in the same order as the input components wired up below.
EXAMPLE_ROWS = [
    [
        "ប្រទេសកម្ពុជាមានប្រវត្តិសាស្រ្តយូរលង់និងសម្បូរបែបដោយវប្បធម៌។ អាណាចក្រខ្មែរបានរីកចម្រើនក្នុងសតវត្សទី៩ដល់ទី១៥។ អង្គរវត្តជាស្នាដៃស្ថាបត្យកម្មដ៏អស្ចារ្យមួយរបស់ពិភពលោក។",
        100,
        30,
    ],
    [
        "ការអប់រំជាមូលដ្ឋានគ្រឹះសំខាន់សម្រាប់ការអភិវឌ្ឍន៍ជាតិ។ សិស្សានុសិស្សគប្បីរៀនសូត្រយ៉ាងស្អិតរុំ។ គ្រូបង្រៀនមានតួនាទីសំខាន់ក្នុងការបង្កើតអនាគតកុមារ។",
        80,
        25,
    ],
]

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(HEADER_MARKDOWN)

    with gr.Row():
        # Left column: article input, length controls, and the trigger button.
        with gr.Column():
            input_text = gr.Textbox(
                label="📝 អត្ថបទដើម / Original Text",
                placeholder="បញ្ចូលអត្ថបទខ្មែរនៅទីនេះ...\nEnter Khmer text here...",
                lines=10,
            )

            with gr.Row():
                max_len = gr.Slider(
                    label="Maximum Summary Length",
                    minimum=50,
                    maximum=300,
                    step=10,
                    value=150,
                )
                min_len = gr.Slider(
                    label="Minimum Summary Length",
                    minimum=20,
                    maximum=100,
                    step=10,
                    value=40,
                )

            submit_btn = gr.Button("🔄 Summarize / សង្ខេប", variant="primary")

        # Right column: generated summary.
        with gr.Column():
            output_text = gr.Textbox(
                label="📋 សង្ខេប / Summary",
                lines=10,
            )

    # The same three components feed both the examples and the click handler.
    summarizer_inputs = [input_text, max_len, min_len]

    # Clickable sample articles that pre-fill the inputs.
    gr.Examples(
        examples=EXAMPLE_ROWS,
        inputs=summarizer_inputs,
    )

    # Run the summarizer when the button is pressed.
    submit_btn.click(
        fn=summarize_khmer_text,
        inputs=summarizer_inputs,
        outputs=output_text,
    )

# ==========================
# 4. Launch
# ==========================
if __name__ == "__main__":
    demo.launch(share=True)