"""Gradio app that serves the Khmer news summarization model.

Loads the seq2seq checkpoint named by ``MODEL_NAME`` once at start-up,
wraps it in a single text-in / text-out ``gr.Interface`` and launches it.
"""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

MODEL_NAME = "angkor96/khmer-news-summarization"

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load tokenizer and model once at import time so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
model.eval()  # inference only


def summarize(text: str) -> str:
    """Return a model-generated summary of *text*.

    Args:
        text: The article to summarize. It is tokenized with truncation at
            1024 tokens, so anything beyond that is not seen by the model.

    Returns:
        The decoded summary with special tokens stripped. Blank or missing
        input yields an empty string without calling the model. If anything
        raises while summarizing, a Khmer-language failure message that
        embeds the exception text is returned instead; this function does
        not propagate exceptions to the caller.
    """
    try:
        # Nothing to summarize: skip the (expensive) generation step rather
        # than letting the model produce output for empty input.
        if text is None or not text.strip():
            return ""

        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=1024,
        )
        # Input tensors must live on the same device as the model.
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # No gradients are needed for generation.
        with torch.no_grad():
            summary_ids = model.generate(
                **inputs,
                max_length=150,
                num_beams=4,
                length_penalty=2.0,
                early_stopping=True,
            )

        # Only one input was passed, so decode the single returned sequence.
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        # UI/API boundary: report the failure as text instead of raising.
        return f"មិនអាចសង្ខេបបានទេ។ ({e})"


iface = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(label="បញ្ចូលអត្ថបទ"),
    outputs=gr.Textbox(label="អត្ថបទសង្ខេប"),
    title="Khmer News Summarization API",
    description="API service powered by angkor96/khmer-news-summarization",
    api_name="predict",  # this exposes /run/predict
)

iface.launch()