Spaces:
Sleeping
Sleeping
| # ============================================================ | |
| # Aplikasi Translasi Jawa -> Indonesia & Inggris + Evaluasi | |
| # MODEL: facebook/m2m100_418M | |
| # METRIK: BLEU + ROUGE-L + METEOR | |
| # ============================================================ | |
| import torch | |
| import gradio as gr | |
| import sacrebleu | |
| import nltk | |
| import torch | |
| from rouge import Rouge | |
| from nltk.translate.meteor_score import meteor_score | |
| from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer | |
print("=== APP STARTED ===")

# ============================================================
# NLTK corpora fetched for the METEOR metric
# ============================================================
nltk.download("wordnet")
nltk.download("omw-1.4")

# ============================================================
# Model & tokenizer, placed on the GPU when CUDA is available
# ============================================================
MODEL_NAME = "facebook/m2m100_418M"

# Pick the device first so the model can be moved as soon as it is loaded.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = M2M100Tokenizer.from_pretrained(MODEL_NAME)
model = M2M100ForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)

print("Model loaded on:", device)
| # ============================================================ | |
| # Translasi Function | |
| # ============================================================ | |
def _generate_translation(inputs, target_lang):
    """Generate one translation of the encoded ``inputs`` and decode it.

    Args:
        inputs: Tokenizer output for the source text, already on ``device``.
        target_lang: Language code whose id is forced as the first
            generated token (e.g. ``"id"`` or ``"en"``).

    Returns:
        The first decoded sequence with special tokens removed.
    """
    generated = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.get_lang_id(target_lang),
        max_length=512,
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]


def translate_jawa(text):
    """Translate Javanese text into Indonesian and English.

    Args:
        text: Javanese source text. ``None``, empty, or whitespace-only
            input is treated as "nothing to translate".

    Returns:
        A ``(indonesian, english)`` tuple of decoded strings, or
        ``("", "")`` when there is no input text.
    """
    # Guard against None before calling .strip(); a missing value is
    # handled the same way as a blank one.
    if text is None or not text.strip():
        return "", ""

    tokenizer.src_lang = "jv"

    # The source is encoded once (truncated to 512 tokens) and reused for
    # both target languages.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to(device)

    id_text = _generate_translation(inputs, "id")
    en_text = _generate_translation(inputs, "en")
    return id_text, en_text
| # ============================================================ | |
| # Evaluasi Function (BLEU + ROUGE-L + METEOR) | |
| # ============================================================ | |
def evaluate_translation(jawa, ref_id, ref_en):
    """Translate Javanese text and score the Indonesian output.

    Args:
        jawa: Javanese source text passed to ``translate_jawa``.
        ref_id: Indonesian reference translation; all three metrics are
            computed against it.
        ref_en: English reference. Accepted so the UI can supply it, but
            it is not used in any score.

    Returns:
        A 5-tuple ``(pred_id, pred_en, bleu, rouge_l, meteor)``: the two
        predictions followed by the scores formatted as strings (BLEU with
        two decimals, ROUGE-L F1 and METEOR with four). When the prediction
        or the Indonesian reference is empty, all scores are reported as
        zero instead of raising.
    """
    pred_id, pred_en = translate_jawa(jawa)

    # Rouge.get_scores raises ValueError on an empty hypothesis or
    # reference, which would crash the UI callback. With nothing to compare
    # there is no overlap, so report zero scores.
    if not pred_id.strip() or ref_id is None or not ref_id.strip():
        return pred_id, pred_en, f"{0.0:.2f}", f"{0.0:.4f}", f"{0.0:.4f}"

    # BLEU (Indonesian): one hypothesis against one reference stream.
    bleu = sacrebleu.corpus_bleu([pred_id], [[ref_id]]).score

    # ROUGE-L (Indonesian): F1 of the single hypothesis/reference pair.
    rouge_l = Rouge().get_scores(pred_id, ref_id)[0]["rouge-l"]["f"]

    # METEOR (Indonesian): NLTK expects pre-tokenized references and
    # hypothesis, hence the whitespace split.
    meteor = meteor_score([ref_id.split()], pred_id.split())

    return (
        pred_id,
        pred_en,
        f"{bleu:.2f}",
        f"{rouge_l:.4f}",
        f"{meteor:.4f}",
    )
| # ============================================================ | |
| # Gradio UI | |
| # ============================================================ | |
# Two-tab interface: plain translation, and translation plus metric scores.
with gr.Blocks(title="Jawa → Indonesia & English Translator") as demo:
    gr.Markdown("## 🈶 Translasi Bahasa Jawa + Evaluasi")
    gr.Markdown(
        "Model: **facebook/m2m100_418M** \n"
        "Output: Indonesia & English \n"
        "Evaluasi: **BLEU · ROUGE-L · METEOR**"
    )

    # --- Tab 1: translate only -------------------------------------
    with gr.Tab("🔤 Translasi"):
        inp = gr.Textbox(label="Teks Bahasa Jawa", lines=5)
        out_id = gr.Textbox(label="Terjemahan Indonesia", lines=3)
        out_en = gr.Textbox(label="Terjemahan English", lines=3)
        btn = gr.Button("Terjemahkan")
        # One input box feeds translate_jawa; its two results fill the
        # Indonesian and English output boxes in order.
        btn.click(translate_jawa, inputs=inp, outputs=[out_id, out_en])

    # --- Tab 2: translate and score --------------------------------
    with gr.Tab("📊 Evaluasi"):
        # Inputs: source text plus reference translations.
        eval_jawa = gr.Textbox(label="Teks Jawa", lines=4)
        ref_id = gr.Textbox(label="Referensi Indonesia", lines=2)
        ref_en = gr.Textbox(label="Referensi English (opsional)", lines=2)

        # Outputs: predictions followed by the three metric scores.
        pred_id = gr.Textbox(label="Prediksi Indonesia")
        pred_en = gr.Textbox(label="Prediksi English")
        bleu = gr.Textbox(label="BLEU Score")
        rouge_l = gr.Textbox(label="ROUGE-L F1")
        meteor = gr.Textbox(label="METEOR Score")

        eval_btn = gr.Button("Evaluasi")
        # Output order must match the tuple returned by evaluate_translation.
        eval_btn.click(
            evaluate_translation,
            inputs=[eval_jawa, ref_id, ref_en],
            outputs=[pred_id, pred_en, bleu, rouge_l, meteor],
        )
| # ============================================================ | |
| # Launch App | |
| # ============================================================ | |
if __name__ == "__main__":
    # Serve on all interfaces at port 7860 without a public share link.
    print("=== LAUNCHING GRADIO ===")
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)