|
|
|
|
|
import gradio as gr |
|
|
from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
import torch |
|
|
import evaluate |
|
|
import pandas as pd |
|
|
import io |
|
|
|
|
|
# ROUGE scorer, created once at module level and used by
# summarize_and_evaluate() whenever a reference summary is supplied.
rouge_metric = evaluate.load("rouge")
|
|
|
|
|
|
|
|
|
|
|
def load_t5_indonesian_model():
    """Load the Indonesian T5 summarization tokenizer and model.

    The model is moved to ``cuda:0`` when CUDA is available; otherwise it is
    left where ``from_pretrained`` put it.

    Returns:
        A ``(tokenizer, model, device_index)`` tuple, where ``device_index``
        is ``0`` when CUDA is available and ``-1`` otherwise. If anything
        goes wrong while loading, the error is printed and
        ``(None, None, -1)`` is returned instead of raising.
    """
    checkpoint = "cahya/t5-base-indonesian-summarization-cased"
    try:
        tokenizer = T5Tokenizer.from_pretrained(checkpoint)
        model = T5ForConditionalGeneration.from_pretrained(checkpoint)

        if torch.cuda.is_available():
            device_index = 0
            model.to(f"cuda:{device_index}")
        else:
            device_index = -1

        print("Model T5 Bahasa Indonesia (cahya/t5) berhasil dimuat.")
        return tokenizer, model, device_index
    except Exception as load_error:
        # Loading is best-effort: callers check for None and report the
        # failure in the UI rather than crashing at start-up.
        print(f"Error saat memuat model T5 Bahasa Indonesia: {str(load_error)}")
        return None, None, -1
|
|
|
|
|
|
|
|
def load_indobart_model():
    """Build the IndoBART v2 summarization pipeline.

    The pipeline is placed on the first CUDA device when one is available
    and on the CPU otherwise, matching the device choice made by
    ``load_t5_indonesian_model``.

    Returns:
        The ``transformers`` summarization pipeline, or ``None`` if loading
        failed (the error is printed instead of raised).
    """
    try:
        # transformers pipelines take a device index: 0 = first GPU, -1 = CPU.
        device_index = 0 if torch.cuda.is_available() else -1
        indobart_pipeline = pipeline(
            "summarization",
            model="gaduhhartawan/indobart-base-v2",
            device=device_index,
        )
        print("Model IndoBART v2 (gaduhhartawan/indobart) berhasil dimuat.")
        return indobart_pipeline
    except Exception as e:
        # Loading is best-effort: callers check for None and report the
        # failure in the UI rather than crashing at start-up.
        print(f"Error saat memuat model IndoBART v2: {str(e)}")
        return None
|
|
|
|
|
|
|
|
|
|
|
# Load both summarizers once at start-up. Either loader may hand back None
# placeholders on failure; summarize_and_evaluate() checks for that.
(t5_tokenizer, t5_model, t5_device) = load_t5_indonesian_model()
indobart_summarizer_pipeline = load_indobart_model()
|
|
|
|
|
|
|
|
def summarize_and_evaluate(text_input, model_choice, min_length_val=30, max_length_val=150, reference_summary=""):
    """Summarize text with the chosen model and optionally score it with ROUGE.

    Args:
        text_input: Source text to summarize. ``None`` is treated as empty.
        model_choice: Model id selected in the UI; either
            ``"cahya/t5-base-indonesian-summarization-cased"`` or
            ``"gaduhhartawan/indobart-base-v2"``.
        min_length_val: Forwarded (as ``int``) as ``min_length`` to generation.
        max_length_val: Forwarded (as ``int``) as ``max_length`` to generation.
        reference_summary: Optional reference summary. When non-blank and a
            summary was produced, ROUGE-1/2/L scores are computed against it.
            ``None`` is treated as empty.

    Returns:
        A 4-tuple ``(status_message, result_html, eval_table_html,
        summarized_text)``. On validation failure, on an unavailable or
        unknown model, or on any exception, every element except the status
        message is an empty string.
    """
    import html  # function-scope import keeps this block self-contained

    # Treat None like an empty string so the .strip() calls cannot fail.
    text_input = text_input or ""
    reference_summary = reference_summary or ""

    if not text_input.strip():
        return "⚠️ Mohon masukkan teks yang ingin diringkas!", "", "", ""

    if min_length_val >= max_length_val:
        return "⚠️ Panjang minimum harus lebih kecil dari panjang maksimum!", "", "", ""
    if min_length_val <= 0 or max_length_val <= 0:
        return "⚠️ Panjang tidak boleh nol atau negatif!", "", "", ""

    summarized_text = ""
    status_message = ""
    current_model_name = ""

    try:
        if model_choice == "cahya/t5-base-indonesian-summarization-cased":
            current_model_name = "T5 Bahasa Indonesia (cahya/t5)"
            if t5_tokenizer is None or t5_model is None:
                status_message = f"❌ Error: {current_model_name} gagal dimuat."
            else:
                input_ids = t5_tokenizer.encode(
                    "summarize: " + text_input,
                    return_tensors="pt",
                    max_length=512,
                    truncation=True,
                )
                # The input tensor must live on the same device as the model.
                if t5_device != -1:
                    input_ids = input_ids.to(f"cuda:{t5_device}")

                summary_ids = t5_model.generate(
                    input_ids,
                    min_length=int(min_length_val),
                    max_length=int(max_length_val),
                    num_beams=4,
                    early_stopping=True,
                )
                summarized_text = t5_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
                status_message = f"✅ Ringkasan dengan {current_model_name} berhasil!"

        elif model_choice == "gaduhhartawan/indobart-base-v2":
            current_model_name = "IndoBART v2 (gaduhhartawan/indobart)"
            if indobart_summarizer_pipeline is None:
                status_message = f"❌ Error: {current_model_name} gagal dimuat."
            else:
                summary = indobart_summarizer_pipeline(
                    text_input,
                    min_length=int(min_length_val),
                    max_length=int(max_length_val),
                    truncation=True,
                )
                summarized_text = summary[0]['summary_text']
                status_message = f"✅ Ringkasan dengan {current_model_name} berhasil!"

        else:
            status_message = "⚠️ Pilihan model tidak valid."

        eval_table_html = ""
        if summarized_text and reference_summary.strip():
            rouge_scores = rouge_metric.compute(
                predictions=[summarized_text],
                references=[reference_summary],
            )
            evaluation_df = pd.DataFrame({
                "Metrik": ["ROUGE-1 F1", "ROUGE-2 F1", "ROUGE-L F1"],
                "Skor": [
                    f"{rouge_scores['rouge1']:.4f}",
                    f"{rouge_scores['rouge2']:.4f}",
                    f"{rouge_scores['rougeL']:.4f}",
                ],
            })
            eval_table_html = evaluation_df.to_html(index=False)
            status_message += " Evaluasi ROUGE selesai."
        elif summarized_text:
            status_message += " (Tidak ada ringkasan referensi untuk evaluasi ROUGE)."

        # Only render the result panel when there is a summary to show, and
        # escape it: the summary is derived from user-supplied text and is
        # displayed through an HTML component, so raw markup must not leak
        # into the page.
        result_html = ""
        if summarized_text:
            result_html = f"""
            <h3>Teks Ringkasan Anda (dengan {current_model_name}):</h3>
            <p>{html.escape(summarized_text)}</p>
            """

        return status_message, result_html, eval_table_html, summarized_text

    except Exception as e:
        # UI boundary: surface the failure as a status message instead of
        # letting the exception propagate to the caller.
        return f"❌ Terjadi kesalahan: {str(e)}", "", "", ""
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: model picker, input text, length sliders, optional reference
# summary, result panels, and a download link for the generated summary.
with gr.Blocks(title="Perbandingan Model Ringkasan Bahasa Indonesia") as demo:
    gr.Markdown("# 📝 Perbandingan Model Ringkasan Bahasa Indonesia")
    gr.Markdown("Masukkan teks asli Bahasa Indonesia dan pilih model yang ingin Anda gunakan. Opsional, berikan ringkasan referensi untuk evaluasi ROUGE.")

    with gr.Row():
        model_choice = gr.Radio(
            choices=["cahya/t5-base-indonesian-summarization-cased", "gaduhhartawan/indobart-base-v2"],
            label="Pilih Model Ringkasan",
            value="cahya/t5-base-indonesian-summarization-cased",
        )

    with gr.Row():
        text_input = gr.Textbox(
            label="Teks Asli (Bahasa Indonesia)",
            placeholder="Masukkan teks panjang berbahasa Indonesia yang ingin Anda ringkas di sini...",
            lines=10,
        )

    with gr.Row():
        min_length_slider = gr.Slider(
            minimum=10,
            maximum=100,
            value=30,
            step=1,
            label="Panjang Ringkasan Minimum",
        )
        max_length_slider = gr.Slider(
            minimum=50,
            maximum=200,
            value=80,
            step=1,
            label="Panjang Ringkasan Maksimum",
        )

    reference_summary_input = gr.Textbox(
        label="Ringkasan Referensi (Opsional untuk Evaluasi ROUGE)",
        placeholder="Masukkan ringkasan yang dibuat manusia untuk teks ini (untuk perbandingan)",
        lines=3,
    )

    summarize_btn = gr.Button("✨ Ringkas & Evaluasi Sekarang")

    status_output = gr.Markdown(label="Status Proses")
    summary_output = gr.HTML(label="Hasil Ringkasan")
    evaluation_output = gr.HTML(label="Hasil Evaluasi ROUGE")

    download_btn = gr.File(label="Unduh Ringkasan", visible=False)

    # One shared State carries the raw summary text from the summarize step
    # to the download step. Each gr.State() call creates a separate
    # component, so the same instance must be used as output and as input.
    summary_state = gr.State("")

    def update_download_button(summarized_text_content):
        """Expose the summary as a downloadable text file.

        Args:
            summarized_text_content: Raw summary text; empty or ``None``
                hides the download component.

        Returns:
            A ``gr.File`` update: visible and pointing at a freshly written
            ``ringkasan_hasil.txt`` when there is a summary, hidden otherwise.
        """
        if not summarized_text_content:
            return gr.File(visible=False)

        import os
        import tempfile

        # gr.File takes a file path as its value, so the text is written to
        # disk first. A private temp directory lets the file keep a friendly
        # name. NOTE: these temp directories are not cleaned up here.
        out_dir = tempfile.mkdtemp()
        out_path = os.path.join(out_dir, "ringkasan_hasil.txt")
        with open(out_path, "w", encoding="utf-8") as fh:
            fh.write(summarized_text_content)
        return gr.File(value=out_path, visible=True)

    summarize_btn.click(
        fn=summarize_and_evaluate,
        inputs=[text_input, model_choice, min_length_slider, max_length_slider, reference_summary_input],
        outputs=[status_output, summary_output, evaluation_output, summary_state],
    ).success(
        fn=update_download_button,
        inputs=summary_state,
        outputs=download_btn,
    )

    gr.Markdown("""
    ---
    <div style='text-align: center; margin-top: 20px;'>
    <p>Dibuat oleh Muhammad Khoirul Mustaqim.</p>
    <p>Didukung oleh Hugging Face Transformers dan Gradio.</p>
    <p>Model: <a href="https://huggingface.co/cahya/t5-base-indonesian-summarization-cased" target="_blank">cahya/t5-base-indonesian-summarization-cased</a> dan <a href="https://huggingface.co/gaduhhartawan/indobart-base-v2" target="_blank">gaduhhartawan/indobart-base-v2</a></p>
    </div>
    """)
|
|
|
|
|
|
|
|
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()