fajrikoto/id_liputan6
Updated • 334 • 12
How to use gaduhhartawan/indobart-base with Transformers:
# Use a pipeline as a high-level helper
# Warning: Pipeline type "summarization" is no longer supported in transformers v5.
# You must load the model directly (see below) or downgrade to v4.x with:
# 'pip install "transformers<5.0.0"'
from transformers import pipeline
pipe = pipeline("summarization", model="gaduhhartawan/indobart-base")

# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("gaduhhartawan/indobart-base")
model = AutoModelForSeq2SeqLM.from_pretrained("gaduhhartawan/indobart-base")

This model is a fine-tuned version of bart-large-cnn on the Liputan6 dataset. See the model demo in this notebook.
| Training Loss | Epoch | R1 Precision | R1 Recall | R1 Fmeasure | R2 Precision | R2 Recall | R2 Fmeasure | Rl Precision | Rl Recall | Rl Fmeasure |
|---|---|---|---|---|---|---|---|---|---|---|
| 0.3064 | 1.0 | 0.3487 | 0.6043 | 0.4375 | 0.1318 | 0.2613 | 0.1723 | 0.3349 | 0.5833 | 0.4208 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Example: summarize one article with the fine-tuned checkpoint and print it.

# Load model and tokenizer
model = AutoModelForSeq2SeqLM.from_pretrained("gaduhhartawan/indobart-base")
tokenizer = AutoTokenizer.from_pretrained("gaduhhartawan/indobart-base")

# Input article for summarization
ARTICLE_TO_SUMMARIZE = "lorem ipsum..."

# Tokenize the article. Truncate to the model's position limit so a long
# article cannot index past the position-embedding table (which raises an
# error inside the encoder), and keep the attention mask so generate() does
# not have to infer it.
# NOTE(review): assumes the checkpoint's config exposes
# `max_position_embeddings` (true for BART configs) -- confirm if the
# architecture ever changes.
inputs = tokenizer(
    ARTICLE_TO_SUMMARIZE,
    return_tensors="pt",
    truncation=True,
    max_length=model.config.max_position_embeddings,
)
input_ids = inputs["input_ids"]

# Generate summary
# do_sample=True combined with num_beams > 1 samples within beam search, so
# the output can differ between runs unless a seed is fixed.
summary_ids = model.generate(
    input_ids,
    attention_mask=inputs["attention_mask"],
    min_length=30,
    max_length=150,
    num_beams=2,
    repetition_penalty=2.0,
    length_penalty=0.8,
    early_stopping=True,
    no_repeat_ngram_size=2,
    use_cache=True,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
)

# Decode the summary
summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Summary: ", summary_text)