File size: 2,599 Bytes
bfde142 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
from functools import lru_cache

from transformers import T5Tokenizer, T5ForConditionalGeneration
@lru_cache(maxsize=1)
def _load_t5(model_name: str):
    """
    Load the tokenizer/model pair for `model_name`, memoizing the last result.

    Loading pre-trained weights dominates the cost of a summarization call, so
    the most recently used pair is kept and reused by subsequent calls with the
    same `model_name`. The cache holds a single entry to keep at most one model
    resident between calls.
    """
    # legacy=False opts into the newer tokenizer behavior.
    tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    return tokenizer, model


def summarize_text(text: str,
                   model_name: str = "t5-base",
                   max_length: int = 150,
                   min_length: int = 40,
                   num_beams: int = 4,
                   max_input_length: int = 512) -> str:
    """
    Summarizes the given text using a T5 model.

    Parameters:
    - text: The long input text to be summarized. Leading/trailing whitespace
      is stripped before use.
    - model_name: The pre-trained T5 model to use (e.g., "t5-base", "t5-small", etc.)
    - max_length: The maximum length (in tokens) of the generated summary.
    - min_length: The minimum length (in tokens) of the generated summary.
    - num_beams: The number of beams for beam search (affects summary quality).
    - max_input_length: The maximum number of input tokens passed to the model;
      longer inputs are truncated to this length.

    Returns:
    - The summarized text (str). An empty string is returned when `text` is
      empty or contains only whitespace.
    """
    cleaned = text.strip()
    if not cleaned:
        # Nothing to summarize: skip model loading and generation entirely
        # rather than asking the model to summarize a bare prompt.
        return ""

    tokenizer, model = _load_t5(model_name)

    # T5 is prompt-driven: the task is selected by the "summarize: " prefix.
    input_ids = tokenizer.encode(
        "summarize: " + cleaned,
        return_tensors="pt",
        max_length=max_input_length,
        truncation=True,
    )

    # Beam-search generation bounded by the requested summary lengths.
    summary_ids = model.generate(
        input_ids,
        max_length=max_length,
        min_length=min_length,
        num_beams=num_beams,
        early_stopping=True,
    )

    # Only the first generated sequence is decoded; special tokens are dropped.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
if __name__ == "__main__":
    # Demo passage, written one sentence per entry; the sentences are joined
    # with single spaces to form the English text that gets summarized.
    sample_sentences = (
        "In recent years, the global economy has faced various challenges.",
        "Trade tensions, inflationary pressures, and rapid technological "
        "advancements have contributed to significant changes in market dynamics.",
        "Experts believe that these factors will continue to influence economic "
        "trends, while governments around the world are exploring policies to "
        "stabilize the economy.",
        "Meanwhile, the rise of the digital economy and the transition to green "
        "energy are emerging as key drivers of future economic growth.",
    )
    sample_text = " ".join(sample_sentences)

    # Summarize first, then emit the heading and the summary on separate lines.
    print("Summary:", summarize_text(sample_text), sep="\n")
|