| """import gradio as gr |
| import nltk |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
| |
| nltk.download('punkt') |
| |
| def fragment_text(text, tokenizer): |
| sentences = nltk.tokenize.sent_tokenize(text) |
| max_len = tokenizer.max_len_single_sentence |
| |
| chunks = [] |
| chunk = "" |
| count = -1 |
| |
| for sentence in sentences: |
| count += 1 |
| combined_length = len(tokenizer.tokenize(sentence)) + len(chunk) |
| |
| if combined_length <= max_len: |
| chunk += sentence + " " |
| else: |
| chunks.append(chunk.strip()) |
| chunk = sentence + " " |
| |
| if chunk != "": |
| chunks.append(chunk.strip()) |
| |
| return chunks |
| |
| |
| def summarize_text(text, tokenizer, model): |
| chunks = fragment_text(text, tokenizer) |
| |
| summaries = [] |
| for chunk in chunks: |
| input = tokenizer(chunk, return_tensors='pt') |
| output = model.generate(**input) |
| summary = tokenizer.decode(*output, skip_special_tokens=True) |
| summaries.append(summary) |
| |
| final_summary = " ".join(summaries) |
| return final_summary |
| |
| checkpoint = "tclopess/bart_samsum" |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) |
| model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint) |
| |
| def summarize_and_display(text): |
| summary = summarize_text(text, tokenizer, model) |
| return summary |
| |
| iface = gr.Interface( |
| fn=summarize_and_display, |
| inputs=gr.Textbox(label="Enter text to summarize:"), |
| outputs=gr.Textbox(label="Summary:"), |
| live=False, # Set live to False to add a button |
| button="Summarize", # Add a button with the label "Summarize" |
| title="Text Summarizer with Button", |
| ) |
| |
| iface.launch(share=True) |
| """ |
|
|
| import gradio as gr |
| import nltk |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
# Download the Punkt sentence-tokenizer models needed by
# nltk.tokenize.sent_tokenize() below (no-op if already cached).
nltk.download('punkt')
|
|
|
|
def fragment_text(text, tokenizer):
    """Split *text* into sentence-aligned chunks that fit the tokenizer limit.

    Sentences are grouped greedily, in order, so that each chunk's token
    count stays at or below ``tokenizer.max_len_single_sentence``.  A single
    sentence longer than the limit still becomes its own chunk (it cannot
    be split further at sentence granularity).

    Args:
        text: Raw input string to split.
        tokenizer: Hugging Face tokenizer exposing ``tokenize()`` and
            ``max_len_single_sentence``.

    Returns:
        List of non-empty, whitespace-stripped chunk strings.
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    chunk = ""
    # Track the chunk's size in TOKENS.  The original code compared the
    # sentence's token count against len(chunk) — a CHARACTER count — so
    # chunks could far exceed the model's input limit.
    chunk_tokens = 0

    for sentence in sentences:
        sentence_tokens = len(tokenizer.tokenize(sentence))

        if chunk and chunk_tokens + sentence_tokens > max_len:
            # Current chunk is full: emit it and start a fresh one.
            # Guarding on `chunk` also prevents appending an empty chunk
            # when the very first sentence already exceeds max_len.
            chunks.append(chunk.strip())
            chunk = ""
            chunk_tokens = 0

        chunk += sentence + " "
        chunk_tokens += sentence_tokens

    if chunk != "":
        chunks.append(chunk.strip())

    return chunks
|
|
|
|
def summarize_text(text, tokenizer, model):
    """Summarize *text* by chunking it and summarizing each chunk.

    Args:
        text: Raw input string to summarize.
        tokenizer: Hugging Face tokenizer matching *model*.
        model: A seq2seq model exposing ``generate()``.

    Returns:
        The chunk summaries joined with single spaces.
    """
    chunks = fragment_text(text, tokenizer)

    summaries = []
    for chunk in chunks:
        # `inputs` (not `input`) avoids shadowing the builtin.
        inputs = tokenizer(chunk, return_tensors='pt')
        output = model.generate(**inputs)
        # generate() returns a batch of sequences; decode the single
        # sequence explicitly instead of star-unpacking the whole batch
        # into decode()'s positional parameters.
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        summaries.append(summary)

    return " ".join(summaries)
|
|
|
|
# Checkpoint: BART fine-tuned for dialogue summarization (SAMSum dataset).
# Loaded once at import time; weights are downloaded on first run and
# cached by the transformers library afterwards.
checkpoint = "tclopess/bart_samsum"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
|
|
|
|
def summarize_and_display(text, button_click_event=None, interface_state=None):
    """Gradio callback: return the summary of *text*.

    Args:
        text: The textbox contents to summarize.
        button_click_event: Unused.  Kept (with a default) for backward
            compatibility with the original 3-input wiring, so the
            callback also works when called with just the text.
        interface_state: Unused; see ``button_click_event``.

    Returns:
        The summary string produced by ``summarize_text``.
    """
    return summarize_text(text, tokenizer, model)
|
|
|
|
# Build the web UI.  Only the textbox is a real input: gr.Button carries no
# value to the callback and gr.Label is an *output* component, so the
# original wiring fed bogus extra arguments to fn.  With live=False,
# gr.Interface renders its own Submit button — the "summarize on click"
# behavior the title describes.
iface = gr.Interface(
    # Lambda adapts the single textbox value to summarize_and_display's
    # extra (unused) parameters.
    fn=lambda text: summarize_and_display(text, None, None),
    inputs=gr.Textbox(label="Enter text to summarize:"),
    outputs=gr.Textbox(label="Summary:"),
    live=False,  # live=False -> Gradio shows a Submit button
    title="Text Summarizer with Button",
)


iface.launch(share=True)
|
|