|
|
import gradio as gr |
|
|
import os |
|
|
import torch |
|
|
from transformers import BartTokenizer, BartForConditionalGeneration, AutoConfig |
|
|
|
|
|
|
|
|
# Select GPU when available; all tensors and the model are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face access token (needed for gated/private repos); may be None
# when the env var is unset, which from_pretrained treats as "no auth".
HF_TOKEN = os.environ.get("HF_TOKEN")

model_name = 'rubendsm/inteligencia-ricardo-bart-large-2'

# BUG FIX: the correct from_pretrained kwarg is `token`, not `hf_token` —
# an unknown kwarg like `hf_token` is silently stored on the config and
# never used for authentication. The token must also be passed to the
# tokenizer and model loads, or a gated repo would still fail there.
config = AutoConfig.from_pretrained(model_name, token=HF_TOKEN)
tokenizer = BartTokenizer.from_pretrained(model_name, token=HF_TOKEN)
model = BartForConditionalGeneration.from_pretrained(model_name, config=config, token=HF_TOKEN)
model.to(device)
|
|
|
|
|
|
|
|
def summarize_long_text(input_text, temperature=1.6, top_k=55, top_p=0.8, num_beams=8):
    """Summarize a long text by splitting it into fixed-size character chunks.

    Each chunk is summarized independently with the module-level BART model,
    and the per-chunk summaries are joined with spaces.

    Args:
        input_text: Text to summarize; an empty string yields "".
        temperature: Sampling temperature (higher = more random).
        top_k: Number of candidate tokens considered at each step.
        top_p: Cumulative-probability cutoff for nucleus sampling.
        num_beams: Number of beams used during generation.

    Returns:
        The concatenated summary string.
    """
    max_input_length = 2048
    max_output_length = 2048

    # Gradio sliders deliver floats; generate() requires integers for these.
    top_k = int(top_k)
    num_beams = int(num_beams)

    # NOTE: chunking is by *characters*, while max_length below counts
    # *tokens*; the tokenizer's truncation guards against overlong chunks.
    parts = [input_text[i:i + max_input_length]
             for i in range(0, len(input_text), max_input_length)]

    part_summaries = []
    for part in parts:
        input_ids = tokenizer.encode(
            part, return_tensors="pt",
            max_length=max_input_length, truncation=True,
        )
        input_ids = input_ids.to(device)
        # Inference only: disable autograd bookkeeping to save memory.
        with torch.no_grad():
            output = model.generate(
                input_ids,
                max_length=max_output_length,
                num_return_sequences=1,
                do_sample=True,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                num_beams=num_beams,
            )
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        part_summaries.append(summary)

    full_summary = ' '.join(part_summaries)
    return full_summary
|
|
|
|
|
|
|
|
|
|
|
# --- UI components ---------------------------------------------------------
# Input area for the meeting transcription to be summarized.
textbox_input = gr.Textbox(lines=7, label="Input Text")

# Generation controls; defaults mirror summarize_long_text's signature.
temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=2.0,
    value=1.6,
    label="Temperature - Controla a aleatoriedade da geração",
)
top_k_slider = gr.Slider(
    minimum=1,
    maximum=100,
    value=55,
    label="Top K - Controla o número de tokens candidatos considerados",
)
top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.8,
    label="Top P - Controla a soma acumulada das probabilidades de tokens",
)
num_beams_slider = gr.Slider(
    minimum=1,
    maximum=10,
    value=8,
    label="Num Beams - Número de feixes (sequências candidatas) durante a geração",
)

# Output area for the generated summary.
textbox_output = gr.Textbox(label="Summary")

# --- App wiring ------------------------------------------------------------
interface = gr.Interface(
    fn=summarize_long_text,
    inputs=[
        textbox_input,
        temperature_slider,
        top_k_slider,
        top_p_slider,
        num_beams_slider,
    ],
    outputs=textbox_output,
    title="Resumo de transcrição de reuniões",
    description="Assembleia Municipal de Paredes",
)

# share=True exposes a temporary public URL in addition to the local server.
interface.launch(share=True)
|
|
|