# Source: Hugging Face Space "ms1449/Week3Day2Task" (status when captured: Sleeping).
# File size: 1,997 bytes.

import streamlit as st
from transformers import BartTokenizerFast, BartForConditionalGeneration

@st.cache_resource
def load_model():
    """Load the tokenizer and the BART summarization model.

    Both objects are created with ``from_pretrained`` from the
    ``bart_small_samsum`` checkpoint. The ``st.cache_resource`` decorator
    lets Streamlit reuse the loaded objects instead of loading them again
    on every call.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "bart_small_samsum"  # same location is used for both objects
    return (
        BartTokenizerFast.from_pretrained(checkpoint),
        BartForConditionalGeneration.from_pretrained(checkpoint),
    )

# Token budgets: the input is truncated to max_input_length tokens and the
# generated summary is capped at max_target_length tokens.
max_input_length = 128
max_target_length = 64


def summarize(input_text, tokenizer, model):
    """Generate a summary of ``input_text`` with beam search.

    Args:
        input_text: Text (dialogue) to summarize.
        tokenizer: Callable tokenizer that returns ``input_ids`` and
            ``attention_mask`` and provides ``decode``.
        model: Model exposing ``generate``.

    Returns:
        The decoded summary string with special tokens removed, or an empty
        string when ``input_text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: without this guard the model would still be
    # forced by min_length to produce a made-up summary.
    if not input_text or not input_text.strip():
        return ""

    # Tokenize, truncating anything beyond the input budget.
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=max_input_length,
        truncation=True,
    )

    # Forward the attention mask explicitly so generation does not have to
    # infer which positions are real tokens.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_target_length,
        min_length=30,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )

    # Only one sequence was submitted, so the first result is the summary.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Streamlit app: page title, model loading, input box, and sidebar.
st.title("Summarization Tool Using Bart-small Finetuned on Small sized Samsum Dataset")

# Load the tokenizer and model (load_model is wrapped in st.cache_resource).
tokenizer, model = load_model()

# Text input
input_text = st.text_area("Enter your dialogue here:", height=200)

if st.button("Summarize"):
    # Whitespace-only text counts as empty, so the user gets the warning
    # instead of a summary generated from nothing.
    if input_text and input_text.strip():
        with st.spinner("Generating summary..."):
            summary = summarize(input_text, tokenizer, model)
        st.subheader("Summary:")
        st.write(summary)
    else:
        st.warning("Please enter some text to summarize.")

# Add some information about the model
st.sidebar.header("About")
st.sidebar.info(
    "This app uses a fine-tuned BART-Small model to summarize dialogues. "
    "Enter your dialogue in the text area and click 'Summarize' to generate a summary."
)

# Model details; the limits are formatted from the module constants so the
# sidebar always shows the values summarize() actually uses.
st.sidebar.header("Model Details")
st.sidebar.text("Model: BART-small")
st.sidebar.text(f"Max Input Length: {max_input_length} tokens")
st.sidebar.text(f"Max Summary Length: {max_target_length} tokens")