import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader

@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns.

    Streamlit re-executes the script on every widget interaction; without
    caching, the model would be constructed again on each rerun.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


# Initialize the summarization pipeline (cached, see _load_summarizer)
summarizer = _load_summarizer()

def summarize_text(text, max_length=130, min_length=30, max_text_length=1000):
    """Summarize the input text.

    Args:
        text: The text to summarize. ``None``, empty, or whitespace-only
            input is rejected with an explanatory message.
        max_length: Forwarded to the summarizer as its ``max_length`` argument.
        min_length: Forwarded to the summarizer as its ``min_length`` argument.
        max_text_length: Maximum number of characters of ``text`` handed to
            the summarizer; anything beyond it is dropped. Pass ``None`` to
            disable the cap.

    Returns:
        The summary string on success. Otherwise a human-readable message:
        one for empty input, one when the summarizer returns nothing, and
        one describing any exception raised while summarizing.
    """
    # Guard against None as well as blank strings so callers never hit an
    # AttributeError on ``.strip()``.
    if not text or not text.strip():
        return "Input text is empty. Please provide valid text."

    # Limit text length to avoid issues with very long texts
    if max_text_length is not None and len(text) > max_text_length:
        text = text[:max_text_length]

    try:
        summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
        return summary[0]['summary_text'] if summary else "No summary available."
    except Exception as e:
        # Errors are surfaced as a message so the UI can display them
        # instead of crashing.
        return f"An error occurred: {e}"

def extract_text_from_pdf(pdf_file):
    """Extract text from an uploaded PDF file.

    Args:
        pdf_file: The PDF source handed straight to ``PdfReader`` (in this
            app, the object returned by the Streamlit file uploader).

    Returns:
        The text of all pages concatenated in page order with no separator.
        A page whose ``extract_text()`` result is falsy contributes an
        empty string.
    """
    reader = PdfReader(pdf_file)
    # Join once instead of growing a string with ``+=`` on every page.
    return "".join(page.extract_text() or "" for page in reader.pages)

def main():
    """Render the app: upload a document, show its text, summarize on demand.

    Accepts PDF and plain-text uploads. Files that cannot be parsed or
    decoded are reported with ``st.error`` instead of raising.
    """
    st.title("Document Summarization App")

    # File uploader for users to upload documents
    uploaded_file = st.file_uploader("Upload a document (PDF or TXT)", type=["pdf", "txt"])

    # Nothing to do until the user has picked a file.
    if uploaded_file is None:
        return

    if uploaded_file.type == "application/pdf":
        try:
            text = extract_text_from_pdf(uploaded_file)
        except Exception as e:
            # Corrupt or otherwise unreadable PDFs should produce a message,
            # not a traceback in the UI.
            st.error(f"Could not read the PDF file: {e}")
            return
    elif uploaded_file.type == "text/plain":
        try:
            # getvalue() returns the full contents regardless of the current
            # stream position.
            text = uploaded_file.getvalue().decode("utf-8")
        except UnicodeDecodeError:
            st.error("Could not decode the text file. Please upload a UTF-8 encoded TXT file.")
            return
    else:
        st.error("Unsupported file type. Please upload a PDF or TXT file.")
        return

    st.subheader("Original Text")
    st.write(text)

    if st.button("Generate Summary"):
        with st.spinner("Summarizing..."):
            summary = summarize_text(text)
            st.subheader("Summary")
            st.write(summary)

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()