Spaces:

lovi07
/

Text_Summarization

Sleeping

File size: 4,846 Bytes

import os
import validators
import streamlit as st
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
import tempfile

# Page setup: configure the browser tab, then render the visible headings.
# The same title text and icon are used for the tab and for the on-page title.
APP_TITLE = "LangChain: Summarize Text From YT, Website, or PDF"
APP_ICON = "🦜"

st.set_page_config(page_title=APP_TITLE, page_icon=APP_ICON)
st.title(f"{APP_ICON} {APP_TITLE}")
st.subheader("Summarize Content from a URL or Uploaded PDF")

# Sidebar: API Key Inputs
# Both keys are collected through masked ("password") text inputs that start
# out empty.
with st.sidebar:
    st.write("Get your Groq API key from https://groq.com/ and your LangSmith API key from https://langsmith.com/")
    groq_api_key = st.text_input("Groq API Key", value="", type="password")
    langsmith_api_key = st.text_input("LangSmith API Key", value="", type="password")

# Set LangSmith environment variables
# Export the key with surrounding whitespace removed: the submit-time check
# elsewhere in this script validates the stripped value, so exporting the raw
# text would let a whitespace-only entry switch tracing on with a blank key,
# and would pass copy-paste padding through to the tracing client.
# NOTE(review): os.environ is process-wide, so once set these values persist
# across reruns; confirm that is acceptable if several users share the process.
_langsmith_key = langsmith_api_key.strip()
if _langsmith_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = _langsmith_key

# Content sources: a link typed into a text box, or a PDF upload.
# The URL box is created with its label collapsed.
generic_url = st.text_input(
    label="URL (YouTube or Website)",
    label_visibility="collapsed",
)

# The uploader only accepts files with a .pdf extension.
uploaded_file = st.file_uploader(label="Upload a PDF File", type=["pdf"])

# Prompt Templates
# Two prompts drive the "refine" summarization chain built further down:
#   * initial_prompt    - summarizes content supplied as {text}
#   * refinement_prompt - updates {existing_answer} using further {text}
_INITIAL_TEMPLATE = "Write a concise summary of the following content:\nContent: {text}"

# The refinement prompt is three paragraphs separated by blank lines.
_REFINEMENT_TEMPLATE = "\n\n".join(
    [
        "The following is a summary that needs refinement:\nCurrent Summary: {existing_answer}",
        "We have additional content that can be used to refine the summary:\nContent: {text}",
        "Please refine the current summary to include the new information while maintaining conciseness.",
    ]
)

initial_prompt = PromptTemplate(
    input_variables=["text"],
    template=_INITIAL_TEMPLATE,
)

refinement_prompt = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=_REFINEMENT_TEMPLATE,
)

# Initialize LLM
# `llm` stays None when no key was entered or when client construction fails;
# the submit handler checks for that before building the chain.
llm = None

# Use the stripped key so copy-paste padding is not sent to the API and a
# whitespace-only entry is treated as "no key", matching the submit-time check.
_groq_key = groq_api_key.strip()
if _groq_key:
    try:
        llm = ChatGroq(model="gemma2-9b-it", groq_api_key=_groq_key)
    except Exception as e:
        # Surface the failure in the UI instead of aborting the script run.
        st.error(f"Failed to initialize Groq client: {e}")

# Button to Summarize Content


def _load_url_documents(url):
    """Load documents from a YouTube link or a generic web page.

    A URL containing "youtube.com" or "youtu.be" is handed to YoutubeLoader;
    anything else is fetched with UnstructuredURLLoader using a browser-like
    User-Agent header.

    Args:
        url: The already-validated, whitespace-stripped URL.

    Returns:
        Whatever the selected loader's ``load()`` returns.
    """
    if "youtube.com" in url or "youtu.be" in url:
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
    else:
        # NOTE(review): ssl_verify=False turns off TLS certificate
        # verification for the fetch. Kept as in the original; confirm this
        # is intentional before relying on it for untrusted sites.
        loader = UnstructuredURLLoader(
            urls=[url],
            ssl_verify=False,
            headers={
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) "
                              "AppleWebKit/537.36 (KHTML, like Gecko) "
                              "Chrome/116.0.0.0 Safari/537.36"
            },
        )
    return loader.load()


def _load_pdf_documents(pdf_file):
    """Load and split an uploaded PDF into documents.

    PyPDFLoader is given a filesystem path, so the upload is first written to
    a temporary ``.pdf`` file. The file is removed once loading finishes,
    whether it succeeded or raised.

    Args:
        pdf_file: The object returned by ``st.file_uploader``.

    Returns:
        The result of ``PyPDFLoader.load_and_split()``.
    """
    # delete=False so the file can be reopened by path after this handle is
    # closed; cleanup is done explicitly in the finally block below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # getvalue() returns the full upload regardless of the read position.
        temp_file.write(pdf_file.getvalue())
        temp_file_path = temp_file.name
    try:
        return PyPDFLoader(temp_file_path).load_and_split()
    finally:
        os.remove(temp_file_path)


if st.button("Summarize the Content"):
    # Normalize once so validation and loading agree on what the URL is.
    url = generic_url.strip()

    if not groq_api_key.strip():
        st.error("Please provide the Groq API Key to get started.")
    elif not langsmith_api_key.strip():
        st.error("Please provide the LangSmith API Key for tracking.")
    elif not (url or uploaded_file):
        st.error("Please provide a valid URL or upload a PDF file.")
    elif url and not validators.url(url):
        st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
    elif not llm:
        st.error("LLM not initialized. Please check your API key.")
    else:
        try:
            with st.spinner("Processing..."):
                # A URL takes precedence when both a URL and a PDF are given.
                # The checks above guarantee a PDF exists when the URL is empty.
                if url:
                    docs = _load_url_documents(url)
                else:
                    docs = _load_pdf_documents(uploaded_file)

                # Safety check
                if not docs:
                    st.error("❌ No content could be extracted from the given source. Please try another file or URL.")
                else:
                    chain = load_summarize_chain(
                        llm,
                        chain_type="refine",
                        question_prompt=initial_prompt,
                        refine_prompt=refinement_prompt,
                        verbose=True
                    )
                    output_summary = chain.run(docs)
                    st.success(output_summary)

        except Exception as e:
            # st.exception expects the exception object itself; passing a
            # formatted string loses the type and traceback.
            st.exception(e)