Spaces:

ndhanvina
/

LangChain-VideoWeb-Summarizer

Sleeping

File size: 6,675 Bytes

ce4294b

import html
from urllib.parse import urlparse

import streamlit as st
import validators
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from youtube_transcript_api import TranscriptsDisabled, NoTranscriptFound
# from youtube_transcript_api import YouTubeTranscriptApi # Replaced by YoutubeLoader
# from pytube import YouTube # Replaced by YoutubeLoader
# from unstructured.partition.html import partition_html # Replaced by UnstructuredURLLoader
# import requests # Replaced by UnstructuredURLLoader


# Page-level settings: browser tab title and icon, plus the wide layout.
st.set_page_config(
    page_title="AI Content Summarizer",
    page_icon="🚀",
    layout="wide",
)

# Custom CSS for styling.
# Injects a <style> block into the page; unsafe_allow_html=True is passed so
# the raw HTML is rendered rather than shown as text.
# Classes referenced by markup elsewhere in this file: .main-header,
# .sub-header, .summary-output and .error-message.
# NOTE(review): .text-input and .submit-button are not referenced by any
# markup visible in this file -- presumably unused; verify before removing.
st.markdown("""

<style>

    .main-header {

        font-size: 36px !important;

        color: #4CAF50;

        text-align: center;

        margin-bottom: 30px;

    }

    .sub-header {

        font-size: 24px !important;

        color: #FF6347;

        margin-top: 20px;

        margin-bottom: 10px;

    }

    .text-input {

        width: 100%;

        padding: 10px;

        border-radius: 5px;

        border: 1px solid #ddd;

        margin-bottom: 20px;

    }

    .submit-button {

        background-color: #4CAF50;

        color: white;

        padding: 10px 20px;

        border: none;

        border-radius: 5px;

        cursor: pointer;

        font-size: 16px;

    }

    .submit-button:hover {

        background-color: #45a049;

    }

    .summary-output {

        background-color: #f9f9f9;

        padding: 20px;

        border-radius: 5px;

        border: 1px solid #eee;

        margin-top: 20px;

        color: #333333; /* Added for text visibility */

    }

    .error-message {

        color: red;

        font-weight: bold;

    }

</style>

""", unsafe_allow_html=True)

# Sidebar: ask the user for their Google API key (masked password field).
with st.sidebar:
    st.title("API Key Configuration")
    google_api_key = st.text_input("🔑 Google API Key", type="password")

# --- Helper Functions ---
def get_llm(api_key: str, *, model: str = "gemini-1.5-flash", temperature: float = 0):
    """Initialize and return the ChatGoogleGenerativeAI instance.

    Args:
        api_key: Google API key forwarded as ``google_api_key``.
        model: Name of the Gemini model to use. Defaults to the value that was
            previously hard-coded, so existing callers are unaffected.
        temperature: Sampling temperature passed to the model. Defaults to 0,
            as before.

    Returns:
        A configured ``ChatGoogleGenerativeAI`` instance.
    """
    return ChatGoogleGenerativeAI(
        model=model,
        google_api_key=api_key,
        temperature=temperature,
    )

# Hosts treated as YouTube; their subdomains (e.g. "www.", "m.") match as well.
_YOUTUBE_DOMAINS = ("youtube.com", "youtu.be")


def _is_youtube_url(url: str) -> bool:
    """Return True when the URL's host is a YouTube domain or a subdomain of one."""
    # urlparse only populates the host when a scheme or a leading "//" is
    # present, so scheme-less input such as "youtu.be/abc" is prefixed first.
    parsed = urlparse(url if "://" in url else f"//{url}")
    host = (parsed.hostname or "").lower()
    return any(host == domain or host.endswith(f".{domain}") for domain in _YOUTUBE_DOMAINS)


def load_documents(url: str) -> list[Document]:
    """Load documents from a URL (YouTube video or web article).

    The URL is routed to ``YoutubeLoader`` when its host name is a YouTube
    domain, and to ``UnstructuredURLLoader`` otherwise. The check is made on
    the parsed host rather than on the raw string, so an article URL that
    merely mentions "youtube.com" in its path or query is not mistaken for a
    video.

    Progress and failures are reported through Streamlit (``st.info``,
    ``st.warning``, ``st.error``); no exception is propagated to the caller.

    Args:
        url: Address of the YouTube video or web page to load.

    Returns:
        The loaded documents, or an empty list when loading failed or no
        content could be extracted.
    """
    try:
        if _is_youtube_url(url):
            st.info("Processing YouTube URL...")
            try:
                loader = YoutubeLoader.from_youtube_url(
                    url,
                    add_video_info=False,  # Keep this as False
                    language=['en'],
                )
                with st.spinner("Fetching and parsing YouTube content..."):
                    docs = loader.load()
            except TranscriptsDisabled:
                st.error(f"Transcripts are disabled for the YouTube video: {url}")
                return []
            except NoTranscriptFound:
                st.error(f"No English transcripts found for the YouTube video: {url}. The video might not have transcripts or not in English.")
                return []
            except Exception as e:
                # Any other loader failure is reported with a YouTube-specific hint.
                st.error(f"Error loading YouTube content for {url}: {str(e)}. This could be due to parsing issues or video unavailability.")
                return []
        else:
            st.info("Processing web article URL...")
            # A browser-like User-Agent is sent with the request.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            loader = UnstructuredURLLoader(urls=[url], headers=headers)
            with st.spinner("Fetching and parsing web content..."):
                docs = loader.load()

        if not docs:
            st.warning("No content could be extracted from the URL. For YouTube, check if transcripts are available and in English. For websites, the page might be empty or structured in a way that's hard to parse.")
            return []
        return docs

    except Exception as e:  # Outermost catch-all: report and return no documents.
        st.error(f"An unexpected error occurred during document loading: {str(e)}")
        return []


# Prompt used for summarization: asks for a simple summary of around 300 words.
# "{text}" is the template's only variable (declared in input_variables below).
# The resulting `prompt` object is passed to load_summarize_chain in
# generate_summary.
prompt_template_str = """

Provide simple understandable summary in around 300 words for the following content:

Content: {text}

"""
prompt = PromptTemplate(template=prompt_template_str, input_variables=["text"])

def generate_summary(llm, docs: list[Document]):
    """Summarize the loaded documents with the given LLM.

    Builds a "stuff" summarize chain around the module-level ``prompt`` and
    invokes it on ``docs`` while a Streamlit spinner is displayed.

    Args:
        llm: Language model handed to ``load_summarize_chain``.
        docs: Documents to summarize.

    Returns:
        The chain's ``"output_text"`` value, or the fixed message
        "No content to summarize." when ``docs`` is empty.
    """
    if docs:
        summarize_chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
        with st.spinner("AI is summarizing the content..."):
            result = summarize_chain.invoke({"input_documents": docs})
        return result["output_text"]
    return "No content to summarize."

# --- Main Application ---
# Page header and short description.
st.markdown("<div class='main-header'>AI Content Summarizer 🚀</div>", unsafe_allow_html=True)
st.write("This app summarizes web articles and YouTube videos using AI. Enter a URL below to get started.")

# Input URL
url_input = st.text_input("Enter URL (Article or YouTube):", key="url_input_main", help="Paste the URL of the article or YouTube video you want to summarize.")

# Submit button: validate the inputs, then load the content and summarize it.
if st.button("Summarize Content", key="submit_button_main"):
    # Pasted URLs often carry stray surrounding whitespace, which would
    # otherwise fail validation.
    target_url = url_input.strip()

    if not google_api_key:
        st.error("🚫 Please enter your Google API Key in the sidebar.")
    elif not target_url:
        st.warning("⚠️ Please enter a URL.")
    elif not validators.url(target_url):
        st.error("🚫 Invalid URL. Please enter a valid URL.")
    else:
        try:
            st.markdown("<div class='sub-header'>Processing...</div>", unsafe_allow_html=True)

            llm = get_llm(api_key=google_api_key)

            docs = load_documents(url=target_url)

            if docs:
                summary_result = generate_summary(llm=llm, docs=docs)
                st.markdown("<div class='sub-header'>Summary:</div>", unsafe_allow_html=True)
                st.success("Summary generated successfully!")
                # The summary is derived from untrusted page/video content and is
                # rendered with unsafe_allow_html=True, so escape it to keep any
                # markup in the model output from being injected into the page.
                safe_summary = html.escape(str(summary_result))
                st.markdown(f"<div class='summary-output'>{safe_summary}</div>", unsafe_allow_html=True)
            # Error handling for empty docs is done within load_documents

        except Exception as e:
            st.error(f"An unexpected error occurred: {e}")
            # Exception text can echo remote content; escape it before embedding in HTML.
            st.markdown(f"<div class='error-message'>Details: {html.escape(str(e))}</div>", unsafe_allow_html=True)