File size: 6,675 Bytes
ce4294b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import html
from urllib.parse import urlparse

import streamlit as st
import validators
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from youtube_transcript_api import TranscriptsDisabled, NoTranscriptFound

# from youtube_transcript_api import YouTubeTranscriptApi # Replaced by YoutubeLoader
# from pytube import YouTube # Replaced by YoutubeLoader
# from unstructured.partition.html import partition_html # Replaced by UnstructuredURLLoader
# import requests # Replaced by UnstructuredURLLoader


# Set page config: browser-tab title and icon, and the wide page layout.
# The icon is the rocket emoji; it must be stored as the real character
# (not as the mis-decoded bytes of its UTF-8 encoding) to render correctly.
st.set_page_config(page_title="AI Content Summarizer", page_icon="🚀", layout="wide")

# Custom CSS for styling.
# The rules are injected as a raw <style> element; unsafe_allow_html=True is
# passed so the markup is rendered as HTML instead of being shown as text.
# .main-header, .sub-header, .summary-output and .error-message are the
# classes used by the st.markdown(...) calls in the main section of this file.
# .text-input and .submit-button are not referenced by any markup visible in
# this file.
st.markdown("""

<style>

    .main-header {

        font-size: 36px !important;

        color: #4CAF50;

        text-align: center;

        margin-bottom: 30px;

    }

    .sub-header {

        font-size: 24px !important;

        color: #FF6347;

        margin-top: 20px;

        margin-bottom: 10px;

    }

    .text-input {

        width: 100%;

        padding: 10px;

        border-radius: 5px;

        border: 1px solid #ddd;

        margin-bottom: 20px;

    }

    .submit-button {

        background-color: #4CAF50;

        color: white;

        padding: 10px 20px;

        border: none;

        border-radius: 5px;

        cursor: pointer;

        font-size: 16px;

    }

    .submit-button:hover {

        background-color: #45a049;

    }

    .summary-output {

        background-color: #f9f9f9;

        padding: 20px;

        border-radius: 5px;

        border: 1px solid #eee;

        margin-top: 20px;

        color: #333333; /* Added for text visibility */

    }

    .error-message {

        color: red;

        font-weight: bold;

    }

</style>

""", unsafe_allow_html=True)

# API Key Input
# The key is typed into a password-type sidebar field and kept in
# `google_api_key`; the main section checks it and passes it to get_llm().
st.sidebar.title("API Key Configuration")
google_api_key = st.sidebar.text_input("🔑 Google API Key", type="password")

# --- Helper Functions ---
def get_llm(
    api_key: str,
    model: str = "gemini-1.5-flash",
    temperature: float = 0,
) -> ChatGoogleGenerativeAI:
    """Build the Gemini chat model used for summarization.

    Args:
        api_key: Google API key, forwarded to the client as ``google_api_key``.
        model: Gemini model identifier. The default is the value this function
            previously hard-coded, so existing ``get_llm(api_key=...)`` calls
            behave exactly as before.
        temperature: Sampling temperature passed to the client; defaults to
            ``0`` as before.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance configured with the given values.
    """
    # NOTE(review): confirm the default model id is still served by the Gemini
    # API; callers can now override it without editing this function.
    return ChatGoogleGenerativeAI(
        model=model,
        google_api_key=api_key,
        temperature=temperature,
    )

def _is_youtube_url(url: str) -> bool:
    """Return True when the host of *url* is youtu.be, youtube.com or a youtube.com subdomain."""
    host = (urlparse(url).hostname or "").lower()
    return host in {"youtu.be", "youtube.com"} or host.endswith(".youtube.com")


def load_documents(url: str) -> list[Document]:
    """Load the text behind *url* as LangChain documents.

    YouTube links are read through ``YoutubeLoader`` (requesting the English
    transcript); every other URL is fetched with ``UnstructuredURLLoader``
    using a browser-like ``User-Agent`` header. Progress and failures are
    reported through Streamlit (``st.info`` / ``st.warning`` / ``st.error``)
    instead of being raised.

    Args:
        url: Absolute URL of a YouTube video or a web page.

    Returns:
        The loaded documents that contain non-blank text, or an empty list
        when loading failed or nothing could be extracted.
    """
    try:
        # Decide by hostname rather than by substring: a plain
        # `"youtube.com" in url` test also matches unrelated pages such as
        # https://example.com/?ref=youtube.com and sends them to the wrong
        # loader.
        if _is_youtube_url(url):
            st.info("Processing YouTube URL...")
            try:
                loader = YoutubeLoader.from_youtube_url(
                    url,
                    add_video_info=False,  # Keep this as False
                    language=["en"],
                )
                with st.spinner("Fetching and parsing YouTube content..."):
                    docs = loader.load()
            except TranscriptsDisabled:
                st.error(f"Transcripts are disabled for the YouTube video: {url}")
                return []
            except NoTranscriptFound:
                st.error(f"No English transcripts found for the YouTube video: {url}. The video might not have transcripts or not in English.")
                return []
            except Exception as e:
                st.error(f"Error loading YouTube content for {url}: {e}. This could be due to parsing issues or video unavailability.")
                return []
        else:
            st.info("Processing web article URL...")
            # Browser-like User-Agent sent with the page request.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            loader = UnstructuredURLLoader(urls=[url], headers=headers)
            with st.spinner("Fetching and parsing web content..."):
                docs = loader.load()

        # A loader can hand back documents whose text is empty; treat those
        # the same as "nothing extracted" so the model is never asked to
        # summarize blank content.
        docs = [doc for doc in docs if doc.page_content and doc.page_content.strip()]
        if not docs:
            st.warning("No content could be extracted from the URL. For YouTube, check if transcripts are available and in English. For websites, the page might be empty or structured in a way that's hard to parse.")
            return []
        return docs

    except Exception as e:  # Outermost catch-all: report in the UI, never raise
        st.error(f"An unexpected error occurred during document loading: {e}")
        return []


# Prompt used by the summarization chain. `{text}` is the template's only
# input variable; the resulting `prompt` object is passed to
# load_summarize_chain(..., chain_type="stuff") in generate_summary().
# NOTE(review): presumably the "stuff" chain substitutes the combined document
# text for {text} — confirm against the LangChain docs before renaming it.
prompt_template_str = """

Provide simple understandable summary in around 300 words for the following content:

Content: {text}

"""
prompt = PromptTemplate(template=prompt_template_str, input_variables=["text"])

def generate_summary(llm, docs: list[Document]):
    """Summarize *docs* with *llm* through a "stuff" summarize chain.

    Args:
        llm: Chat model handed to ``load_summarize_chain``.
        docs: Documents to summarize; may be empty.

    Returns:
        The chain's ``"output_text"`` value, or the fixed message
        ``"No content to summarize."`` when *docs* is empty/falsy.
    """
    if docs:
        # The module-level `prompt` supplies the summarization instructions.
        summarizer = load_summarize_chain(llm, prompt=prompt, chain_type="stuff")
        with st.spinner("AI is summarizing the content..."):
            output_text = summarizer.invoke({"input_documents": docs})["output_text"]
        return output_text
    return "No content to summarize."

# --- Main Application ---
# Page header and short description. The emoji are stored as real characters
# (they were previously the mis-decoded bytes of their UTF-8 encoding).
st.markdown("<div class='main-header'>AI Content Summarizer 🚀</div>", unsafe_allow_html=True)
st.write("This app summarizes web articles and YouTube videos using AI. Enter a URL below to get started.")

# Input URL
url_input = st.text_input("Enter URL (Article or YouTube):", key="url_input_main", help="Paste the URL of the article or YouTube video you want to summarize.")

# Submit button
if st.button("Summarize Content", key="submit_button_main"):
    # Pasted values often carry stray leading/trailing whitespace, which would
    # otherwise fail URL validation or be sent along as part of the API key.
    api_key = (google_api_key or "").strip()
    target_url = (url_input or "").strip()

    if not api_key:
        st.error("🚫 Please enter your Google API Key in the sidebar.")
    elif not target_url:
        st.warning("⚠️ Please enter a URL.")
    elif not validators.url(target_url):
        st.error("🚫 Invalid URL. Please enter a valid URL.")
    else:
        try:
            st.markdown("<div class='sub-header'>Processing...</div>", unsafe_allow_html=True)

            llm = get_llm(api_key=api_key)

            docs = load_documents(url=target_url)

            if docs:
                summary_result = generate_summary(llm=llm, docs=docs)
                st.markdown("<div class='sub-header'>Summary:</div>", unsafe_allow_html=True)
                st.success("Summary generated successfully!")
                # The summary is model output derived from arbitrary web
                # content; escape it before embedding it in raw HTML so it
                # cannot inject markup or scripts into the page.
                safe_summary = html.escape(str(summary_result), quote=False)
                st.markdown(f"<div class='summary-output'>{safe_summary}</div>", unsafe_allow_html=True)
            # Error handling for empty docs is done within load_documents

        except Exception as e:
            st.error(f"An unexpected error occurred: {e}")
            # Exception text can echo remote content, so escape it as well.
            safe_details = html.escape(str(e), quote=False)
            st.markdown(f"<div class='error-message'>Details: {safe_details}</div>", unsafe_allow_html=True)