File size: 6,675 Bytes
ce4294b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 | import streamlit as st
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain_core.prompts import PromptTemplate
import validators
# from youtube_transcript_api import YouTubeTranscriptApi # Replaced by YoutubeLoader
# from pytube import YouTube # Replaced by YoutubeLoader
# from unstructured.partition.html import partition_html # Replaced by UnstructuredURLLoader
# import requests # Replaced by UnstructuredURLLoader
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
from youtube_transcript_api import TranscriptsDisabled, NoTranscriptFound
# Page-level settings: browser tab title, icon, and a wide layout.
st.set_page_config(
    page_title="AI Content Summarizer",
    page_icon="π",
    layout="wide",
)

# Stylesheet for the custom CSS classes used by the HTML snippets this app
# renders through st.markdown (headers, summary box, error text).
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 36px !important;
color: #4CAF50;
text-align: center;
margin-bottom: 30px;
}
.sub-header {
font-size: 24px !important;
color: #FF6347;
margin-top: 20px;
margin-bottom: 10px;
}
.text-input {
width: 100%;
padding: 10px;
border-radius: 5px;
border: 1px solid #ddd;
margin-bottom: 20px;
}
.submit-button {
background-color: #4CAF50;
color: white;
padding: 10px 20px;
border: none;
border-radius: 5px;
cursor: pointer;
font-size: 16px;
}
.submit-button:hover {
background-color: #45a049;
}
.summary-output {
background-color: #f9f9f9;
padding: 20px;
border-radius: 5px;
border: 1px solid #eee;
margin-top: 20px;
color: #333333; /* Added for text visibility */
}
.error-message {
color: red;
font-weight: bold;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Sidebar: the user supplies the Google API key here; the field is masked
# because it is declared as a password input.
st.sidebar.title("API Key Configuration")
google_api_key = st.sidebar.text_input(
    "π Google API Key",
    type="password",
)
# --- Helper Functions ---
def get_llm(
    api_key: str,
    *,
    model: str = "gemini-1.5-flash",
    temperature: float = 0,
) -> ChatGoogleGenerativeAI:
    """Build the ChatGoogleGenerativeAI client used for summarization.

    Args:
        api_key: Google API key, forwarded to the client as ``google_api_key``.
        model: Model identifier to request. The default is the value this
            function previously hard-coded, so existing callers are unaffected.
            NOTE(review): confirm the default model id is still served.
        temperature: Sampling temperature passed to the client. Defaults to 0,
            the value this function previously hard-coded.

    Returns:
        A configured ``ChatGoogleGenerativeAI`` instance.
    """
    return ChatGoogleGenerativeAI(
        model=model,
        google_api_key=api_key,
        temperature=temperature,
    )
def _is_youtube_url(url: str) -> bool:
    """Return True when the host of *url* is youtube.com, youtu.be, or a subdomain of either."""
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlparse

    candidate = url.strip()
    if "://" not in candidate:
        # Scheme-less input: prefix "//" so urlparse reads the leading part as the host.
        candidate = "//" + candidate
    try:
        host = urlparse(candidate).hostname or ""
    except ValueError:
        # urlparse rejects some malformed netlocs; such input is not a YouTube URL.
        return False
    return any(
        host == domain or host.endswith("." + domain)
        for domain in ("youtube.com", "youtu.be")
    )


def load_documents(url: str) -> list[Document]:
    """Load documents from a URL (YouTube video or web article).

    The URL is routed by its hostname: YouTube hosts go through
    ``YoutubeLoader`` (English transcript), everything else through
    ``UnstructuredURLLoader``. Matching on the parsed hostname rather than on
    a substring of the whole URL prevents pages that merely mention
    "youtube.com" in their path or query string from being sent to the
    transcript loader.

    Args:
        url: The address to fetch.

    Returns:
        The loaded documents, or an empty list when loading fails or yields
        nothing. Failures are reported to the user through Streamlit
        messages rather than raised.
    """
    try:
        if _is_youtube_url(url):
            st.info("Processing YouTube URL...")
            try:
                loader = YoutubeLoader.from_youtube_url(
                    url,
                    add_video_info=False,  # Keep this as False (original author's note; reason not shown here)
                    language=['en'],
                )
                with st.spinner("Fetching and parsing YouTube content..."):
                    docs = loader.load()
            except TranscriptsDisabled:
                st.error(f"Transcripts are disabled for the YouTube video: {url}")
                return []
            except NoTranscriptFound:
                st.error(f"No English transcripts found for the YouTube video: {url}. The video might not have transcripts or not in English.")
                return []
            except Exception as e:
                st.error(f"Error loading YouTube content for {url}: {str(e)}. This could be due to parsing issues or video unavailability.")
                return []
        else:
            st.info("Processing web article URL...")
            # Browser-like User-Agent sent with the request for the article.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            loader = UnstructuredURLLoader(urls=[url], headers=headers)
            with st.spinner("Fetching and parsing web content..."):
                docs = loader.load()

        if not docs:
            st.warning("No content could be extracted from the URL. For YouTube, check if transcripts are available and in English. For websites, the page might be empty or structured in a way that's hard to parse.")
            return []
        return docs
    except Exception as e:  # Outermost catch-all: this is the UI boundary, so report instead of crashing.
        st.error(f"An unexpected error occurred during document loading: {str(e)}")
        return []
# Summarization prompt; the single {text} placeholder receives the content
# to be summarized.
prompt_template_str = (
    "\n"
    "Provide simple understandable summary in around 300 words for the following content:\n"
    "Content: {text}\n"
)
prompt = PromptTemplate(
    input_variables=["text"],
    template=prompt_template_str,
)
def generate_summary(llm, docs: list[Document]):
    """Summarize the loaded documents with the given LLM.

    Builds a "stuff" summarize chain around the module-level ``prompt`` and
    returns the chain's ``output_text``. When ``docs`` is empty, a fixed
    placeholder message is returned and the LLM is not called.
    """
    if docs:
        summarizer = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
        with st.spinner("AI is summarizing the content..."):
            result = summarizer.invoke({"input_documents": docs})
        return result["output_text"]
    return "No content to summarize."
# --- Main Application ---
# Stdlib import kept next to its only use: text that did not originate in this
# file (LLM output, exception messages) is escaped before being embedded in
# raw HTML below.
import html

# NOTE(review): the odd leading characters in some user-facing strings below
# look like mis-decoded emoji; they are left untouched here. Confirm the
# file's encoding.
st.markdown("<div class='main-header'>AI Content Summarizer π</div>", unsafe_allow_html=True)
st.write("This app summarizes web articles and YouTube videos using AI. Enter a URL below to get started.")

# Input URL
url_input = st.text_input("Enter URL (Article or YouTube):", key="url_input_main", help="Paste the URL of the article or YouTube video you want to summarize.")

# Submit button: validate the inputs in order (API key, URL present, URL
# well-formed) and only then load and summarize.
if st.button("Summarize Content", key="submit_button_main"):
    if not google_api_key:
        st.error("π« Please enter your Google API Key in the sidebar.")
    elif not url_input:
        st.warning("β οΈ Please enter a URL.")
    elif not validators.url(url_input):
        st.error("π« Invalid URL. Please enter a valid URL.")
    else:
        try:
            st.markdown("<div class='sub-header'>Processing...</div>", unsafe_allow_html=True)
            llm = get_llm(api_key=google_api_key)
            docs = load_documents(url=url_input)
            # Error handling for empty docs is done within load_documents
            if docs:
                summary_result = generate_summary(llm=llm, docs=docs)
                st.markdown("<div class='sub-header'>Summary:</div>", unsafe_allow_html=True)
                st.success("Summary generated successfully!")
                # The summary is derived from arbitrary web/transcript content,
                # so escape it: with unsafe_allow_html=True any markup in the
                # text would otherwise be rendered as live HTML.
                safe_summary = html.escape(str(summary_result))
                st.markdown(f"<div class='summary-output'>{safe_summary}</div>", unsafe_allow_html=True)
        except Exception as e:
            st.error(f"An unexpected error occurred: {e}")
            # Exception text can echo remote content; escape it for the same reason.
            st.markdown(f"<div class='error-message'>Details: {html.escape(str(e))}</div>", unsafe_allow_html=True)
|