# Hosting-page residue (not part of the program): uploaded by ndhanvina,
# "Upload 3 files", commit ce4294b (verified).
import html
from urllib.parse import urlparse

import streamlit as st
import validators
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from youtube_transcript_api import TranscriptsDisabled, NoTranscriptFound
# from youtube_transcript_api import YouTubeTranscriptApi # Replaced by YoutubeLoader
# from pytube import YouTube # Replaced by YoutubeLoader
# from unstructured.partition.html import partition_html # Replaced by UnstructuredURLLoader
# import requests # Replaced by UnstructuredURLLoader
# Set page config: browser-tab title, tab icon and wide layout.
# The icon is a literal rocket emoji; the previous value was the same emoji's
# UTF-8 bytes decoded with the wrong code page (mojibake).
st.set_page_config(page_title="AI Content Summarizer", page_icon="🚀", layout="wide")
# Custom CSS for styling
# The stylesheet is kept in a named constant and injected once. The classes
# `.main-header`, `.sub-header`, `.summary-output` and `.error-message` are
# referenced by <div> markup emitted further down in this script;
# `.text-input` and `.submit-button` are not referenced by any markup visible
# in this file.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 36px !important;
color: #4CAF50;
text-align: center;
margin-bottom: 30px;
}
.sub-header {
font-size: 24px !important;
color: #FF6347;
margin-top: 20px;
margin-bottom: 10px;
}
.text-input {
width: 100%;
padding: 10px;
border-radius: 5px;
border: 1px solid #ddd;
margin-bottom: 20px;
}
.submit-button {
background-color: #4CAF50;
color: white;
padding: 10px 20px;
border: none;
border-radius: 5px;
cursor: pointer;
font-size: 16px;
}
.submit-button:hover {
background-color: #45a049;
}
.summary-output {
background-color: #f9f9f9;
padding: 20px;
border-radius: 5px;
border: 1px solid #eee;
margin-top: 20px;
color: #333333; /* Added for text visibility */
}
.error-message {
color: red;
font-weight: bold;
}
</style>
"""
# unsafe_allow_html is required so the <style> element is emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# API Key Input
# The key is collected in the sidebar as a masked (password) field and read
# later, when the user presses the submit button. The label's key emoji
# replaces a mis-decoded (mojibake) byte sequence.
st.sidebar.title("API Key Configuration")
google_api_key = st.sidebar.text_input("🔑 Google API Key", type="password")
# --- Helper Functions ---
def get_llm(api_key: str, *, model: str = "gemini-1.5-flash", temperature: float = 0):
    """Build the chat model used for summarization.

    Args:
        api_key: Google API key passed to the client as ``google_api_key``.
        model: Model name to request. Defaults to the value this app has
            always used, so existing callers are unaffected.
        temperature: Sampling temperature. Defaults to 0, as before.

    Returns:
        A configured ``ChatGoogleGenerativeAI`` instance.
    """
    return ChatGoogleGenerativeAI(
        model=model,
        google_api_key=api_key,
        temperature=temperature,
    )
def _is_youtube_url(url: str) -> bool:
    """Return True when the URL's host is youtu.be, youtube.com or a subdomain of it."""
    try:
        host = urlparse(url).hostname or ""
    except ValueError:
        # urlparse rejects some malformed network locations; such a URL is
        # certainly not a YouTube link.
        return False
    return host in ("youtu.be", "youtube.com") or host.endswith(".youtube.com")


def load_documents(url: str) -> list[Document]:
    """Load documents from a URL (YouTube video transcript or web article).

    The source type is chosen from the URL's host name rather than a substring
    search, so an article URL that merely mentions "youtube.com" in its path or
    query string is no longer sent to the YouTube loader.

    Progress and failures are reported through Streamlit (``st.info``,
    ``st.spinner``, ``st.warning``, ``st.error``); no exception is propagated
    to the caller.

    Args:
        url: Address of the YouTube video or web page to load.

    Returns:
        The loaded documents, or an empty list when loading failed or produced
        no content.
    """
    try:
        if _is_youtube_url(url):
            st.info("Processing YouTube URL...")
            try:
                loader = YoutubeLoader.from_youtube_url(
                    url,
                    add_video_info=False,  # Keep this as False
                    language=['en'],
                )
                with st.spinner("Fetching and parsing YouTube content..."):
                    docs = loader.load()
            except TranscriptsDisabled:
                st.error(f"Transcripts are disabled for the YouTube video: {url}")
                return []
            except NoTranscriptFound:
                st.error(f"No English transcripts found for the YouTube video: {url}. The video might not have transcripts or not in English.")
                return []
            except Exception as e:
                st.error(f"Error loading YouTube content for {url}: {e}. This could be due to parsing issues or video unavailability.")
                return []
        else:
            st.info("Processing web article URL...")
            # Send a browser-like User-Agent with the page request.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            loader = UnstructuredURLLoader(urls=[url], headers=headers)
            with st.spinner("Fetching and parsing web content..."):
                docs = loader.load()
        if not docs:
            st.warning("No content could be extracted from the URL. For YouTube, check if transcripts are available and in English. For websites, the page might be empty or structured in a way that's hard to parse.")
            return []
        return docs
    except Exception as e:  # Outermost catch-all: report in the UI instead of crashing the app.
        st.error(f"An unexpected error occurred during document loading: {e}")
        return []
# Summarization prompt: the loaded content is substituted for the single
# `{text}` placeholder. The template is assembled from explicit pieces; the
# resulting string is identical to the triple-quoted original, including the
# leading and trailing newlines.
prompt_template_str = (
    "\n"
    "Provide simple understandable summary in around 300 words for the following content:\n"
    "Content: {text}\n"
)
prompt = PromptTemplate(
    input_variables=["text"],
    template=prompt_template_str,
)
def generate_summary(llm, docs: list[Document]):
    """Summarize the loaded documents with the given LLM.

    Args:
        llm: Chat model handed to ``load_summarize_chain``.
        docs: Documents to summarize.

    Returns:
        The chain's ``"output_text"`` value, or the fixed string
        ``"No content to summarize."`` when ``docs`` is empty or falsy.
    """
    if docs:
        # Build a "stuff" summarize chain driven by the module-level prompt.
        summarize_chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
        with st.spinner("AI is summarizing the content..."):
            chain_output = summarize_chain.invoke({"input_documents": docs})
        return chain_output["output_text"]
    return "No content to summarize."
# --- Main Application ---
# Page flow: render the header, read a URL, and on submit validate the inputs,
# load the content and show its summary. Emoji in the user-facing strings
# replace mis-decoded (mojibake) byte sequences.
st.markdown("<div class='main-header'>AI Content Summarizer 🚀</div>", unsafe_allow_html=True)
st.write("This app summarizes web articles and YouTube videos using AI. Enter a URL below to get started.")
# Input URL
# Surrounding whitespace from copy/paste is stripped so an otherwise valid URL
# is not rejected by the validator below.
url_input = st.text_input("Enter URL (Article or YouTube):", key="url_input_main", help="Paste the URL of the article or YouTube video you want to summarize.").strip()
# Submit button
if st.button("Summarize Content", key="submit_button_main"):
    if not google_api_key:
        st.error("🚫 Please enter your Google API Key in the sidebar.")
    elif not url_input:
        st.warning("⚠️ Please enter a URL.")
    elif not validators.url(url_input):
        st.error("🚫 Invalid URL. Please enter a valid URL.")
    else:
        try:
            st.markdown("<div class='sub-header'>Processing...</div>", unsafe_allow_html=True)
            llm = get_llm(api_key=google_api_key)
            docs = load_documents(url=url_input)
            if docs:
                summary_result = generate_summary(llm=llm, docs=docs)
                st.markdown("<div class='sub-header'>Summary:</div>", unsafe_allow_html=True)
                st.success("Summary generated successfully!")
                # The summary is model output derived from fetched third-party
                # content and is rendered with unsafe_allow_html=True, so it is
                # HTML-escaped to stop any markup in it from being interpreted.
                st.markdown(f"<div class='summary-output'>{html.escape(summary_result)}</div>", unsafe_allow_html=True)
            # Error handling for empty docs is done within load_documents
        except Exception as e:
            st.error(f"An unexpected error occurred: {e}")
            # Exception text can echo remote content; escape it for the same reason.
            st.markdown(f"<div class='error-message'>Details: {html.escape(str(e))}</div>", unsafe_allow_html=True)