Spaces:

ndhanvina
/

LangChain-VideoWeb-Summarizer

Sleeping

App Files Files Community

LangChain-VideoWeb-Summarizer / app.py

ndhanvina

Upload 3 files

ce4294b verified 9 months ago

raw

history blame contribute delete

6.68 kB

	import html
	from urllib.parse import urlparse

	import streamlit as st
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain.chains.summarize import load_summarize_chain
	from langchain.docstore.document import Document
	from langchain_core.prompts import PromptTemplate
	import validators
	# from youtube_transcript_api import YouTubeTranscriptApi # Replaced by YoutubeLoader
	# from pytube import YouTube # Replaced by YoutubeLoader
	# from unstructured.partition.html import partition_html # Replaced by UnstructuredURLLoader
	# import requests # Replaced by UnstructuredURLLoader
	from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
	from youtube_transcript_api import TranscriptsDisabled, NoTranscriptFound


	# --- Page setup ---
	# Page-level configuration: tab title, icon and wide layout.
	st.set_page_config(layout="wide", page_icon="🚀", page_title="AI Content Summarizer")

	# Stylesheet for the custom classes used by the app's HTML snippets
	# (main-header, sub-header, summary-output, error-message, ...).
	_CUSTOM_CSS = """
	<style>
	.main-header {
	font-size: 36px !important;
	color: #4CAF50;
	text-align: center;
	margin-bottom: 30px;
	}
	.sub-header {
	font-size: 24px !important;
	color: #FF6347;
	margin-top: 20px;
	margin-bottom: 10px;
	}
	.text-input {
	width: 100%;
	padding: 10px;
	border-radius: 5px;
	border: 1px solid #ddd;
	margin-bottom: 20px;
	}
	.submit-button {
	background-color: #4CAF50;
	color: white;
	padding: 10px 20px;
	border: none;
	border-radius: 5px;
	cursor: pointer;
	font-size: 16px;
	}
	.submit-button:hover {
	background-color: #45a049;
	}
	.summary-output {
	background-color: #f9f9f9;
	padding: 20px;
	border-radius: 5px;
	border: 1px solid #eee;
	margin-top: 20px;
	color: #333333; /* Added for text visibility */
	}
	.error-message {
	color: red;
	font-weight: bold;
	}
	</style>
	"""
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# Sidebar: the Google API key is collected through a password-type field
	# and read later by the main flow as ``google_api_key``.
	st.sidebar.title("API Key Configuration")
	google_api_key = st.sidebar.text_input("🔑 Google API Key", type="password")

	# --- Helper Functions ---
	def get_llm(api_key: str, *, model: str = "gemini-1.5-flash", temperature: float = 0):
	    """Build the Gemini chat model used for summarization.

	    Args:
	        api_key: Google API key, forwarded as ``google_api_key``.
	        model: Model identifier to request. Defaults to
	            ``"gemini-1.5-flash"``, the value that used to be hard-coded,
	            so existing callers are unaffected.
	        temperature: Sampling temperature; defaults to 0 as before.

	    Returns:
	        A configured ``ChatGoogleGenerativeAI`` instance.
	    """
	    return ChatGoogleGenerativeAI(
	        model=model,
	        google_api_key=api_key,
	        temperature=temperature,
	    )

	def _is_youtube_url(url: str) -> bool:
	    """Return True when the URL's host is youtube.com (or a subdomain) or youtu.be."""
	    try:
	        # urlparse only fills in the hostname when the network location is
	        # introduced by "//", so scheme-less input gets one prepended.
	        parsed = urlparse(url if "://" in url else f"//{url}")
	    except ValueError:
	        # Malformed network location (e.g. a broken IPv6 literal).
	        return False
	    host = parsed.hostname or ""
	    return host in ("youtube.com", "youtu.be") or host.endswith(".youtube.com")


	def load_documents(url: str) -> list[Document]:
	    """Load documents from a URL (YouTube video or web article).

	    YouTube URLs are recognised by their hostname rather than by a substring
	    match on the whole URL, so an article whose path or query merely mentions
	    "youtube.com" is handled as a normal web page.

	    Args:
	        url: Address of the video or article to load.

	    Returns:
	        The loaded documents, or an empty list when loading fails or yields
	        nothing. Failures are reported in the UI via ``st.error`` /
	        ``st.warning``; no exception is raised from the handlers below.
	    """
	    try:
	        if _is_youtube_url(url):
	            st.info("Processing YouTube URL...")
	            try:
	                loader = YoutubeLoader.from_youtube_url(
	                    url,
	                    add_video_info=False,  # Original author's note: keep this as False
	                    language=["en"],
	                )
	                with st.spinner("Fetching and parsing YouTube content..."):
	                    docs = loader.load()
	            except TranscriptsDisabled:
	                st.error(f"Transcripts are disabled for the YouTube video: {url}")
	                return []
	            except NoTranscriptFound:
	                st.error(f"No English transcripts found for the YouTube video: {url}. The video might not have transcripts or not in English.")
	                return []
	            except Exception as e:
	                st.error(f"Error loading YouTube content for {url}: {str(e)}. This could be due to parsing issues or video unavailability.")
	                return []
	        else:
	            st.info("Processing web article URL...")
	            # The request is sent with a desktop-browser User-Agent header.
	            headers = {
	                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
	            }
	            loader = UnstructuredURLLoader(urls=[url], headers=headers)
	            with st.spinner("Fetching and parsing web content..."):
	                docs = loader.load()

	        if not docs:
	            st.warning("No content could be extracted from the URL. For YouTube, check if transcripts are available and in English. For websites, the page might be empty or structured in a way that's hard to parse.")
	            return []
	        return docs

	    except Exception as e:  # Outermost catch-all so the UI never sees a traceback
	        st.error(f"An unexpected error occurred during document loading: {str(e)}")
	        return []


	# Prompt handed to the summarize chain; ``{text}`` is its only template
	# variable.
	prompt_template_str = """
	Provide simple understandable summary in around 300 words for the following content:
	Content: {text}
	"""
	prompt = PromptTemplate(
	    input_variables=["text"],
	    template=prompt_template_str,
	)

	def generate_summary(llm, docs: list[Document]):
	    """Summarize the loaded documents with a "stuff" summarize chain.

	    Returns the chain's ``output_text``, or the placeholder message
	    "No content to summarize." when ``docs`` is empty.
	    """
	    if docs:
	        summarize_chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
	        # Show a spinner while the model call is in flight.
	        with st.spinner("AI is summarizing the content..."):
	            result = summarize_chain.invoke({"input_documents": docs})
	        return result["output_text"]
	    return "No content to summarize."

	# --- Main Application ---
	st.markdown("<div class='main-header'>AI Content Summarizer 🚀</div>", unsafe_allow_html=True)
	st.write("This app summarizes web articles and YouTube videos using AI. Enter a URL below to get started.")

	# Input URL
	url_input = st.text_input("Enter URL (Article or YouTube):", key="url_input_main", help="Paste the URL of the article or YouTube video you want to summarize.")

	# Submit button: validate the inputs, then load the content and summarize it.
	if st.button("Summarize Content", key="submit_button_main"):
	    # Pasted URLs often carry stray leading/trailing whitespace, which would
	    # otherwise make an otherwise valid URL fail validation.
	    target_url = url_input.strip()

	    if not google_api_key:
	        st.error("🚫 Please enter your Google API Key in the sidebar.")
	    elif not target_url:
	        st.warning("⚠️ Please enter a URL.")
	    elif not validators.url(target_url):
	        st.error("🚫 Invalid URL. Please enter a valid URL.")
	    else:
	        try:
	            st.markdown("<div class='sub-header'>Processing...</div>", unsafe_allow_html=True)

	            llm = get_llm(api_key=google_api_key)

	            docs = load_documents(url=target_url)

	            # An empty result has already been reported by load_documents.
	            if docs:
	                summary_result = generate_summary(llm=llm, docs=docs)
	                st.markdown("<div class='sub-header'>Summary:</div>", unsafe_allow_html=True)
	                st.success("Summary generated successfully!")
	                # The summary is derived from third-party content and is
	                # rendered with unsafe_allow_html=True, so escape it before
	                # embedding it in the markup.
	                safe_summary = html.escape(str(summary_result))
	                st.markdown(f"<div class='summary-output'>{safe_summary}</div>", unsafe_allow_html=True)

	        except Exception as e:
	            st.error(f"An unexpected error occurred: {e}")
	            # Exception text may contain markup-significant characters;
	            # escape it for the raw-HTML details line.
	            st.markdown(f"<div class='error-message'>Details: {html.escape(str(e))}</div>", unsafe_allow_html=True)