"""Streamlit app that summarizes a web article or a YouTube video with Gemini.

The user supplies a Google API key in the sidebar and a URL in the main pane.
The URL is loaded into LangChain documents (YouTube transcript or scraped web
page) and summarized with a single "stuff" summarize chain.
"""

from urllib.parse import urlparse

import streamlit as st
import validators
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from youtube_transcript_api import TranscriptsDisabled, NoTranscriptFound

# NOTE: YoutubeLoader and UnstructuredURLLoader replace the earlier direct use
# of youtube_transcript_api.YouTubeTranscriptApi, pytube, unstructured's
# partition_html and requests.

# Set page config
st.set_page_config(page_title="AI Content Summarizer", page_icon="🚀", layout="wide")

# Custom CSS for styling
st.markdown(""" """, unsafe_allow_html=True)

# API Key Input
st.sidebar.title("API Key Configuration")
google_api_key = st.sidebar.text_input("🔑 Google API Key", type="password")


# --- Helper Functions ---
def get_llm(api_key: str):
    """Initializes and returns the ChatGoogleGenerativeAI instance.

    Args:
        api_key: Google API key passed through to the model client.

    Returns:
        A ``ChatGoogleGenerativeAI`` configured for ``gemini-1.5-flash`` with
        ``temperature=0``.
    """
    return ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        google_api_key=api_key,
        temperature=0,
    )


def _is_youtube_url(url: str) -> bool:
    """Return True when the *host* of ``url`` is a YouTube domain.

    A plain substring test would also match unrelated URLs that merely mention
    YouTube in their path or query string (e.g. ``?ref=youtube.com``), so the
    decision is made on the parsed hostname only.
    """
    try:
        # Scheme-less input ("youtube.com/watch?v=...") has no netloc unless it
        # is parsed as a network-path reference.
        parsed = urlparse(url if "://" in url else f"//{url}")
    except ValueError:
        return False
    host = parsed.hostname or ""
    return (
        host == "youtu.be"
        or host == "youtube.com"
        or host.endswith(".youtube.com")
    )


def load_documents(url: str) -> list[Document]:
    """Loads documents from a URL (YouTube or web article).

    Progress and failures are reported through Streamlit widgets rather than
    raised to the caller.

    Args:
        url: Address of a YouTube video or a web page.

    Returns:
        The loaded documents, or an empty list when loading failed or produced
        no content.
    """
    try:
        if _is_youtube_url(url):
            st.info("Processing YouTube URL...")
            try:
                loader = YoutubeLoader.from_youtube_url(
                    url,
                    add_video_info=False,  # Keep this as False
                    language=['en'],
                )
                with st.spinner("Fetching and parsing YouTube content..."):
                    docs = loader.load()
            except TranscriptsDisabled:
                st.error(f"Transcripts are disabled for the YouTube video: {url}")
                return []
            except NoTranscriptFound:
                st.error(
                    f"No English transcripts found for the YouTube video: {url}. "
                    "The video might not have transcripts or not in English."
                )
                return []
            except Exception as e:
                st.error(
                    f"Error loading YouTube content for {url}: {e}. "
                    "This could be due to parsing issues or video unavailability."
                )
                return []
        else:
            st.info("Processing web article URL...")
            # Browser-like User-Agent sent with the page request.
            headers = {
                "User-Agent": (
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/91.0.4472.124 Safari/537.36"
                )
            }
            loader = UnstructuredURLLoader(urls=[url], headers=headers)
            with st.spinner("Fetching and parsing web content..."):
                docs = loader.load()

        if not docs:
            st.warning(
                "No content could be extracted from the URL. For YouTube, check "
                "if transcripts are available and in English. For websites, the "
                "page might be empty or structured in a way that's hard to parse."
            )
            return []
        return docs
    except Exception as e:  # This is the outermost catch-all
        st.error(f"An unexpected error occurred during document loading: {e}")
        return []


prompt_template_str = """
Provide simple understandable summary in around 300 words for the following content:
Content: {text}
"""
prompt = PromptTemplate(template=prompt_template_str, input_variables=["text"])


def generate_summary(llm, docs: list[Document]):
    """Generates a summary using the LLM and loaded documents.

    Args:
        llm: Chat model passed to the summarize chain.
        docs: Documents to summarize.

    Returns:
        The chain's ``output_text``, or a fixed placeholder message when
        ``docs`` is empty.
    """
    if not docs:
        return "No content to summarize."
    chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
    with st.spinner("AI is summarizing the content..."):
        summary = chain.invoke({"input_documents": docs})
    return summary["output_text"]


# --- Main Application ---
st.markdown("\n\nAI Content Summarizer 🚀\n\n", unsafe_allow_html=True)
st.write("This app summarizes web articles and YouTube videos using AI. Enter a URL below to get started.")

# Input URL
url_input = st.text_input(
    "Enter URL (Article or YouTube):",
    key="url_input_main",
    help="Paste the URL of the article or YouTube video you want to summarize.",
)

# Submit button
if st.button("Summarize Content", key="submit_button_main"):
    if not google_api_key:
        st.error("🚫 Please enter your Google API Key in the sidebar.")
    elif not url_input:
        st.warning("⚠️ Please enter a URL.")
    elif not validators.url(url_input):
        st.error("🚫 Invalid URL. Please enter a valid URL.")
    else:
        try:
            st.markdown("\n\nProcessing...\n\n", unsafe_allow_html=True)
            llm = get_llm(api_key=google_api_key)
            docs = load_documents(url=url_input)
            if docs:
                summary_result = generate_summary(llm=llm, docs=docs)
                st.markdown("\n\nSummary:\n\n", unsafe_allow_html=True)
                st.success("Summary generated successfully!")
                # The summary is model output derived from arbitrary web
                # content; render it as Markdown only, never as raw HTML.
                st.markdown(f"\n\n{summary_result}\n\n")
            # Error handling for empty docs is done within load_documents
        except Exception as e:
            st.error(f"An unexpected error occurred: {e}")
            # Exception text is untrusted as well; do not allow raw HTML here.
            st.markdown(f"\n\nDetails: {e}\n\n")