import streamlit as st from langchain_google_genai import ChatGoogleGenerativeAI from langchain.chains.summarize import load_summarize_chain from langchain.docstore.document import Document from langchain_core.prompts import PromptTemplate import validators # from youtube_transcript_api import YouTubeTranscriptApi # Replaced by YoutubeLoader # from pytube import YouTube # Replaced by YoutubeLoader # from unstructured.partition.html import partition_html # Replaced by UnstructuredURLLoader # import requests # Replaced by UnstructuredURLLoader from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader from youtube_transcript_api import TranscriptsDisabled, NoTranscriptFound # Set page config st.set_page_config(page_title="AI Content Summarizer", page_icon="🚀", layout="wide") # Custom CSS for styling st.markdown(""" """, unsafe_allow_html=True) # API Key Input st.sidebar.title("API Key Configuration") google_api_key = st.sidebar.text_input("🔑 Google API Key", type="password") # --- Helper Functions --- def get_llm(api_key: str): """Initializes and returns the ChatGoogleGenerativeAI instance.""" return ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key, temperature=0) def load_documents(url: str) -> list[Document]: """Loads documents from a URL (YouTube or web article).""" docs = [] try: if "youtube.com" in url or "youtu.be" in url: st.info("Processing YouTube URL...") try: loader = YoutubeLoader.from_youtube_url( url, add_video_info=False, # Keep this as False language=['en'] ) with st.spinner("Fetching and parsing YouTube content..."): docs = loader.load() except TranscriptsDisabled: st.error(f"Transcripts are disabled for the YouTube video: {url}") return [] except NoTranscriptFound: st.error(f"No English transcripts found for the YouTube video: {url}. The video might not have transcripts or not in English.") return [] except Exception as e: st.error(f"Error loading YouTube content for {url}: {str(e)}. This could be due to parsing issues or video unavailability.") return [] else: st.info("Processing web article URL...") headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" } loader = UnstructuredURLLoader(urls=[url], headers=headers) with st.spinner("Fetching and parsing web content..."): docs = loader.load() if not docs: st.warning("No content could be extracted from the URL. For YouTube, check if transcripts are available and in English. For websites, the page might be empty or structured in a way that's hard to parse.") return [] return docs except Exception as e: # This is the outermost catch-all st.error(f"An unexpected error occurred during document loading: {str(e)}") return [] prompt_template_str = """ Provide simple understandable summary in around 300 words for the following content: Content: {text} """ prompt = PromptTemplate(template=prompt_template_str, input_variables=["text"]) def generate_summary(llm, docs: list[Document]): """Generates a summary using the LLM and loaded documents.""" if not docs: return "No content to summarize." chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt) with st.spinner("AI is summarizing the content..."): summary = chain.invoke({"input_documents": docs}) return summary["output_text"] # --- Main Application --- st.markdown("