"""Streamlit app that summarizes a YouTube video, a web page, or an uploaded PDF.

The user supplies a Groq API key and a LangSmith API key in the sidebar, then
either a URL or a PDF file.  The extracted documents are summarized with a
LangChain "refine" summarize chain running on a Groq-hosted chat model.
"""

import os
import tempfile
from urllib.parse import urlparse

import streamlit as st
import validators
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
from langchain_groq import ChatGroq

# Domains whose URLs are routed to the YouTube loader.
_YOUTUBE_DOMAINS = ("youtube.com", "youtu.be")

# Browser-like User-Agent sent when fetching generic web pages.
_BROWSER_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/116.0.0.0 Safari/537.36"
)


def _is_youtube_url(url: str) -> bool:
    """Return True when the *hostname* of ``url`` belongs to YouTube.

    Comparing the parsed hostname (instead of searching the whole URL) avoids
    misrouting pages that merely mention "youtube.com" in their path or query.
    """
    host = (urlparse(url).hostname or "").lower()
    return any(
        host == domain or host.endswith("." + domain)
        for domain in _YOUTUBE_DOMAINS
    )


def _load_url_documents(url: str) -> list:
    """Load documents from a YouTube video or a generic web page."""
    if _is_youtube_url(url):
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
    else:
        # NOTE(review): ssl_verify=False disables TLS certificate verification
        # for the fetched page. Kept as in the original behaviour; confirm
        # that this is really wanted.
        loader = UnstructuredURLLoader(
            urls=[url],
            ssl_verify=False,
            headers={"User-Agent": _BROWSER_USER_AGENT},
        )
    return loader.load()


def _load_pdf_documents(uploaded_pdf) -> list:
    """Write the uploaded PDF to a temporary file, load it, then delete the file.

    ``PyPDFLoader`` is given a filesystem path, so the upload is spooled to
    disk first.  The temporary file is always removed afterwards so repeated
    uploads do not accumulate files in the temp directory.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # getvalue() returns the complete upload regardless of the current
        # read position of the file-like object.
        temp_file.write(uploaded_pdf.getvalue())
        temp_file_path = temp_file.name
    try:
        return PyPDFLoader(temp_file_path).load_and_split()
    finally:
        try:
            os.remove(temp_file_path)
        except OSError:
            # Best-effort cleanup: a failed delete must not hide the result
            # (or the real loading error) from the user.
            pass


# Streamlit App Configuration
st.set_page_config(
    page_title="LangChain: Summarize Text From YT, Website, or PDF",
    page_icon="🦜",
)
st.title("🦜 LangChain: Summarize Text From YT, Website, or PDF")
st.subheader("Summarize Content from a URL or Uploaded PDF")

# Sidebar: API Key Inputs (surrounding whitespace from copy/paste is dropped)
with st.sidebar:
    st.write(
        "Get your Groq API key from https://groq.com/ and your LangSmith API key "
        "from https://langsmith.com/"
    )
    groq_api_key = st.text_input("Groq API Key", value="", type="password").strip()
    langsmith_api_key = st.text_input(
        "LangSmith API Key", value="", type="password"
    ).strip()

# Set LangSmith environment variables
if langsmith_api_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = langsmith_api_key

# URL Input (normalized once so every later check sees the same value)
generic_url = st.text_input(
    "URL (YouTube or Website)", label_visibility="collapsed"
).strip()

# PDF File Uploader
uploaded_file = st.file_uploader("Upload a PDF File", type=["pdf"])

# Prompt Templates
initial_prompt = PromptTemplate(
    template="Write a concise summary of the following content:\nContent: {text}",
    input_variables=["text"],
)

refinement_prompt = PromptTemplate(
    template="The following is a summary that needs refinement:\nCurrent Summary: {existing_answer}\n\n"
    "We have additional content that can be used to refine the summary:\nContent: {text}\n\n"
    "Please refine the current summary to include the new information while maintaining conciseness.",
    input_variables=["existing_answer", "text"],
)

# Initialize LLM (stays None when no key is given or the client cannot be built)
llm = None
if groq_api_key:
    try:
        llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")

# Button to Summarize Content
if st.button("Summarize the Content"):
    if not groq_api_key:
        st.error("Please provide the Groq API Key to get started.")
    elif not langsmith_api_key:
        st.error("Please provide the LangSmith API Key for tracking.")
    elif not (generic_url or uploaded_file):
        st.error("Please provide a valid URL or upload a PDF file.")
    elif generic_url and not validators.url(generic_url):
        st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
    elif not llm:
        st.error("LLM not initialized. Please check your API key.")
    else:
        try:
            with st.spinner("Processing..."):
                # A URL takes precedence over an uploaded PDF when both are given.
                if generic_url:
                    docs = _load_url_documents(generic_url)
                else:
                    docs = _load_pdf_documents(uploaded_file)

                # Safety check
                if not docs:
                    st.error(
                        "❌ No content could be extracted from the given source. "
                        "Please try another file or URL."
                    )
                else:
                    chain = load_summarize_chain(
                        llm,
                        chain_type="refine",
                        question_prompt=initial_prompt,
                        refine_prompt=refinement_prompt,
                        verbose=True,
                    )
                    output_summary = chain.run(docs)
                    st.success(output_summary)
        except Exception as e:
            # Pass the exception object itself so Streamlit can render its
            # type and traceback.
            st.exception(e)