Spaces:
Sleeping
Sleeping
import os
import tempfile
from urllib.parse import urlparse

import streamlit as st
import validators
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
from langchain_groq import ChatGroq
# Streamlit App Configuration
# The parrot emoji was stored as mis-decoded bytes; it is restored here so the
# page icon and the title render as intended.
st.set_page_config(
    page_title="LangChain: Summarize Text From YT, Website, or PDF",
    page_icon="🦜",
)
st.title("🦜 LangChain: Summarize Text From YT, Website, or PDF")
st.subheader("Summarize Content from a URL or Uploaded PDF")
# Sidebar: API Key Inputs
# Both keys are collected as masked password fields in the sidebar.
st.sidebar.write("Get your Groq API key from https://groq.com/ and your LangSmith API key from https://langsmith.com/")
groq_api_key = st.sidebar.text_input("Groq API Key", value="", type="password")
langsmith_api_key = st.sidebar.text_input("LangSmith API Key", value="", type="password")

# Set LangSmith environment variables
# Only exported once a LangSmith key has actually been entered.
if langsmith_api_key:
    os.environ.update(
        {
            "LANGCHAIN_TRACING_V2": "true",
            "LANGCHAIN_API_KEY": langsmith_api_key,
        }
    )
# URL Input
# The label is collapsed, so only the bare text box is rendered.
generic_url = st.text_input(
    label="URL (YouTube or Website)",
    label_visibility="collapsed",
)

# PDF File Uploader
# Restricted to files with a .pdf extension.
uploaded_file = st.file_uploader(label="Upload a PDF File", type=["pdf"])
# Prompt Templates
# `{text}` receives the content to summarize; `{existing_answer}` receives the
# summary produced so far when refining.
_INITIAL_TEMPLATE = (
    "Write a concise summary of the following content:\n"
    "Content: {text}"
)
_REFINEMENT_TEMPLATE = (
    "The following is a summary that needs refinement:\n"
    "Current Summary: {existing_answer}\n\n"
    "We have additional content that can be used to refine the summary:\n"
    "Content: {text}\n\n"
    "Please refine the current summary to include the new information "
    "while maintaining conciseness."
)

initial_prompt = PromptTemplate(
    input_variables=["text"],
    template=_INITIAL_TEMPLATE,
)
refinement_prompt = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=_REFINEMENT_TEMPLATE,
)
# Initialize LLM
# `llm` stays None when no key was entered or when building the client fails;
# a construction failure is also reported in the UI.
llm = None
if groq_api_key:
    try:
        llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key)
    except Exception as init_error:
        st.error(f"Failed to initialize Groq client: {init_error}")
# Button to Summarize Content
def _is_youtube_url(url):
    """Return True when the host of *url* is youtu.be, youtube.com, or a subdomain of youtube.com."""
    # Compare the parsed host rather than searching the whole URL, so a path
    # or query string that merely mentions "youtube.com" is not misrouted.
    host = (urlparse(url).hostname or "").lower()
    return host in ("youtu.be", "youtube.com") or host.endswith(".youtube.com")


def _load_url_documents(url):
    """Load documents from *url* with YoutubeLoader or UnstructuredURLLoader."""
    if _is_youtube_url(url):
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
    else:
        loader = UnstructuredURLLoader(
            urls=[url],
            # NOTE(review): ssl_verify=False turns off TLS certificate
            # verification for the fetched page. Kept to preserve existing
            # behavior; confirm this is really wanted.
            ssl_verify=False,
            headers={
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/116.0.0.0 Safari/537.36"
            },
        )
    return loader.load()


def _load_pdf_documents(pdf_file):
    """Write the uploaded PDF to a temporary file, load and split it, then delete the file."""
    # PyPDFLoader is given a filesystem path, so the upload is spooled to disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # getvalue() returns the full buffer regardless of the current read
        # position of the upload.
        temp_file.write(pdf_file.getvalue())
        temp_file_path = temp_file.name
    try:
        return PyPDFLoader(temp_file_path).load_and_split()
    finally:
        # delete=False means nothing else removes the file; clean up here so
        # repeated runs do not accumulate temp files. Removal is best-effort.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass


if st.button("Summarize the Content"):
    # Normalize once so validation and loading see the same value.
    url = generic_url.strip()

    if not groq_api_key.strip():
        st.error("Please provide the Groq API Key to get started.")
    elif not langsmith_api_key.strip():
        st.error("Please provide the LangSmith API Key for tracking.")
    elif not (url or uploaded_file):
        st.error("Please provide a valid URL or upload a PDF file.")
    elif url and not validators.url(url):
        st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
    elif llm is None:
        st.error("LLM not initialized. Please check your API key.")
    else:
        try:
            with st.spinner("Processing..."):
                # A URL takes precedence over an uploaded PDF when both are given.
                if url:
                    docs = _load_url_documents(url)
                else:
                    docs = _load_pdf_documents(uploaded_file)

                # Safety check
                if not docs:
                    st.error("❌ No content could be extracted from the given source. Please try another file or URL.")
                else:
                    chain = load_summarize_chain(
                        llm,
                        chain_type="refine",
                        question_prompt=initial_prompt,
                        refine_prompt=refinement_prompt,
                        verbose=True,
                    )
                    output_summary = chain.run(docs)
                    st.success(output_summary)
        except Exception as e:
            # st.exception expects the exception object itself, not a string.
            st.exception(e)