# Lovish Singla
# Update app.py
# 1eb24e1 unverified
import os
import validators
import streamlit as st
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
import tempfile
# Streamlit page setup: browser-tab metadata first, then the on-page headings.
# The tab title and the page heading share the same text (the heading adds the emoji).
_APP_TITLE = "LangChain: Summarize Text From YT, Website, or PDF"
st.set_page_config(page_title=_APP_TITLE, page_icon="🦜")
st.title(f"🦜 {_APP_TITLE}")
st.subheader("Summarize Content from a URL or Uploaded PDF")
# Sidebar: ask the user for the Groq and LangSmith API keys (masked inputs).
with st.sidebar:
    st.write("Get your Groq API key from https://groq.com/ and your LangSmith API key from https://langsmith.com/")
    groq_api_key = st.text_input(label="Groq API Key", value="", type="password")
    langsmith_api_key = st.text_input(label="LangSmith API Key", value="", type="password")

# Export the LangSmith settings to the process environment only when a key was entered.
if langsmith_api_key:
    os.environ.update(
        {
            "LANGCHAIN_TRACING_V2": "true",
            "LANGCHAIN_API_KEY": langsmith_api_key,
        }
    )
# Content sources: a free-text URL field (its label is collapsed, i.e. not shown)
# and a file uploader restricted to PDF files.
generic_url = st.text_input(
    label="URL (YouTube or Website)",
    label_visibility="collapsed",
)
uploaded_file = st.file_uploader(
    label="Upload a PDF File",
    type=["pdf"],
)
# Prompt templates: one that produces a first summary from {text}, and one that
# folds additional {text} into an {existing_answer} summary.
_INITIAL_TEMPLATE = "Write a concise summary of the following content:\nContent: {text}"
_REFINEMENT_TEMPLATE = (
    "The following is a summary that needs refinement:\n"
    "Current Summary: {existing_answer}\n\n"
    "We have additional content that can be used to refine the summary:\n"
    "Content: {text}\n\n"
    "Please refine the current summary to include the new information "
    "while maintaining conciseness."
)

initial_prompt = PromptTemplate(
    input_variables=["text"],
    template=_INITIAL_TEMPLATE,
)
refinement_prompt = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=_REFINEMENT_TEMPLATE,
)
# Build the Groq chat model. `llm` stays None when no key was entered or when
# constructing the client raises; in the latter case the error is shown in the UI.
llm = None
if groq_api_key:
    try:
        llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key)
    except Exception as init_error:
        st.error(f"Failed to initialize Groq client: {init_error}")
def _load_url_documents(url):
    """Load documents from *url* with the loader matching its kind.

    URLs containing "youtube.com" or "youtu.be" go through ``YoutubeLoader``;
    everything else goes through ``UnstructuredURLLoader``.
    """
    if "youtube.com" in url or "youtu.be" in url:
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
    else:
        # NOTE(security): ssl_verify=False turns off TLS certificate verification
        # for the fetched page. Kept as in the original so sites with broken
        # certificates still load; review before using with sensitive content.
        loader = UnstructuredURLLoader(
            urls=[url],
            ssl_verify=False,
            headers={
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/116.0.0.0 Safari/537.36"
            },
        )
    return loader.load()


def _load_pdf_documents(pdf_file):
    """Load and split an uploaded PDF, removing the temporary copy afterwards.

    ``PyPDFLoader`` is given a filesystem path, so the upload is first written
    to a named temporary file. The file is created with ``delete=False`` so it
    can be reopened by path after the ``with`` block closes it, and is deleted
    explicitly once loading has finished (successfully or not).
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(pdf_file.read())
        temp_file_path = temp_file.name
    try:
        return PyPDFLoader(temp_file_path).load_and_split()
    finally:
        # Best-effort cleanup: a failure to delete the temp file must not mask
        # the loading result or the original loading error.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass


# Button to Summarize Content
if st.button("Summarize the Content"):
    # Normalize the URL once so that validation and loading see the same value;
    # surrounding whitespace (or a whitespace-only field) is treated as absent.
    url = generic_url.strip()

    if not groq_api_key.strip():
        st.error("Please provide the Groq API Key to get started.")
    elif not langsmith_api_key.strip():
        st.error("Please provide the LangSmith API Key for tracking.")
    elif not (url or uploaded_file):
        st.error("Please provide a valid URL or upload a PDF file.")
    elif url and not validators.url(url):
        st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
    elif not llm:
        st.error("LLM not initialized. Please check your API key.")
    else:
        try:
            with st.spinner("Processing..."):
                # A URL takes precedence; the PDF is only used when no URL was given.
                if url:
                    docs = _load_url_documents(url)
                else:
                    docs = _load_pdf_documents(uploaded_file)

                # Safety check: nothing to summarize if the loader returned no documents.
                if not docs:
                    st.error("❌ No content could be extracted from the given source. Please try another file or URL.")
                else:
                    chain = load_summarize_chain(
                        llm,
                        chain_type="refine",
                        question_prompt=initial_prompt,
                        refine_prompt=refinement_prompt,
                        verbose=True,
                    )
                    output_summary = chain.run(docs)
                    st.success(output_summary)
        except Exception as e:
            # Top-level UI boundary: pass the exception object itself so
            # Streamlit can render its type, message and traceback.
            st.exception(e)