Spaces:

lovi07
/

Text_Summarization

Sleeping

Lovish Singla

Update app.py

1eb24e1 unverified 6 months ago

4.85 kB

	import os
	import validators
	import streamlit as st
	from langchain.prompts import PromptTemplate
	from langchain_groq import ChatGroq
	from langchain.chains.summarize import load_summarize_chain
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
	import tempfile

	# Page setup: the same title is used for the browser tab and the on-page heading.
	APP_TITLE = "LangChain: Summarize Text From YT, Website, or PDF"
	APP_ICON = "🦜"

	st.set_page_config(page_title=APP_TITLE, page_icon=APP_ICON)
	st.title(f"{APP_ICON} {APP_TITLE}")
	st.subheader("Summarize Content from a URL or Uploaded PDF")

	# Sidebar: show where to obtain the keys, then collect both as masked inputs.
	with st.sidebar:
	    st.write("Get your Groq API key from https://groq.com/ and your LangSmith API key from https://langsmith.com/")
	    groq_api_key = st.text_input(label="Groq API Key", value="", type="password")
	    langsmith_api_key = st.text_input(label="LangSmith API Key", value="", type="password")

	# Export the LangSmith settings as environment variables, but only when a
	# key was actually entered.
	if langsmith_api_key:
	    os.environ.update(
	        {
	            "LANGCHAIN_TRACING_V2": "true",
	            "LANGCHAIN_API_KEY": langsmith_api_key,
	        }
	    )

	# Source inputs, rendered in this order: a URL text box (its label is
	# collapsed, so only the box is shown) followed by a PDF-only file uploader.
	generic_url = st.text_input(
	    label="URL (YouTube or Website)",
	    label_visibility="collapsed",
	)
	uploaded_file = st.file_uploader(
	    label="Upload a PDF File",
	    type=["pdf"],
	)

	# Prompt Templates
	# Initial prompt: asks for a concise summary of the content bound to {text}.
	_INITIAL_TEMPLATE = "Write a concise summary of the following content:\nContent: {text}"
	initial_prompt = PromptTemplate(
	    input_variables=["text"],
	    template=_INITIAL_TEMPLATE,
	)

	# Refinement prompt: presents the current summary ({existing_answer}) together
	# with additional content ({text}) and asks for an updated, still concise summary.
	_REFINEMENT_TEMPLATE = "".join(
	    (
	        "The following is a summary that needs refinement:\nCurrent Summary: {existing_answer}\n\n",
	        "We have additional content that can be used to refine the summary:\nContent: {text}\n\n",
	        "Please refine the current summary to include the new information while maintaining conciseness.",
	    )
	)
	refinement_prompt = PromptTemplate(
	    input_variables=["existing_answer", "text"],
	    template=_REFINEMENT_TEMPLATE,
	)

	# Build the Groq chat model. `llm` stays None when no key was entered or
	# when constructing the client raises (the error is shown on the page).
	llm = None
	if groq_api_key:
	    try:
	        llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key)
	    except Exception as init_error:
	        st.error(f"Failed to initialize Groq client: {init_error}")

	# Button to Summarize Content
	def _load_documents(url, pdf_file):
	    """Load the documents to summarize from a URL or an uploaded PDF.

	    The URL takes precedence: when ``url`` is non-empty the PDF is ignored.

	    Args:
	        url: Already-stripped URL string; may be empty.
	        pdf_file: The object returned by ``st.file_uploader``, or ``None``.

	    Returns:
	        The list of documents produced by the selected loader, or an empty
	        list when neither source was provided.
	    """
	    if url:
	        if "youtube.com" in url or "youtu.be" in url:
	            loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
	        else:
	            # NOTE(review): ssl_verify=False turns off TLS certificate
	            # verification for the fetched page. Kept to preserve existing
	            # behavior; confirm this is intentional before relying on it.
	            loader = UnstructuredURLLoader(
	                urls=[url],
	                ssl_verify=False,
	                headers={
	                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) "
	                    "AppleWebKit/537.36 (KHTML, like Gecko) "
	                    "Chrome/116.0.0.0 Safari/537.36"
	                },
	            )
	        return loader.load()

	    if pdf_file:
	        # PyPDFLoader is given a filesystem path, so the upload is copied to
	        # a temporary file. delete=False keeps the file after the `with`
	        # block closes it, so it can be reopened by name.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
	            # getvalue() returns the full upload regardless of the current
	            # read position of the buffer.
	            temp_file.write(pdf_file.getvalue())
	            temp_file_path = temp_file.name
	        try:
	            return PyPDFLoader(temp_file_path).load_and_split()
	        finally:
	            # Remove the temporary copy so repeated requests do not pile up
	            # files on disk; cleanup is best-effort.
	            try:
	                os.remove(temp_file_path)
	            except OSError:
	                pass

	    return []


	if st.button("Summarize the Content"):
	    # Normalize once so validation and loading see the same value.
	    url = generic_url.strip()

	    if not groq_api_key.strip():
	        st.error("Please provide the Groq API Key to get started.")
	    elif not langsmith_api_key.strip():
	        st.error("Please provide the LangSmith API Key for tracking.")
	    elif not (url or uploaded_file):
	        st.error("Please provide a valid URL or upload a PDF file.")
	    elif url and not validators.url(url):
	        st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
	    elif not llm:
	        st.error("LLM not initialized. Please check your API key.")
	    else:
	        try:
	            with st.spinner("Processing..."):
	                docs = _load_documents(url, uploaded_file)

	                # Safety check
	                if not docs:
	                    st.error("❌ No content could be extracted from the given source. Please try another file or URL.")
	                else:
	                    chain = load_summarize_chain(
	                        llm,
	                        chain_type="refine",
	                        question_prompt=initial_prompt,
	                        refine_prompt=refinement_prompt,
	                        verbose=True,
	                    )
	                    output_summary = chain.run(docs)
	                    st.success(output_summary)
	        except Exception as e:
	            # Top-level boundary for loader/LLM failures. Pass the exception
	            # object itself (not a formatted string) so Streamlit can render
	            # its type and traceback.
	            st.exception(e)