Spaces:

sohampawar1030
/

summarization_app

No application file

App Files Files Community

summarization_app / summarization_app.py

sohampawar1030

Upload 3 files

58c0337 verified about 1 year ago

raw

history blame contribute delete

11.3 kB

	import streamlit as st
	import os
	from groq import Groq
	from dotenv import load_dotenv
	from PyPDF2 import PdfReader
	from io import BytesIO
	from reportlab.lib.pagesizes import letter
	from reportlab.pdfgen import canvas
	from reportlab.lib.utils import simpleSplit
	from bs4 import BeautifulSoup
	import requests
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.llms import OpenAI
	from langchain.chains import RetrievalQA

	# Load environment variables (e.g. from a local .env file) before reading any keys.
	load_dotenv()

	# Shared Groq client, keyed by the GROQ_API_KEY environment variable.
	client = Groq(api_key=os.getenv("GROQ_API_KEY"))

	# Sentence-transformer embeddings used for the FAISS index.
	# `embedding_model` is only the model's name; `embeddings` is the usable object.
	embedding_model = "all-MiniLM-L6-v2"
	embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

	def summarize_text_groq(input_text, model="llama-3.3-70b-versatile", max_tokens=150):
	    """Summarize ``input_text`` with a Groq chat-completion model.

	    Args:
	        input_text: The text to summarize.
	        model: Identifier of the Groq model to call.
	        max_tokens: Upper bound on the number of generated tokens. It is
	            forwarded to the API; pass ``None`` to leave the length uncapped.

	    Returns:
	        The model's reply with surrounding whitespace stripped.

	    Raises:
	        RuntimeError: If the API call or response handling fails; the
	            original exception is attached as the cause.
	    """
	    request_args = {
	        "messages": [
	            {"role": "system", "content": "You are a helpful assistant."},
	            {"role": "user", "content": f"Summarize the following text:\n\n{input_text}"},
	        ],
	        "model": model,
	    }
	    # The limit used to be accepted but silently ignored; only send it when set.
	    if max_tokens is not None:
	        request_args["max_tokens"] = max_tokens
	    try:
	        response = client.chat.completions.create(**request_args)
	        return response.choices[0].message.content.strip()
	    except Exception as e:
	        raise RuntimeError(f"API call failed: {e}") from e

	def extract_text_from_pdf(uploaded_pdf):
	    """Return the concatenated text of every page in ``uploaded_pdf``.

	    Args:
	        uploaded_pdf: A file-like object (or path) accepted by ``PdfReader``.

	    Returns:
	        The extracted text of all pages joined together.

	    Raises:
	        RuntimeError: If the PDF is encrypted, contains no extractable text,
	            or cannot be read at all.
	    """
	    try:
	        pdf_reader = PdfReader(uploaded_pdf)
	        # Report encryption as an error instead of returning "" so callers
	        # never treat an unreadable file as a successful extraction.
	        if pdf_reader.is_encrypted:
	            raise RuntimeError("The uploaded PDF is encrypted and cannot be processed.")
	        # extract_text() may yield None for a page; treat that as empty.
	        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
	        if not text.strip():
	            raise RuntimeError("No extractable text found in the PDF.")
	        return text
	    except Exception as e:
	        raise RuntimeError(f"Failed to extract text from PDF: {e}") from e

	def save_summary_to_pdf(summary_text):
	    """Render ``summary_text`` into a letter-sized PDF held in memory.

	    The first page gets a bold "Summary:" heading; the text is wrapped to the
	    page width and continues onto new pages when it reaches the bottom margin.

	    Returns:
	        A ``BytesIO`` positioned at offset 0 containing the PDF.

	    Raises:
	        RuntimeError: If any step of the PDF generation fails.
	    """
	    try:
	        buffer = BytesIO()
	        pdf = canvas.Canvas(buffer, pagesize=letter)
	        page_width, page_height = letter

	        # Heading on the first page.
	        pdf.setFont("Helvetica-Bold", 14)
	        pdf.drawString(100, page_height - 50, "Summary:")

	        # Body text layout.
	        pdf.setFont("Helvetica", 10)
	        side_margin = 50
	        first_line_y = page_height - 80
	        lowest_y = 50
	        line_step = 12
	        wrapped = simpleSplit(summary_text, "Helvetica", 10, page_width - 2 * side_margin)

	        cursor_y = first_line_y
	        for text_line in wrapped:
	            if cursor_y <= lowest_y:
	                # Out of room: start a new page and restore the body font.
	                pdf.showPage()
	                pdf.setFont("Helvetica", 10)
	                cursor_y = first_line_y
	            pdf.drawString(side_margin, cursor_y, text_line)
	            cursor_y -= line_step

	        pdf.save()
	        buffer.seek(0)
	        return buffer
	    except Exception as exc:
	        raise RuntimeError(f"Failed to save summary to PDF: {exc}")

	def extract_text_from_webpage(url, timeout=10):
	    """Download ``url`` and return the text content of its HTML.

	    Args:
	        url: Address of the page to fetch. NOTE: this value comes from user
	            input and is fetched as-is; no host restrictions are applied here.
	        timeout: Seconds to wait for the server before giving up, so an
	            unresponsive host cannot block the app indefinitely.

	    Returns:
	        The page text, one fragment per line, with whitespace stripped.

	    Raises:
	        RuntimeError: If the request fails, returns an error status, or the
	            page yields no text.
	    """
	    try:
	        response = requests.get(url, timeout=timeout)
	        response.raise_for_status()
	        soup = BeautifulSoup(response.content, "html.parser")
	        text = soup.get_text(separator="\n", strip=True)
	        if not text.strip():
	            raise RuntimeError("No extractable text found on the webpage.")
	        return text
	    except Exception as e:
	        raise RuntimeError(f"Failed to extract text from webpage: {e}") from e

	# FAISS Index Creation
	def create_faiss_index(documents):
	    """Build a FAISS vector store from ``documents`` using the module embeddings.

	    Raises:
	        RuntimeError: If the vector store cannot be created.
	    """
	    try:
	        return FAISS.from_texts(documents, embeddings)
	    except Exception as exc:
	        raise RuntimeError(f"Failed to create FAISS index: {exc}")

	# RAG Pipeline Creation
	def create_rag_pipeline(retriever, model="gpt-3.5-turbo-instruct"):
	    """Create a LangChain ``RetrievalQA`` chain on top of ``retriever``.

	    Args:
	        retriever: Retriever that supplies context documents to the chain.
	        model: OpenAI completion model to use. The previous hard-coded
	            ``text-davinci-003`` has been shut down by OpenAI, so the default
	            is its documented replacement.

	    Returns:
	        A "stuff"-type ``RetrievalQA`` chain.

	    Raises:
	        RuntimeError: If the LLM or the chain cannot be constructed.
	    """
	    try:
	        qa_chain = RetrievalQA.from_chain_type(
	            llm=OpenAI(temperature=0, model=model),
	            chain_type="stuff",
	            retriever=retriever,
	        )
	        return qa_chain
	    except Exception as e:
	        raise RuntimeError(f"Failed to create RAG pipeline: {e}") from e

	# Streamlit UI
	# Page-level configuration, issued ahead of every other Streamlit call in this script.
	st.set_page_config(page_title="Text Summarization App", page_icon="📚", layout="wide")
	st.title("📚 Text Summarization App with Groq API")

	# One tab per feature, in display order.
	_TAB_LABELS = [
	    "Manual Text Input",
	    "PDF Upload",
	    "📚 Multi-Document Summarizer",
	    "🗣️ Chat with Bot",
	    "🌐 Webpage Summarizer",
	]
	tab1, tab2, tab3, tab4, tab5 = st.tabs(_TAB_LABELS)

	# Manual Text Input
	with tab1:
	    # Summarize text typed directly into the page and offer it as a PDF.
	    st.subheader("📝 Enter Your Text")
	    input_text = st.text_area("Enter the text to summarize", height=200, max_chars=2000)
	    if st.button("🔍 Summarize Text"):
	        # Whitespace-only input is treated as empty so no blank request is sent.
	        if input_text and input_text.strip():
	            with st.spinner("Summarizing your text..."):
	                try:
	                    summary = summarize_text_groq(input_text)
	                    st.success("✅ Summary:")
	                    st.write(summary)
	                    summary_pdf = save_summary_to_pdf(summary)
	                    st.download_button(
	                        label="💾 Download Summary as PDF",
	                        data=summary_pdf,
	                        file_name="text_summary.pdf",
	                        mime="application/pdf",
	                    )
	                except Exception as e:
	                    st.error(f"❌ An error occurred: {e}")
	        else:
	            st.warning("⚠️ Please enter some text to summarize!")

	# PDF Upload
	with tab2:
	    # Extract text from a single uploaded PDF, then summarize it on request.
	    st.subheader("📤 Upload a PDF for Summarization")
	    uploaded_pdf = st.file_uploader("Upload PDF", type=["pdf"])
	    if uploaded_pdf:
	        with st.spinner("Extracting text from PDF..."):
	            try:
	                extracted_text = extract_text_from_pdf(uploaded_pdf)
	                # Only claim success (and offer summarization) when text
	                # actually came back; an empty result leaves nothing to show.
	                if extracted_text and extracted_text.strip():
	                    st.success("✅ Text extracted from PDF.")
	                    st.text_area("📄 Extracted Text:", extracted_text, height=200)
	                    if st.button("🔍 Summarize PDF"):
	                        with st.spinner("Summarizing the extracted text..."):
	                            try:
	                                summary = summarize_text_groq(extracted_text)
	                                st.success("✅ PDF Summary:")
	                                st.write(summary)
	                                summary_pdf = save_summary_to_pdf(summary)
	                                st.download_button(
	                                    label="💾 Download Summary PDF",
	                                    data=summary_pdf,
	                                    file_name="summary.pdf",
	                                    mime="application/pdf",
	                                )
	                            except Exception as e:
	                                st.error(f"❌ An error occurred: {e}")
	            except RuntimeError as e:
	                st.error(f"❌ {e}")

	# Multi-Document Summarizer with RAG Pipeline
	with tab3:
	    # Extract text from several PDFs, run each through the retrieval QA
	    # chain, and offer the joined results as one PDF.
	    st.subheader("📤 Upload Multiple PDFs for Summarization")
	    uploaded_pdfs = st.file_uploader("Upload PDFs (select multiple files)", type=["pdf"], accept_multiple_files=True)
	    if uploaded_pdfs:
	        documents = []
	        summaries = []
	        with st.spinner("Processing your documents..."):
	            for uploaded_pdf in uploaded_pdfs:
	                try:
	                    extracted_text = extract_text_from_pdf(uploaded_pdf)
	                    # Skip files that produced no text so they are neither
	                    # indexed nor reported as successfully extracted.
	                    if extracted_text and extracted_text.strip():
	                        documents.append(extracted_text)
	                        st.success(f"✅ Extracted text from: {uploaded_pdf.name}")
	                except RuntimeError as e:
	                    st.error(f"❌ Failed to process {uploaded_pdf.name}: {e}")

	        if documents:
	            # Index/pipeline/LLM failures are shown as an error message, like
	            # in the other tabs, instead of surfacing as a raw traceback.
	            try:
	                # Create FAISS index from documents
	                vectorstore = create_faiss_index(documents)
	                retriever = vectorstore.as_retriever()
	                qa_chain = create_rag_pipeline(retriever)

	                for doc in documents:
	                    # NOTE(review): the full document text is passed as the
	                    # retrieval query; confirm this is the intended prompt.
	                    summary = qa_chain.run(doc)
	                    summaries.append(summary)
	                    st.subheader("Summary:")
	                    st.write(summary)
	            except Exception as e:
	                st.error(f"❌ An error occurred: {e}")

	            # Combined summary (only when at least one summary was produced)
	            if summaries:
	                try:
	                    combined_summary = "\n\n".join(summaries)
	                    summary_pdf = save_summary_to_pdf(combined_summary)
	                    st.download_button(
	                        label="💾 Download Combined Summary PDF",
	                        data=summary_pdf,
	                        file_name="combined_summary.pdf",
	                        mime="application/pdf",
	                    )
	                except Exception as e:
	                    st.error(f"❌ An error occurred: {e}")

	# Chat with Bot
	with tab4:
	    # Simple chat: the conversation lives in st.session_state.messages and is
	    # sent in full to the Groq model on every new user message.
	    st.subheader("🗣️ Chat with the Bot")
	    if "messages" not in st.session_state:
	        st.session_state.messages = [{"role": "system", "content": "You are a helpful assistant."}]
	    for message in st.session_state.messages:
	        # The system prompt is an instruction for the model, not a chat turn,
	        # so it must not be rendered as if the bot had said it.
	        if message["role"] == "system":
	            continue
	        if message["role"] == "user":
	            st.write(f"User: {message['content']}")
	        else:
	            st.write(f"Bot: {message['content']}")
	    user_input = st.text_input("Type your message:", "")
	    if st.button("Send Message"):
	        if user_input:
	            st.session_state.messages.append({"role": "user", "content": user_input})
	            with st.spinner("Bot is typing..."):
	                try:
	                    response = client.chat.completions.create(
	                        messages=st.session_state.messages,
	                        model="llama-3.3-70b-versatile",
	                    )
	                    bot_message = response.choices[0].message.content.strip()
	                    st.session_state.messages.append({"role": "assistant", "content": bot_message})
	                    st.write(f"Bot: {bot_message}")
	                except Exception as e:
	                    st.error(f"❌ An error occurred: {e}")
	        else:
	            st.warning("⚠️ Please enter a message to send!")

	# Webpage Summarizer
	with tab5:
	    # Fetch a web page, show its text, and summarize it.
	    st.subheader("🌐 Enter a Webpage URL for Summarization")
	    url = st.text_input("Enter the webpage URL:")
	    if st.button("🔍 Summarize Webpage"):
	        url = (url or "").strip()
	        # Only http(s) addresses are fetched; anything else gets the
	        # "valid URL" warning instead of a failed request.
	        if url.lower().startswith(("http://", "https://")):
	            with st.spinner("Extracting text from webpage..."):
	                try:
	                    extracted_text = extract_text_from_webpage(url)
	                    st.success("✅ Text extracted from webpage.")
	                    st.text_area("🌐 Extracted Text:", extracted_text, height=200)
	                    with st.spinner("Summarizing the extracted text..."):
	                        try:
	                            summary = summarize_text_groq(extracted_text)
	                            st.success("✅ Webpage Summary:")
	                            st.write(summary)
	                            summary_pdf = save_summary_to_pdf(summary)
	                            st.download_button(
	                                label="💾 Download Summary PDF",
	                                data=summary_pdf,
	                                file_name="webpage_summary.pdf",
	                                mime="application/pdf",
	                            )
	                        except Exception as e:
	                            st.error(f"❌ An error occurred: {e}")
	                except RuntimeError as e:
	                    st.error(f"❌ {e}")
	        else:
	            st.warning("⚠️ Please enter a valid URL!")