Spaces:

sohampawar1030
/

summarization_app

No application file

App Files Files Community

sohampawar1030 committed on Jan 8, 2025

Commit

58c0337

verified ·

1 Parent(s): a073cc4

Upload 3 files

Browse files

Files changed (3) hide show

.env +2 -0
requirements.txt +7 -0
summarization_app.py +270 -0

.env ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ GROQ_API_KEY=<redacted: credential removed; revoke the exposed key and store the replacement as a Space secret, never in a committed .env>
2	+

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit==1.16.0
+groq==0.1.0
+python-dotenv==0.21.1
+PyPDF2==2.11.1
+reportlab==3.6.4
+beautifulsoup4==4.11.1
+requests==2.28.2

summarization_app.py ADDED Viewed

	@@ -0,0 +1,270 @@

+import streamlit as st
+import os
+from groq import Groq
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from io import BytesIO
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+from reportlab.lib.utils import simpleSplit
+from bs4 import BeautifulSoup
+import requests
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.llms import OpenAI
+from langchain.chains import RetrievalQA
+load_dotenv()
+# Initialize Groq API
+client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+# Use HuggingFaceEmbeddings for Sentence Transformer model
+embedding_model = "all-MiniLM-L6-v2"  # This is the model name, not the actual model object
+embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
+def summarize_text_groq(input_text, model="llama-3.3-70b-versatile", max_tokens=150):
+    try:
+        response = client.chat.completions.create(
+            messages=[{"role": "system", "content": "You are a helpful assistant."},
+                      {"role": "user", "content": f"Summarize the following text:\n\n{input_text}"}],
+            model=model,
+        )
+        return response.choices[0].message.content.strip()
+    except Exception as e:
+        raise RuntimeError(f"API call failed: {e}")
+def extract_text_from_pdf(uploaded_pdf):
+    try:
+        pdf_reader = PdfReader(uploaded_pdf)
+        if pdf_reader.is_encrypted:
+            st.error("❌ The uploaded PDF is encrypted and cannot be processed.")
+            return ""
+        text = ""
+        for page in pdf_reader.pages:
+            text += page.extract_text() or ""
+        if not text.strip():
+            raise RuntimeError("No extractable text found in the PDF.")
+        return text
+    except Exception as e:
+        raise RuntimeError(f"Failed to extract text from PDF: {e}")
+def save_summary_to_pdf(summary_text):
+    try:
+        summary_stream = BytesIO()
+        c = canvas.Canvas(summary_stream, pagesize=letter)
+        width, height = letter
+        c.setFont("Helvetica-Bold", 14)
+        c.drawString(100, height - 50, "Summary:")
+        c.setFont("Helvetica", 10)
+        text_margin = 50
+        top_margin = height - 80
+        bottom_margin = 50
+        line_height = 12
+        lines = simpleSplit(summary_text, "Helvetica", 10, width - 2 * text_margin)
+        y_position = top_margin
+        for line in lines:
+            if y_position <= bottom_margin:
+                c.showPage()
+                c.setFont("Helvetica", 10)
+                y_position = top_margin
+            c.drawString(text_margin, y_position, line)
+            y_position -= line_height
+        c.save()
+        summary_stream.seek(0)
+        return summary_stream
+    except Exception as e:
+        raise RuntimeError(f"Failed to save summary to PDF: {e}")
+def extract_text_from_webpage(url):
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.content, "html.parser")
+        text = soup.get_text(separator="\n", strip=True)
+        if not text.strip():
+            raise RuntimeError("No extractable text found on the webpage.")
+        return text
+    except Exception as e:
+        raise RuntimeError(f"Failed to extract text from webpage: {e}")
+# FAISS Index Creation
+def create_faiss_index(documents):
+    try:
+        # Create vector store using FAISS from the extracted documents
+        vectorstore = FAISS.from_texts(documents, embeddings)
+        return vectorstore
+    except Exception as e:
+        raise RuntimeError(f"Failed to create FAISS index: {e}")
+# RAG Pipeline Creation
+def create_rag_pipeline(retriever):
+    try:
+        # Use LangChain RetrievalQA for generating answers from the retrieved documents
+        qa_chain = RetrievalQA.from_chain_type(
+            llm=OpenAI(temperature=0, model="text-davinci-003"),
+            chain_type="stuff",
+            retriever=retriever
+        )
+        return qa_chain
+    except Exception as e:
+        raise RuntimeError(f"Failed to create RAG pipeline: {e}")
+# Streamlit UI
+st.set_page_config(page_title="Text Summarization App", page_icon="📚", layout="wide")
+st.title("📚 Text Summarization App with Groq API")
+tab1, tab2, tab3, tab4, tab5 = st.tabs([
+    "Manual Text Input",
+    "PDF Upload",
+    "📚 Multi-Document Summarizer",
+    "🗣️ Chat with Bot",
+    "🌐 Webpage Summarizer"
+])
+# Manual Text Input
+with tab1:
+    st.subheader("📝 Enter Your Text")
+    input_text = st.text_area("Enter the text to summarize", height=200, max_chars=2000)
+    if st.button("🔍 Summarize Text"):
+        if input_text:
+            with st.spinner("Summarizing your text..."):
+                try:
+                    summary = summarize_text_groq(input_text)
+                    st.success("✅ Summary:")
+                    st.write(summary)
+                    summary_pdf = save_summary_to_pdf(summary)
+                    st.download_button(
+                        label="💾 Download Summary as PDF",
+                        data=summary_pdf,
+                        file_name="text_summary.pdf",
+                        mime="application/pdf",
+                    )
+                except Exception as e:
+                    st.error(f"❌ An error occurred: {e}")
+        else:
+            st.warning("⚠️ Please enter some text to summarize!")
+# PDF Upload
+with tab2:
+    st.subheader("📤 Upload a PDF for Summarization")
+    uploaded_pdf = st.file_uploader("Upload PDF", type=["pdf"])
+    if uploaded_pdf:
+        with st.spinner("Extracting text from PDF..."):
+            try:
+                extracted_text = extract_text_from_pdf(uploaded_pdf)
+                st.success("✅ Text extracted from PDF.")
+                st.text_area("📄 Extracted Text:", extracted_text, height=200)
+                if st.button("🔍 Summarize PDF"):
+                    with st.spinner("Summarizing the extracted text..."):
+                        try:
+                            summary = summarize_text_groq(extracted_text)
+                            st.success("✅ PDF Summary:")
+                            st.write(summary)
+                            summary_pdf = save_summary_to_pdf(summary)
+                            st.download_button(
+                                label="💾 Download Summary PDF",
+                                data=summary_pdf,
+                                file_name="summary.pdf",
+                                mime="application/pdf",
+                            )
+                        except Exception as e:
+                            st.error(f"❌ An error occurred: {e}")
+            except RuntimeError as e:
+                st.error(f"❌ {e}")
+# Multi-Document Summarizer with RAG Pipeline
+with tab3:
+    st.subheader("📤 Upload Multiple PDFs for Summarization")
+    uploaded_pdfs = st.file_uploader("Upload PDFs (select multiple files)", type=["pdf"], accept_multiple_files=True)
+    if uploaded_pdfs:
+        documents = []
+        summaries = []
+        with st.spinner("Processing your documents..."):
+            for uploaded_pdf in uploaded_pdfs:
+                try:
+                    extracted_text = extract_text_from_pdf(uploaded_pdf)
+                    documents.append(extracted_text)
+                    st.success(f"✅ Extracted text from: {uploaded_pdf.name}")
+                except RuntimeError as e:
+                    st.error(f"❌ Failed to process {uploaded_pdf.name}: {e}")
+        if documents:
+            # Create FAISS index from documents
+            vectorstore = create_faiss_index(documents)
+            retriever = vectorstore.as_retriever()
+            qa_chain = create_rag_pipeline(retriever)
+            for doc in documents:
+                summary = qa_chain.run(doc)
+                summaries.append(summary)
+                st.subheader("Summary:")
+                st.write(summary)
+            # Combined summary
+            combined_summary = "\n\n".join(summaries)
+            summary_pdf = save_summary_to_pdf(combined_summary)
+            st.download_button(
+                label="💾 Download Combined Summary PDF",
+                data=summary_pdf,
+                file_name="combined_summary.pdf",
+                mime="application/pdf",
+            )
+# Chat with Bot
+with tab4:
+    st.subheader("🗣️ Chat with the Bot")
+    if "messages" not in st.session_state:
+        st.session_state.messages = [{"role": "system", "content": "You are a helpful assistant."}]
+    for message in st.session_state.messages:
+        if message["role"] == "user":
+            st.write(f"**User**: {message['content']}")
+        else:
+            st.write(f"**Bot**: {message['content']}")
+    user_input = st.text_input("Type your message:", "")
+    if st.button("Send Message"):
+        if user_input:
+            st.session_state.messages.append({"role": "user", "content": user_input})
+            with st.spinner("Bot is typing..."):
+                try:
+                    response = client.chat.completions.create(
+                        messages=st.session_state.messages,
+                        model="llama-3.3-70b-versatile",
+                    )
+                    bot_message = response.choices[0].message.content.strip()
+                    st.session_state.messages.append({"role": "assistant", "content": bot_message})
+                    st.write(f"**Bot**: {bot_message}")
+                except Exception as e:
+                    st.error(f"❌ An error occurred: {e}")
+        else:
+            st.warning("⚠️ Please enter a message to send!")
+# Webpage Summarizer
+with tab5:
+    st.subheader("🌐 Enter a Webpage URL for Summarization")
+    url = st.text_input("Enter the webpage URL:")
+    if st.button("🔍 Summarize Webpage"):
+        if url:
+            with st.spinner("Extracting text from webpage..."):
+                try:
+                    extracted_text = extract_text_from_webpage(url)
+                    st.success("✅ Text extracted from webpage.")
+                    st.text_area("🌐 Extracted Text:", extracted_text, height=200)
+                    with st.spinner("Summarizing the extracted text..."):
+                        try:
+                            summary = summarize_text_groq(extracted_text)
+                            st.success("✅ Webpage Summary:")
+                            st.write(summary)
+                            summary_pdf = save_summary_to_pdf(summary)
+                            st.download_button(
+                                label="💾 Download Summary PDF",
+                                data=summary_pdf,
+                                file_name="webpage_summary.pdf",
+                                mime="application/pdf",
+                            )
+                        except Exception as e:
+                            st.error(f"❌ An error occurred: {e}")
+                except RuntimeError as e:
+                    st.error(f"❌ {e}")
+        else:
+            st.warning("⚠️ Please enter a valid URL!")