Spaces:
No application file
No application file
| import streamlit as st | |
| import os | |
| from groq import Groq | |
| from dotenv import load_dotenv | |
| from PyPDF2 import PdfReader | |
| from io import BytesIO | |
| from reportlab.lib.pagesizes import letter | |
| from reportlab.pdfgen import canvas | |
| from reportlab.lib.utils import simpleSplit | |
| from bs4 import BeautifulSoup | |
| import requests | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.llms import OpenAI | |
| from langchain.chains import RetrievalQA | |
# Load variables from a local .env file into the process environment
# before any of them are read below.
load_dotenv()

# Shared Groq API client; the key is read from the GROQ_API_KEY variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Sentence-Transformers checkpoint used for embeddings. This is only the
# model *name*; the embedding object itself is built on the next line.
embedding_model = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
def summarize_text_groq(input_text, model="llama-3.3-70b-versatile", max_tokens=150):
    """Summarize ``input_text`` with a Groq-hosted chat model.

    Args:
        input_text: Text to summarize.
        model: Groq model identifier used for the completion.
        max_tokens: Upper bound on the number of tokens generated for the
            summary. Pass ``None`` to leave the length uncapped.

    Returns:
        The summary text with surrounding whitespace removed.

    Raises:
        RuntimeError: If the API call fails or the response cannot be read.
    """
    request = {
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize the following text:\n\n{input_text}"},
        ],
        "model": model,
    }
    # Forward the cap to the API; otherwise the parameter has no effect.
    if max_tokens is not None:
        request["max_tokens"] = max_tokens
    try:
        response = client.chat.completions.create(**request)
        # Guard against a missing message body before stripping.
        return (response.choices[0].message.content or "").strip()
    except Exception as e:
        raise RuntimeError(f"API call failed: {e}") from e
def extract_text_from_pdf(uploaded_pdf):
    """Return the concatenated text of every page in a PDF.

    Args:
        uploaded_pdf: The PDF source, passed straight to ``PdfReader``
            (the UI hands in Streamlit upload objects).

    Returns:
        The extracted text of all pages joined together.

    Raises:
        RuntimeError: If the PDF is encrypted, contains no extractable
            text, or cannot be read at all.
    """
    try:
        pdf_reader = PdfReader(uploaded_pdf)
        if pdf_reader.is_encrypted:
            # Raise instead of returning "" so callers, which all catch
            # RuntimeError, never treat an encrypted file as a success.
            raise RuntimeError("The uploaded PDF is encrypted and cannot be processed.")
        # extract_text() may yield None for pages without a text layer.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
        if not text.strip():
            raise RuntimeError("No extractable text found in the PDF.")
        return text
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from PDF: {e}") from e
def save_summary_to_pdf(summary_text):
    """Render ``summary_text`` into an in-memory, letter-sized PDF.

    A bold "Summary:" heading is drawn at the top of the first page and the
    wrapped text follows in 10pt Helvetica, continuing on new pages as needed.

    Args:
        summary_text: The text to lay out in the document.

    Returns:
        A ``BytesIO`` holding the finished PDF, rewound to position 0.

    Raises:
        RuntimeError: If any step of building the PDF fails.
    """
    try:
        buffer = BytesIO()
        pdf = canvas.Canvas(buffer, pagesize=letter)
        page_width, page_height = letter

        # Heading on the first page.
        pdf.setFont("Helvetica-Bold", 14)
        pdf.drawString(100, page_height - 50, "Summary:")

        # Body layout parameters (points).
        body_font, body_size = "Helvetica", 10
        side_margin = 50
        first_baseline = page_height - 80
        lowest_baseline = 50
        leading = 12

        pdf.setFont(body_font, body_size)
        wrapped = simpleSplit(
            summary_text, body_font, body_size, page_width - 2 * side_margin
        )

        cursor_y = first_baseline
        for text_line in wrapped:
            if cursor_y <= lowest_baseline:
                # Out of room: start a new page and restore the body font.
                pdf.showPage()
                pdf.setFont(body_font, body_size)
                cursor_y = first_baseline
            pdf.drawString(side_margin, cursor_y, text_line)
            cursor_y -= leading

        pdf.save()
        buffer.seek(0)
        return buffer
    except Exception as e:
        raise RuntimeError(f"Failed to save summary to PDF: {e}")
def extract_text_from_webpage(url, timeout=10):
    """Download a web page and return its visible text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            host.

    Returns:
        The page text, one stripped text fragment per line.

    Raises:
        RuntimeError: If the request fails, the server returns an error
            status, or the page contains no text.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        text = soup.get_text(separator="\n", strip=True)
        if not text.strip():
            raise RuntimeError("No extractable text found on the webpage.")
        return text
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from webpage: {e}") from e
# FAISS index creation
def create_faiss_index(documents):
    """Build a FAISS vector store from ``documents``.

    Args:
        documents: The texts to index; each one is embedded with the
            module-level ``embeddings`` object.

    Returns:
        The vector store produced by ``FAISS.from_texts``.

    Raises:
        RuntimeError: If building the index fails.
    """
    try:
        return FAISS.from_texts(documents, embeddings)
    except Exception as exc:
        raise RuntimeError(f"Failed to create FAISS index: {exc}")
# RAG pipeline creation
def create_rag_pipeline(retriever):
    """Build a LangChain ``RetrievalQA`` chain on top of ``retriever``.

    Args:
        retriever: Retriever that supplies context documents to the chain.

    Returns:
        A "stuff"-type ``RetrievalQA`` chain backed by an OpenAI completion
        model at temperature 0.

    Raises:
        RuntimeError: If the chain cannot be constructed.
    """
    try:
        # text-davinci-003 has been shut down by OpenAI; gpt-3.5-turbo-instruct
        # is the documented replacement on the same completions endpoint.
        llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
        )
    except Exception as e:
        raise RuntimeError(f"Failed to create RAG pipeline: {e}") from e
# Streamlit UI: page configuration, title, and the five feature tabs.
st.set_page_config(
    page_title="Text Summarization App",
    page_icon="π",
    layout="wide",
)
st.title("π Text Summarization App with Groq API")

# One tab per feature, in display order.
tab1, tab2, tab3, tab4, tab5 = st.tabs(
    [
        "Manual Text Input",
        "PDF Upload",
        "π Multi-Document Summarizer",
        "π£οΈ Chat with Bot",
        "π Webpage Summarizer",
    ]
)
# Tab 1: summarize text typed directly into the page.
with tab1:
    st.subheader("π Enter Your Text")
    input_text = st.text_area("Enter the text to summarize", height=200, max_chars=2000)

    if st.button("π Summarize Text"):
        if not input_text:
            # Nothing to send to the model.
            st.warning("β οΈ Please enter some text to summarize!")
        else:
            with st.spinner("Summarizing your text..."):
                try:
                    summary = summarize_text_groq(input_text)
                    st.success("β Summary:")
                    st.write(summary)

                    # Offer the same summary as a downloadable PDF.
                    summary_pdf = save_summary_to_pdf(summary)
                    st.download_button(
                        label="πΎ Download Summary as PDF",
                        data=summary_pdf,
                        file_name="text_summary.pdf",
                        mime="application/pdf",
                    )
                except Exception as e:
                    st.error(f"β An error occurred: {e}")
# Tab 2: extract the text of one uploaded PDF, then summarize it on request.
with tab2:
    st.subheader("π€ Upload a PDF for Summarization")
    uploaded_pdf = st.file_uploader("Upload PDF", type=["pdf"])

    if uploaded_pdf:
        with st.spinner("Extracting text from PDF..."):
            try:
                extracted_text = extract_text_from_pdf(uploaded_pdf)
                st.success("β Text extracted from PDF.")
                st.text_area("π Extracted Text:", extracted_text, height=200)

                # Summarization only runs once the user asks for it.
                if st.button("π Summarize PDF"):
                    with st.spinner("Summarizing the extracted text..."):
                        try:
                            summary = summarize_text_groq(extracted_text)
                            st.success("β PDF Summary:")
                            st.write(summary)

                            summary_pdf = save_summary_to_pdf(summary)
                            st.download_button(
                                label="πΎ Download Summary PDF",
                                data=summary_pdf,
                                file_name="summary.pdf",
                                mime="application/pdf",
                            )
                        except Exception as e:
                            st.error(f"β An error occurred: {e}")
            except RuntimeError as e:
                # Raised by the extraction helper (and by anything else in
                # the block above that surfaces a RuntimeError).
                st.error(f"β {e}")
# Tab 3: summarize several PDFs through the FAISS + RetrievalQA pipeline.
with tab3:
    st.subheader("π€ Upload Multiple PDFs for Summarization")
    uploaded_pdfs = st.file_uploader(
        "Upload PDFs (select multiple files)",
        type=["pdf"],
        accept_multiple_files=True,
    )

    if uploaded_pdfs:
        documents = []
        summaries = []

        with st.spinner("Processing your documents..."):
            # A file that cannot be read is reported and skipped so the
            # remaining uploads are still processed.
            for uploaded_pdf in uploaded_pdfs:
                try:
                    extracted_text = extract_text_from_pdf(uploaded_pdf)
                    documents.append(extracted_text)
                    st.success(f"β Extracted text from: {uploaded_pdf.name}")
                except RuntimeError as e:
                    st.error(f"β Failed to process {uploaded_pdf.name}: {e}")

            if documents:
                # Building the index and the chain can fail (model download,
                # missing API key, ...); report it instead of crashing the page.
                try:
                    vectorstore = create_faiss_index(documents)
                    retriever = vectorstore.as_retriever()
                    qa_chain = create_rag_pipeline(retriever)
                except Exception as e:
                    st.error(f"β An error occurred: {e}")
                else:
                    for doc in documents:
                        # NOTE(review): the whole document text is passed as
                        # the chain's query; confirm this is the intended
                        # prompt for producing a summary.
                        try:
                            summary = qa_chain.run(doc)
                        except Exception as e:
                            st.error(f"β An error occurred: {e}")
                            continue
                        summaries.append(summary)
                        st.subheader("Summary:")
                        st.write(summary)

        # Only offer the combined download when at least one summary exists.
        if summaries:
            combined_summary = "\n\n".join(summaries)
            try:
                summary_pdf = save_summary_to_pdf(combined_summary)
            except RuntimeError as e:
                st.error(f"β {e}")
            else:
                st.download_button(
                    label="πΎ Download Combined Summary PDF",
                    data=summary_pdf,
                    file_name="combined_summary.pdf",
                    mime="application/pdf",
                )
# Tab 4: multi-turn chat whose history is kept in Streamlit session state.
with tab4:
    st.subheader("π£οΈ Chat with the Bot")

    # Seed the conversation with the system prompt on first load.
    if "messages" not in st.session_state:
        st.session_state.messages = [
            {"role": "system", "content": "You are a helpful assistant."}
        ]

    # Replay the conversation so far. The system prompt is an instruction to
    # the model, not part of the dialogue, so it is not shown as a bot reply.
    for message in st.session_state.messages:
        if message["role"] == "system":
            continue
        speaker = "User" if message["role"] == "user" else "Bot"
        st.write(f"**{speaker}**: {message['content']}")

    user_input = st.text_input("Type your message:", "")

    if st.button("Send Message"):
        if user_input:
            st.session_state.messages.append({"role": "user", "content": user_input})
            with st.spinner("Bot is typing..."):
                try:
                    # The full history is sent so the model sees prior turns.
                    response = client.chat.completions.create(
                        messages=st.session_state.messages,
                        model="llama-3.3-70b-versatile",
                    )
                    bot_message = response.choices[0].message.content.strip()
                    st.session_state.messages.append(
                        {"role": "assistant", "content": bot_message}
                    )
                    st.write(f"**Bot**: {bot_message}")
                except Exception as e:
                    st.error(f"β An error occurred: {e}")
        else:
            st.warning("β οΈ Please enter a message to send!")
# Tab 5: fetch a web page by URL, show its text, and summarize it.
with tab5:
    st.subheader("π Enter a Webpage URL for Summarization")
    url = st.text_input("Enter the webpage URL:")

    if st.button("π Summarize Webpage"):
        if not url:
            # Nothing to fetch.
            st.warning("β οΈ Please enter a valid URL!")
        else:
            with st.spinner("Extracting text from webpage..."):
                try:
                    extracted_text = extract_text_from_webpage(url)
                    st.success("β Text extracted from webpage.")
                    st.text_area("π Extracted Text:", extracted_text, height=200)

                    # Summarize immediately after a successful fetch.
                    with st.spinner("Summarizing the extracted text..."):
                        try:
                            summary = summarize_text_groq(extracted_text)
                            st.success("β Webpage Summary:")
                            st.write(summary)

                            summary_pdf = save_summary_to_pdf(summary)
                            st.download_button(
                                label="πΎ Download Summary PDF",
                                data=summary_pdf,
                                file_name="webpage_summary.pdf",
                                mime="application/pdf",
                            )
                        except Exception as e:
                            st.error(f"β An error occurred: {e}")
                except RuntimeError as e:
                    # Raised by the extraction helper when the fetch or
                    # parse fails.
                    st.error(f"β {e}")