sohampawar1030 committed on
Commit
58c0337
·
verified ·
1 Parent(s): a073cc4

Upload 3 files

Browse files
Files changed (3) hide show
  1. .env +2 -0
  2. requirements.txt +7 -0
  3. summarization_app.py +270 -0
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # SECURITY: a live API key was committed on this line. Revoke/rotate it and keep .env out of version control.
+ GROQ_API_KEY=your_groq_api_key_here
2
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit==1.16.0
2
+ groq==0.1.0
3
+ python-dotenv==0.21.1
4
+ PyPDF2==2.11.1
5
+ reportlab==3.6.4
6
+ beautifulsoup4==4.11.1
7
+ requests==2.28.2
summarization_app.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from groq import Groq
4
+ from dotenv import load_dotenv
5
+ from PyPDF2 import PdfReader
6
+ from io import BytesIO
7
+ from reportlab.lib.pagesizes import letter
8
+ from reportlab.pdfgen import canvas
9
+ from reportlab.lib.utils import simpleSplit
10
+ from bs4 import BeautifulSoup
11
+ import requests
12
+ from langchain.embeddings import HuggingFaceEmbeddings
13
+ from langchain.vectorstores import FAISS
14
+ from langchain.llms import OpenAI
15
+ from langchain.chains import RetrievalQA
16
+
17
# Load variables from a local .env file into the process environment
# before anything below reads them.
load_dotenv()

# Shared Groq client; the key is read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Sentence-transformer embeddings wrapped by LangChain.
# `embedding_model` holds only the model *name*; `embeddings` is the object
# handed to the vector store.
embedding_model = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
25
+
26
def summarize_text_groq(input_text, model="llama-3.3-70b-versatile", max_tokens=150):
    """Summarize ``input_text`` with a Groq-hosted chat model.

    Args:
        input_text: Text to summarize; it is interpolated into the user prompt.
        model: Model identifier passed to the chat-completions call.
        max_tokens: Upper bound on the number of tokens the model may
            generate for the summary.

    Returns:
        The first choice's message content with surrounding whitespace removed.

    Raises:
        RuntimeError: If the API call or the response handling fails. The
            original exception is chained as ``__cause__``.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Summarize the following text:\n\n{input_text}"},
    ]
    try:
        response = client.chat.completions.create(
            messages=messages,
            model=model,
            # Previously the parameter was accepted but silently ignored.
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        raise RuntimeError(f"API call failed: {e}") from e
36
+
37
def extract_text_from_pdf(uploaded_pdf):
    """Extract the concatenated text of every page of a PDF.

    Args:
        uploaded_pdf: A file-like object (or path) accepted by ``PdfReader``.

    Returns:
        The text of all pages joined together; never empty.

    Raises:
        RuntimeError: If the PDF is encrypted, cannot be parsed, or contains
            no extractable text. Callers already catch ``RuntimeError`` and
            display it, so every failure is reported the same way.
    """
    try:
        pdf_reader = PdfReader(uploaded_pdf)
        encrypted = pdf_reader.is_encrypted
        # extract_text() may yield None for image-only pages; treat as empty.
        text = "" if encrypted else "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from PDF: {e}") from e

    # Raise instead of returning "" so callers do not report a successful
    # extraction (or index an empty document) for an unreadable file.
    if encrypted:
        raise RuntimeError("The uploaded PDF is encrypted and cannot be processed.")
    if not text.strip():
        raise RuntimeError("No extractable text found in the PDF.")
    return text
51
+
52
def save_summary_to_pdf(summary_text):
    """Render ``summary_text`` into an in-memory, letter-size PDF.

    The first page carries a bold "Summary:" heading; the text is wrapped to
    the printable width and continues on new pages as needed.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the PDF bytes.

    Raises:
        RuntimeError: If any step of the rendering fails.
    """
    try:
        page_width, page_height = letter
        side_margin = 50
        first_line_y = page_height - 80
        lowest_y = 50
        line_step = 12

        buffer = BytesIO()
        pdf = canvas.Canvas(buffer, pagesize=letter)

        # Heading, then switch to the body font.
        pdf.setFont("Helvetica-Bold", 14)
        pdf.drawString(100, page_height - 50, "Summary:")
        pdf.setFont("Helvetica", 10)

        cursor_y = first_line_y
        wrapped = simpleSplit(summary_text, "Helvetica", 10, page_width - 2 * side_margin)
        for wrapped_line in wrapped:
            if cursor_y <= lowest_y:
                # Page is full: start a new one and restore the body font.
                pdf.showPage()
                pdf.setFont("Helvetica", 10)
                cursor_y = first_line_y
            pdf.drawString(side_margin, cursor_y, wrapped_line)
            cursor_y -= line_step

        pdf.save()
        buffer.seek(0)
        return buffer
    except Exception as e:
        raise RuntimeError(f"Failed to save summary to PDF: {e}")
78
+
79
def extract_text_from_webpage(url, timeout=10):
    """Download a web page and return its visible text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled host cannot hang the app indefinitely.

    Returns:
        The page text, one fragment per line, with ``<script>`` and
        ``<style>`` contents removed.

    Raises:
        RuntimeError: If the request fails, the server returns an error
            status, parsing fails, or the page has no extractable text.
    """
    # NOTE(security): `url` is user-supplied and fetched server-side; if this
    # app is deployed on a network with internal services, restrict the
    # allowed hosts/schemes before calling this.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        # Drop code and styling so they do not pollute the text to summarize.
        for tag in soup(["script", "style"]):
            tag.decompose()
        text = soup.get_text(separator="\n", strip=True)
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from webpage: {e}") from e

    if not text.strip():
        raise RuntimeError("No extractable text found on the webpage.")
    return text
90
+
91
# Build the FAISS vector store
def create_faiss_index(documents):
    """Embed ``documents`` with the module-level ``embeddings`` and index them in FAISS.

    Returns:
        The vector store produced by ``FAISS.from_texts``.

    Raises:
        RuntimeError: If building the store fails for any reason.
    """
    try:
        return FAISS.from_texts(documents, embeddings)
    except Exception as e:
        raise RuntimeError(f"Failed to create FAISS index: {e}")
99
+
100
# RAG Pipeline Creation
def create_rag_pipeline(retriever, llm=None):
    """Build a LangChain ``RetrievalQA`` chain over ``retriever``.

    Args:
        retriever: Retriever supplying context documents to the chain.
        llm: Optional LangChain LLM to answer with. When omitted, an OpenAI
            completion model is created, which requires OPENAI_API_KEY to be
            set in the environment.

    Returns:
        A "stuff"-type ``RetrievalQA`` chain.

    Raises:
        RuntimeError: If the LLM or the chain cannot be constructed.
    """
    try:
        if llm is None:
            # "text-davinci-003" has been retired by OpenAI;
            # "gpt-3.5-turbo-instruct" is its documented completion-API replacement.
            llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
        )
    except Exception as e:
        raise RuntimeError(f"Failed to create RAG pipeline: {e}") from e
112
+
113
# Streamlit UI: page chrome and the five feature tabs.
# Emoji literals were mis-encoded (mojibake) and are restored here.
st.set_page_config(page_title="Text Summarization App", page_icon="📚", layout="wide")
st.title("📚 Text Summarization App with Groq API")

tab1, tab2, tab3, tab4, tab5 = st.tabs([
    "Manual Text Input",
    "PDF Upload",
    "📚 Multi-Document Summarizer",
    "🗣️ Chat with Bot",
    "🌐 Webpage Summarizer",
])
124
+
125
# Manual Text Input: summarize text typed by the user and offer it as a PDF.
with tab1:
    st.subheader("📝 Enter Your Text")
    input_text = st.text_area("Enter the text to summarize", height=200, max_chars=2000)
    if st.button("🔍 Summarize Text"):
        # Whitespace-only input counts as empty; do not spend an API call on it.
        if input_text and input_text.strip():
            with st.spinner("Summarizing your text..."):
                try:
                    summary = summarize_text_groq(input_text)
                    st.success("✅ Summary:")
                    st.write(summary)
                    summary_pdf = save_summary_to_pdf(summary)
                    st.download_button(
                        label="💾 Download Summary as PDF",
                        data=summary_pdf,
                        file_name="text_summary.pdf",
                        mime="application/pdf",
                    )
                except Exception as e:
                    st.error(f"❌ An error occurred: {e}")
        else:
            st.warning("⚠️ Please enter some text to summarize!")
147
+
148
# PDF Upload: extract text from one PDF, preview it, and summarize on demand.
with tab2:
    st.subheader("📤 Upload a PDF for Summarization")
    uploaded_pdf = st.file_uploader("Upload PDF", type=["pdf"])
    if uploaded_pdf:
        extracted_text = ""
        with st.spinner("Extracting text from PDF..."):
            try:
                extracted_text = extract_text_from_pdf(uploaded_pdf)
            except RuntimeError as e:
                st.error(f"❌ {e}")

        # Only claim success (and offer summarization) when text was actually
        # obtained; an empty result means extraction did not succeed.
        if extracted_text:
            st.success("✅ Text extracted from PDF.")
            st.text_area("📄 Extracted Text:", extracted_text, height=200)
            if st.button("🔍 Summarize PDF"):
                with st.spinner("Summarizing the extracted text..."):
                    try:
                        summary = summarize_text_groq(extracted_text)
                        st.success("✅ PDF Summary:")
                        st.write(summary)
                        summary_pdf = save_summary_to_pdf(summary)
                        st.download_button(
                            label="💾 Download Summary PDF",
                            data=summary_pdf,
                            file_name="summary.pdf",
                            mime="application/pdf",
                        )
                    except Exception as e:
                        st.error(f"❌ An error occurred: {e}")
175
+
176
# Multi-Document Summarizer: summarize each uploaded PDF and offer one combined PDF.
#
# Each document is summarized with summarize_text_groq, the same helper the
# other tabs use. The previous flow passed every whole document as the
# *question* to a RetrievalQA chain, depended on an OpenAI key this app never
# configures, and let index/pipeline errors escape as an unhandled exception.
with tab3:
    st.subheader("📤 Upload Multiple PDFs for Summarization")
    uploaded_pdfs = st.file_uploader("Upload PDFs (select multiple files)", type=["pdf"], accept_multiple_files=True)
    if uploaded_pdfs:
        doc_names = []
        documents = []
        summaries = []
        with st.spinner("Processing your documents..."):
            for uploaded_pdf in uploaded_pdfs:
                try:
                    extracted_text = extract_text_from_pdf(uploaded_pdf)
                except RuntimeError as e:
                    st.error(f"❌ Failed to process {uploaded_pdf.name}: {e}")
                    continue
                if not extracted_text:
                    # Nothing usable came back for this file; skip it.
                    continue
                doc_names.append(uploaded_pdf.name)
                documents.append(extracted_text)
                st.success(f"✅ Extracted text from: {uploaded_pdf.name}")

            for doc_name, doc in zip(doc_names, documents):
                try:
                    summary = summarize_text_groq(doc)
                except RuntimeError as e:
                    # One failing document must not abort the others.
                    st.error(f"❌ Failed to summarize {doc_name}: {e}")
                    continue
                summaries.append(f"{doc_name}:\n{summary}")
                st.subheader(f"Summary: {doc_name}")
                st.write(summary)

        # Offer the download only when there is something to put in it.
        if summaries:
            try:
                combined_summary = "\n\n".join(summaries)
                summary_pdf = save_summary_to_pdf(combined_summary)
                st.download_button(
                    label="💾 Download Combined Summary PDF",
                    data=summary_pdf,
                    file_name="combined_summary.pdf",
                    mime="application/pdf",
                )
            except RuntimeError as e:
                st.error(f"❌ {e}")
213
+
214
# Chat with Bot: a simple multi-turn chat kept in st.session_state.
with tab4:
    st.subheader("🗣️ Chat with the Bot")
    if "messages" not in st.session_state:
        st.session_state.messages = [{"role": "system", "content": "You are a helpful assistant."}]

    # Replay the conversation so far. The system prompt is an instruction to
    # the model, not something the bot said, so it is not displayed.
    for message in st.session_state.messages:
        if message["role"] == "system":
            continue
        speaker = "User" if message["role"] == "user" else "Bot"
        st.write(f"**{speaker}**: {message['content']}")

    user_input = st.text_input("Type your message:", "")
    if st.button("Send Message"):
        if user_input and user_input.strip():
            st.session_state.messages.append({"role": "user", "content": user_input})
            # The history above was rendered before this turn was appended,
            # so show the new message now, next to the reply.
            st.write(f"**User**: {user_input}")
            with st.spinner("Bot is typing..."):
                try:
                    response = client.chat.completions.create(
                        messages=st.session_state.messages,
                        model="llama-3.3-70b-versatile",
                    )
                    bot_message = response.choices[0].message.content.strip()
                    st.session_state.messages.append({"role": "assistant", "content": bot_message})
                    st.write(f"**Bot**: {bot_message}")
                except Exception as e:
                    st.error(f"❌ An error occurred: {e}")
        else:
            st.warning("⚠️ Please enter a message to send!")
241
+
242
# Webpage Summarizer: fetch a page, preview its text, summarize it, offer a PDF.
with tab5:
    st.subheader("🌐 Enter a Webpage URL for Summarization")
    url = st.text_input("Enter the webpage URL:")
    if st.button("🔍 Summarize Webpage"):
        # Pasted URLs often carry stray spaces or newlines; strip before fetching.
        target_url = url.strip() if url else ""
        if target_url:
            extracted_text = ""
            with st.spinner("Extracting text from webpage..."):
                try:
                    extracted_text = extract_text_from_webpage(target_url)
                except RuntimeError as e:
                    st.error(f"❌ {e}")

            if extracted_text:
                st.success("✅ Text extracted from webpage.")
                st.text_area("🌐 Extracted Text:", extracted_text, height=200)
                with st.spinner("Summarizing the extracted text..."):
                    try:
                        summary = summarize_text_groq(extracted_text)
                        st.success("✅ Webpage Summary:")
                        st.write(summary)
                        summary_pdf = save_summary_to_pdf(summary)
                        st.download_button(
                            label="💾 Download Summary PDF",
                            data=summary_pdf,
                            file_name="webpage_summary.pdf",
                            mime="application/pdf",
                        )
                    except Exception as e:
                        st.error(f"❌ An error occurred: {e}")
        else:
            st.warning("⚠️ Please enter a valid URL!")