nishant43s committed on
Commit
088848a
·
verified ·
1 Parent(s): 32b84b0

Upload 14 files

Browse files
data/.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [theme]
2
+ base="dark"
3
+ primaryColor="#6643b5"
4
+ backgroundColor="#0E1117"
5
+ textColor="#FAFAFA"
6
+ secondaryBackgroundColor="#262730"
data/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from functions import *
2
+ import lxml
3
+ import lxml_html_clean
data/__pycache__/functions.cpython-312.pyc ADDED
Binary file (4.75 kB). View file
 
data/about_app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.set_page_config(
4
+ layout="wide",
5
+ initial_sidebar_state="collapsed"
6
+ )
7
+
8
+ ### insert external css
9
def insert_css(css_file: str) -> None:
    """Inject the contents of a CSS file into the current Streamlit page.

    Args:
        css_file: Path of the stylesheet to read (relative paths are
            resolved by ``open`` against the working directory).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(css_file, encoding="utf-8") as f:
        css = f.read()
    # The stylesheet is wrapped in a <style> tag and passed as raw HTML.
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
12
+
13
+ # app settings css
14
+ insert_css("css_files/app.css")
15
+
16
+ ### insert external html file
17
def insert_html(html_file):
    """Return the full text of an HTML file.

    Args:
        html_file: Path of the HTML document to read.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Read as UTF-8 explicitly; relying on the platform default encoding
    # can garble non-ASCII characters in the document.
    with open(html_file, encoding="utf-8") as f:
        return f.read()
20
+
21
+ st.markdown(insert_html("html_files/about.html"),unsafe_allow_html=True)
data/animation/particles.html ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <script src="https://cdn.jsdelivr.net/npm/particles.js@2.0.0/particles.min.js"></script>
3
+ <script>
4
+ document.addEventListener("DOMContentLoaded", function() {
5
+ particlesJS('particles-js', {
6
+ "particles": {
7
+ "number": {
8
+ "value": 80,
9
+ "density": {
10
+ "enable": true,
11
+ "value_area": 800
12
+ }
13
+ },
14
+ "color": {
15
+ "value": "#ffffff"
16
+ },
17
+ "shape": {
18
+ "type": "circle",
19
+ "stroke": {
20
+ "width": 0,
21
+ "color": "#000000"
22
+ }
23
+ },
24
+ "opacity": {
25
+ "value": 0.5,
26
+ "random": false,
27
+ "anim": {
28
+ "enable": false,
29
+ "speed": 1,
30
+ "opacity_min": 0.1,
31
+ "sync": false
32
+ }
33
+ },
34
+ "size": {
35
+ "value": 3,
36
+ "random": true,
37
+ "anim": {
38
+ "enable": false,
39
+ "speed": 40,
40
+ "size_min": 0.1,
41
+ "sync": false
42
+ }
43
+ },
44
+ "line_linked": {
45
+ "enable": true,
46
+ "distance": 150,
47
+ "color": "#ffffff",
48
+ "opacity": 0.4,
49
+ "width": 1
50
+ },
51
+ "move": {
52
+ "enable": true,
53
+ "speed": 6,
54
+ "direction": "none",
55
+ "random": false,
56
+ "straight": false,
57
+ "out_mode": "out",
58
+ "bounce": false,
59
+ "attract": {
60
+ "enable": false,
61
+ "rotateX": 600,
62
+ "rotateY": 1200
63
+ }
64
+ }
65
+ },
66
+ "interactivity": {
67
+ "detect_on": "canvas",
68
+ "events": {
69
+ "onhover": {
70
+ "enable": true,
71
+ "mode": "repulse"
72
+ },
73
+ "onclick": {
74
+ "enable": true,
75
+ "mode": "push"
76
+ },
77
+ "resize": true
78
+ }
79
+ },
80
+ "retina_detect": true
81
+ });
82
+ });
83
+ </script>
84
+ <div id="particles-js" style="position: absolute; width: 100%; height: 100%;"></div>
85
+
86
+
87
+
88
+
data/app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ ### page setup
4
+
5
+ web_qna = st.Page(
6
+ page="webscraper.py",
7
+ title="web Scraper",
8
+ icon=":material/globe:",
9
+ default=True
10
+ )
11
+
12
+ text_qna = st.Page(
13
+ page="text.py",
14
+ title="Text Q&A , Summarizer",
15
+ icon=":material/description:",
16
+ )
17
+
18
+ document_qna = st.Page(
19
+ page="document.py",
20
+ title="Document Q&A , Summarizer",
21
+ icon=":material/picture_as_pdf:",
22
+ )
23
+
24
+
25
+
26
+ about_app = st.Page(
27
+ page="about_app.py",
28
+ title="About App",
29
+ icon=":material/person:"
30
+ )
31
+
32
+ pg = st.navigation(
33
+ pages=[web_qna,text_qna,document_qna,about_app],
34
+ expanded=False,position="sidebar"
35
+ )
36
+ pg.run()
37
+
38
+ app_sidebar = st.sidebar
39
+
40
+ with app_sidebar:
41
+
42
+ # project Link
43
+ st.link_button(
44
+ label="Project Link",
45
+ url="https://github.com/Nishant43S/Gen-Ai-Summarizer-Question-Answering-App.git",
46
+ icon=":material/code_off:",
47
+ use_container_width=True
48
+ )
49
+
50
+ ### insert external css
51
def insert_css(css_file: str) -> None:
    """Inject the contents of a CSS file into the current Streamlit page.

    Args:
        css_file: Path of the stylesheet to read (relative paths are
            resolved by ``open`` against the working directory).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(css_file, encoding="utf-8") as f:
        css = f.read()
    # The stylesheet is wrapped in a <style> tag and passed as raw HTML.
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
54
+
55
+ # app settings css
56
+ insert_css("css_files/app.css")
data/css_files/app.css ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *{
2
+ scrollbar-width: none;
3
+ }
4
+
5
+ /* main app css */
6
+ [class="stAppViewMain main st-emotion-cache-bm2z3a ea3mdgi8"]{
7
+ background: #0f1116;
8
+ }
9
+
10
+ [class="st-emotion-cache-luriig ezrtsby2"]{
11
+ background: #0f1116;
12
+ }
13
+
14
+ [data-testid="stBaseButton-header"]{
15
+ visibility: hidden;
16
+ }
17
+
18
+ ::selection{
19
+ background: #421b9b;
20
+ color: whitesmoke;
21
+ }
22
+
data/data/Nishant Maity Latest.pdf ADDED
Binary file (82.7 kB). View file
 
data/document.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from functions import *
3
+ from transformers import pipeline
4
+ from pdfminer.high_level import extract_text
5
+ import os
6
+ import PyPDF2
7
+ import base64
8
+
9
+
10
+
11
+ #### chatbot function
12
+
13
def _load_qa_pipeline():
    """Build the extractive question-answering pipeline."""
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


def Chat_Bot(text_input, Best_size, max_answer_length):
    """Render a chat-style question-answering UI over ``text_input``.

    The conversation is kept in ``st.session_state.messages`` as a list of
    ``{"role": ..., "content": ...}`` dicts and replayed on every call.

    Args:
        text_input: Context text the answers are extracted from. When it
            is empty, no chat input box is shown.
        Best_size: Forwarded to the pipeline as ``n_best_size``.
        max_answer_length: Forwarded to the pipeline as ``max_answer_len``.
    """
    st.markdown(
        """
        <style>
        /* Fix the chat input box at the bottom */
        div[data-testid="stChatInput"] {
            position: fixed;
            bottom: 0;
            margin-bottom: 36px;
        }
        </style>
        """,
        unsafe_allow_html=True
    )

    # Load the Question Answering model. Wrapping the loader with
    # st.cache_resource lets the built pipeline be reused instead of being
    # constructed again each time this function runs. The wrapper is
    # applied here, at call time, so nothing Streamlit-related executes
    # while the module is merely being defined.
    qa_pipeline = st.cache_resource(_load_qa_pipeline)()

    # Initialize session state for chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # User inputs context
    context = text_input

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if context:
        user_input = st.chat_input("💬 Ask a question based on the context:")
        if user_input:
            with st.chat_message("user"):
                st.markdown(user_input)

            st.session_state.messages.append({"role": "user", "content": user_input})

            with st.spinner("🤔 Thinking..."):
                # NOTE(review): `n_best_size` does not appear among the
                # documented call parameters of the question-answering
                # pipeline (`top_k` is the documented one) -- confirm this
                # setting has any effect.
                response = qa_pipeline(
                    {"question": user_input, "context": context},
                    max_answer_len=max_answer_length, n_best_size=Best_size
                )
                answer = response["answer"]

            with st.chat_message("assistant"):
                st.markdown(f"{answer}")

            st.session_state.messages.append({"role": "assistant", "content": f"{answer}"})

    # Clear chat history button
    if st.button("🗑️ Clear Chat"):
        st.session_state.messages = []
        st.rerun()
68
+
69
+
70
+ # page settings
71
+ st.set_page_config(
72
+ layout="wide",
73
+ initial_sidebar_state="collapsed"
74
+ )
75
+
76
+ ### insert external css
77
def insert_css(css_file: str) -> None:
    """Inject the contents of a CSS file into the current Streamlit page.

    Args:
        css_file: Path of the stylesheet to read (relative paths are
            resolved by ``open`` against the working directory).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(css_file, encoding="utf-8") as f:
        css = f.read()
    # The stylesheet is wrapped in a <style> tag and passed as raw HTML.
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
80
+
81
+ # app settings css
82
+ insert_css("css_files/app.css")
83
+
84
def extract_pdf_text(pdf_file):
    """Return all text of a PDF, as produced by pdfminer's ``extract_text``.

    Args:
        pdf_file: The PDF to read; passed to ``extract_text`` unchanged.
    """
    document_text = extract_text(pdf_file)
    return document_text
87
+
88
+
89
+
90
+ #### displaying uploaded pdf file
91
def display_pdf_file(uploaded_file):
    """Save an uploaded PDF under ``data/`` and show it inline on the page.

    Any failure while saving or rendering is reported with ``st.warning``
    instead of being raised.

    Args:
        uploaded_file: The upload to display. Must expose ``.name`` and
            ``.getbuffer()`` (presumably a Streamlit ``UploadedFile`` --
            confirm against the caller).
    """
    #### saving the uploaded file
    def save_uploadfile(save_file, target_path):
        with open(target_path, "wb") as f:
            f.write(save_file.getbuffer())
        return st.toast("file uploaded: {}".format(save_file.name))

    ### display pdf on screen
    def displayPDF(pdf_file):
        with open(pdf_file, "rb") as f:
            base64_pdf = base64.b64encode(f.read()).decode("utf-8")

        pdf_display = f"""
        <iframe
            src="data:application/pdf;base64,{base64_pdf}"
            width="950" height="1000"
            type="application/pdf"
        >
        </iframe>
        """

        st.markdown(pdf_display, unsafe_allow_html=True)

    try:
        # Keep only the final path component so a crafted upload name
        # cannot place the file outside the "data" directory.
        safe_name = os.path.basename(uploaded_file.name)
        # The target directory may not exist yet in a fresh checkout.
        os.makedirs("data", exist_ok=True)
        pdf_file = os.path.join("data", safe_name)

        ### save and display file
        save_uploadfile(uploaded_file, pdf_file)
        displayPDF(pdf_file)
    except Exception as e:
        # The exception text is folded into the message: st.warning takes
        # a single positional body, so passing `e` as a second positional
        # argument would itself raise inside this handler.
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
125
+
126
+
127
+ # --- PDF Page Text Extractor Function ---
128
def extract_text_from_pdf(pdf_file, page_num):
    """Extract the text of one page of a PDF.

    Args:
        pdf_file: The PDF to open with ``PyPDF2.PdfReader``.
        page_num: 1-based number of the page to read.

    Returns:
        A ``(text, total_pages)`` tuple. ``text`` is ``None`` when
        ``page_num`` is out of range. If anything raises, the error is
        shown with ``st.error`` and ``(None, 0)`` is returned.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        page_count = len(pdf_reader.pages)
        # Guard clause: reject page numbers outside 1..page_count.
        if not (1 <= page_num <= page_count):
            return None, page_count
        # Pages are stored 0-based, the caller counts from 1.
        selected_page = pdf_reader.pages[page_num - 1]
        return selected_page.extract_text(), page_count
    except Exception as e:
        st.error(f"Error extracting text: {e}")
        return None, 0
141
+
142
def pdf_Summarizer(file):
    """Show a PDF in one tab and a page picker in another; return page text.

    Args:
        file: The uploaded PDF; passed to ``display_pdf_file``,
            ``PyPDF2.PdfReader`` and ``extract_text_from_pdf``.

    Returns:
        The text of the selected page as a string. An empty string is
        returned when the PDF has no pages or the page text could not be
        extracted, so callers can always treat the result as ``str``.
    """
    Display_col, Summarizer_col = st.tabs(["Pdf Display","PDF Summarizer"])
    with Display_col:
        display_pdf_file(file)
    with Summarizer_col:
        temp_reader = PyPDF2.PdfReader(file)
        total_pages = len(temp_reader.pages)
        st.write(f"### Total Pages: {total_pages}")

        # A page picker with min_value=1 cannot be built for a document
        # without pages (its maximum would be below its minimum).
        if total_pages == 0:
            st.warning("The PDF has no pages to summarize.", icon="⚠️")
            return ""

        ## columns
        Input_col = st.columns([4,10])
        with Input_col[0]:
            page_number = st.number_input(
                "Select page number",
                min_value=1, max_value=total_pages,
                value=1, step=1)
        st.write("Page Number {}".format(page_number))
        text, _ = extract_text_from_pdf(file, page_number)
    # extract_text_from_pdf yields None on failure / out-of-range pages;
    # normalise that to "" because callers pass the result to string
    # functions.
    return text or ""
161
+
162
+
163
+ app_sidebar = st.sidebar
164
+
165
+ with app_sidebar:
166
+ select_mode = st.selectbox(
167
+ label="Select Mode",
168
+ options=["Summarizer","Que/Ans"],
169
+ key="mode selector",
170
+ index=0
171
+ )
172
+
173
+ if select_mode == "Que/Ans":
174
+ st.write("### Que/Ans Settings")
175
+
176
+ max_answer_length = st.slider(
177
+ label="Max answer",
178
+ min_value=1,
179
+ max_value=10,
180
+ key="max answer",
181
+ value=4
182
+ )
183
+
184
+ max_answer_length = max_answer_length*10
185
+
186
+ Best_size_ = st.slider(
187
+ label="n best size",
188
+ min_value=1,
189
+ max_value=10,
190
+ key="best size",
191
+ value=5
192
+ )
193
+
194
def _load_summarizer():
    """Build the BART summarization pipeline."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def Summarizer_Model(context, Max_Length):
    """Summarize ``context`` with the facebook/bart-large-cnn model.

    Args:
        context: Text to summarize.
        Max_Length: Requested summary length; used as ``min_length`` and,
            plus 20, as ``max_length`` of the generation call.

    Returns:
        The summary text, or ``None`` if summarization failed (the error
        is shown with ``st.warning``).
    """
    try:
        # Wrapping the loader with st.cache_resource lets the built
        # pipeline be reused instead of reloading the model on each call.
        summarizer = st.cache_resource(_load_summarizer)()
        Summary = summarizer(
            context,
            max_length=Max_Length+20,
            min_length=Max_Length,
            do_sample=False,
            # Cut inputs that exceed the model's maximum input size
            # instead of failing on them.
            truncation=True
        )
        return Summary[0]['summary_text']

    except Exception as e:
        st.warning(f"Error...\n{e}",icon="⚠️")
        return None
207
+
208
+ app_col = st.columns([2,8,2])
209
+
210
+ with app_col[1]:
211
+
212
+ if select_mode == "Summarizer":
213
+ st.write("## πŸ“‘ Document Summarizer")
214
+ elif select_mode == "Que/Ans":
215
+ st.write("## πŸ“‘ Document Question Answering")
216
+
217
+ ### question answering
218
# Question-answering layout: a wide centre column between two narrow ones.
que_col = st.columns([2,8,2])

with que_col[1]:
    if select_mode == "Que/Ans":
        ## input file
        File_input = st.file_uploader(
            label="Drop Your File hear",
            type=["txt", "pdf"],
            key="file uploader"
        )

        if File_input is not None:
            # Plain-text uploads are decoded directly; every other accepted
            # upload goes through the PDF text extractor.
            if File_input.type == "text/plain":
                text = File_input.read().decode("utf-8")
            else:
                text = extract_pdf_text(File_input)

            # Both kinds of upload feed the same chat UI.
            Chat_Bot(
                text_input=Text_Cleaning(text),
                Best_size=Best_size_,
                max_answer_length=max_answer_length
            )
244
+
245
+ # session state
246
+ if 'input_text' not in st.session_state:
247
+ st.session_state.input_text = []
248
+
249
+ if 'pdf_text' not in st.session_state:
250
+ st.session_state.pdf_text = []
251
+
252
+ if 'summary_text' not in st.session_state:
253
+ st.session_state.summary_text = []
254
+
255
def _summary_length_slider(label, word_count, key=None):
    """Show the summary-length slider for a text of ``word_count`` words.

    The default is 30% of the word count, clamped to at least 1 so it can
    never fall below the slider's ``min_value`` (which happens for texts
    shorter than four words). The upper bound is kept at 2 or more so it
    stays above the lower bound.
    NOTE(review): Streamlit is believed to reject a slider whose bounds
    coincide -- confirm; an upper bound of 0 (empty text) is certainly
    below ``min_value=1``.
    """
    upper_bound = max(word_count, 2)
    default_length = max(1, int(word_count * 0.3))
    return st.slider(
        label=label,
        min_value=1,
        max_value=upper_bound,
        value=default_length,
        key=key
    )


# Summarizer layout: a wide centre column between two narrow ones.
summ_col = st.columns([2,8,2])

with summ_col[1]:
    if select_mode == "Summarizer":
        ## input file
        File_input = st.file_uploader(
            label="Drop Your File hear",
            type=["txt", "pdf"],
            key="file uploader"
        )
        if File_input is not None:
            if File_input.type == "text/plain":
                # Plain-text upload: show the cleaned text for editing.
                text = File_input.read().decode("utf-8")
                st.session_state.input_text = st.text_area(label="Uploaded document Text",value=Text_Cleaning(text),height=200)
                Text_input = Text_Cleaning(st.session_state.input_text)
                max_length = _summary_length_slider(
                    "Max Length",
                    len(st.session_state.input_text.split())
                )

                if st.button(label="πŸ“„ Generate Summary"):
                    with st.spinner("Generating Summary"):

                        Generated_Summary = Summarizer_Model(context=Text_input,Max_Length=max_length)
                        st.write(Generated_Summary)
                        Copy_Text(Generated_Summary)

            else:
                # PDF upload: summarize the page picked in pdf_Summarizer.
                st.session_state.summary_text = []
                st.session_state.pdf_text = pdf_Summarizer(File_input)

                ## text area
                Text_Area_Input = st.text_area(
                    "Pdf Text",value=Text_Cleaning(st.session_state.pdf_text),
                    key="text area",height=450
                )

                Max_Pdf_Summary_len = _summary_length_slider(
                    "MAx Length",
                    len(Text_Area_Input.split()),
                    key="pdf summarizer Slider"
                )

                if st.button("πŸ“‘ Generate Summary",key="pdf Summary"):
                    # generating summary
                    with st.spinner("Generating Summary"):
                        ## initializing model
                        st.session_state.summary_text = Summarizer_Model(
                            context=Text_Area_Input,Max_Length=Max_Pdf_Summary_len
                        )

                    st.write(st.session_state.summary_text)
                    Copy_Text(st.session_state.summary_text)
315
+
316
+
317
+
318
+
319
+
320
+
data/functions.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # creating function file
2
+ import streamlit as st
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ import re
6
+ from cleantext import clean
7
+ import streamlit.components.v1 as component
8
+
9
def Copy_Text(text):
    """Render a copy-to-clipboard icon for ``text`` as an HTML component.

    The text is placed in a ``<p id="text">`` element; clicking the icon
    copies that element's ``innerText`` and briefly shows a "Copied!"
    tooltip.

    Args:
        text: The text to make copyable. ``None`` is rendered as an empty
            string; other values are converted with ``str``.
    """
    # Local import so the escaping fix does not depend on the file's
    # top-level import block.
    from html import escape

    # The text can come from scraped pages or model output, so escape it
    # before interpolating it into markup; otherwise any tags it contains
    # would be interpreted as HTML.
    safe_text = escape("" if text is None else str(text))

    Html_Code = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Copy Button</title>
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
        <style>
            p {{
                font-size: 18px;
                margin-bottom: 10px;
            }}
            .copy-link {{
                color: #6643b5;
                text-decoration: none;
                margin-top: 32px;
                margin-left: 13px;
                font-size: 20px;
                display: inline-flex;
                align-items: center;
                gap: 5px;
                position: relative;
                transition: background 0.3s;
                cursor: pointer;
            }}
            .copy-link:hover {{
                color: #8594e4;
            }}
            .tooltip {{
                position: absolute;
                top: -30px;
                left: 50%;
                transform: translateX(-50%);
                background: black;
                color: white;
                padding: 5px 10px;
                font-size: 12px;
                border-radius: 5px;
                opacity: 0;
                transition: opacity 0.3s, transform 0.3s;
            }}
            .show-tooltip {{
                opacity: 1;
                transform: translate(-50%, -10px);
            }}
        </style>
    </head>
    <body>
        <div class="container">
            <a href="#" class="copy-link" onclick="copyText(event)">
                <i class="fa-regular fa-copy"></i>
                <span class="tooltip" id="tooltip">Copied!</span>
            </a>
            <br>
            <br>
            <p id="text">{safe_text}</p>
        </div>
        <script>
            function copyText(event) {{
                event.preventDefault();
                const text = document.getElementById("text").innerText;
                const textarea = document.createElement("textarea");
                textarea.value = text;
                document.body.appendChild(textarea);
                textarea.select();
                document.execCommand("copy");
                document.body.removeChild(textarea);

                const tooltip = document.getElementById("tooltip");
                tooltip.classList.add("show-tooltip");
                setTimeout(() => {{
                    tooltip.classList.remove("show-tooltip");
                }}, 1000);
            }}
        </script>
    </body>
    </html>
    """
    component.html(Html_Code,height=60,width=60)
93
+
94
+
95
+
96
+
97
def scrape_paragraphs(url, num_paragraphs, timeout=10):
    """Fetch a web page and return the text of its first ``<p>`` elements.

    Args:
        url: Address of the page to download.
        num_paragraphs: Maximum number of paragraphs to return.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of paragraph strings. The list is empty when the response
        status is not 200 or when any error occurs (errors are shown with
        ``st.warning``).
    """
    try:
        # Always bound the request: without a timeout an unresponsive
        # host would block the app indefinitely.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.text, 'lxml')
        paragraphs = [p.get_text() for p in soup.find_all('p')[:num_paragraphs]]
        return paragraphs
    except Exception as e:
        # Deliberately broad: this function reports failures in the UI and
        # returns an empty list rather than raising.
        st.warning(f"Error...\n{e}",icon="⚠️")
        return []
108
+
109
+
110
+ ### text cleaning
111
def Text_Cleaning(text:str)->str:
    """Return a cleaned copy of ``text``.

    Steps, in order: remove backtick and caret characters, run the
    ``cleantext.clean`` normaliser, then strip bracketed numeric markers
    such as ``[12]``.

    Args:
        text: The text to clean. ``None`` is tolerated and treated as
            empty, because in-file callers may pass the result of a failed
            PDF page extraction.

    Returns:
        The cleaned text (``""`` when ``text`` is ``None``).
    """
    # Guard first: re.sub raises TypeError when given None.
    if text is None:
        return ""

    pattern = r'[`^]'
    cleaned_paragraph = re.sub(pattern, '', text)

    clean_text = clean(
        text=cleaned_paragraph,fix_unicode=True,
        to_ascii=True,
        no_line_breaks=False,
        keep_two_line_breaks=True
    )

    # Drop bracketed numeric markers, e.g. "[3]".
    pattern = r'\[\d+\]'
    cleaned_text_output = re.sub(pattern, '', clean_text)
    return cleaned_text_output
data/html_files/about.html ADDED
@@ -0,0 +1,526 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>About Project</title>
8
+ <!-- <link rel="stylesheet" href="style.css"> -->
9
+ <!-- link for poppins font -->
10
+ <link rel="preconnect" href="https://fonts.googleapis.com">
11
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
12
+ <link
13
+ href="https://fonts.googleapis.com/css2?family=Poppins:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;0,800;0,900;1,100;1,200;1,300;1,400;1,500;1,600;1,700;1,800;1,900&display=swap"
14
+ rel="stylesheet">
15
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.7.2/css/all.min.css"
16
+ integrity="sha512-Evv84Mr4kqVGRNSgIGL/F/aIDqQb7xQ2vcrdIwxfjThSH8CSR7PBEakCr51Ck+w+/U6swU2Im1vVX0SVk9ABhg=="
17
+ crossorigin="anonymous" referrerpolicy="no-referrer" />
18
+ <!-- Internal CSS -->
19
+ <style>
20
+ * {
21
+ margin: 0;
22
+ padding: 0;
23
+ box-sizing: border-box;
24
+ /* cursor: pointer; */
25
+ }
26
+ body {
27
+ font-family: 'Poppins', sans-serif;
28
+ background-color: rgb(14, 17, 23);
29
+ color: rgb(250, 250, 250);
30
+ line-height: 1.6;
31
+ }
32
+ .main-container {
33
+ max-width: 900px;
34
+ margin: 0 auto;
35
+ padding: 20px;
36
+ }
37
+ /* ------ Header Section -------*/
38
+ header {
39
+ /* text-align: center; */
40
+ margin-bottom: 30px;
41
+ }
42
+ .hover {
43
+ text-shadow: 0 0 20px rgba(98, 162, 250, 0.4);
44
+ }
45
+ header h1 {
46
+ font-size: 3em;
47
+ color: rgb(98, 162, 250);
48
+ cursor: pointer;
49
+ }
50
+ header h2 {
51
+ font-size: 2.5em;
52
+ color: white;
53
+ cursor: pointer;
54
+ }
55
+ header h2:hover {
56
+ text-shadow: 0 0 20px rgba(255, 255, 255, 0.4);
57
+ }
58
+ /* ------ About Section ------ */
59
+ .about {
60
+ position: relative;
61
+ background-color: rgb(24, 28, 38);
62
+ padding: 20px;
63
+ border-radius: 15px;
64
+ margin-bottom: 40px;
65
+ box-shadow: 0 8px 15px rgba(0, 0, 0, 0.4);
66
+ cursor: pointer;
67
+ /* width: 500px; */
68
+ }
69
+ .about:hover {
70
+ transform: scale(1.02);
71
+ border: 2px solid rgb(98, 162, 250);
72
+ }
73
+ .about h3 {
74
+ font-size: 1.8em;
75
+ margin-bottom: 15px;
76
+ color: rgb(98, 162, 250);
77
+ }
78
+ .about p,
79
+ .about ul {
80
+ font-size: 1.1em;
81
+ margin-bottom: 15px;
82
+ }
83
+ .about ul {
84
+ padding-left: 20px;
85
+ list-style-type: disc;
86
+ }
87
+ /* ------ Technologies Section ------ */
88
+ .technologies {
89
+ margin-top: 30px;
90
+ }
91
+ .technologies h2 {
92
+ font-size: 2em;
93
+ margin-bottom: 20px;
94
+ color: rgb(98, 162, 250);
95
+ cursor: pointer;
96
+ }
97
+ .technologies h2:hover {
98
+ /* text-decoration: underline; */
99
+ text-shadow: 0 0 20px rgba(98, 162, 250, 0.4);
100
+ }
101
+ .row {
102
+ display: flex;
103
+ justify-content: space-around;
104
+ align-items: center;
105
+ }
106
+ .tech-card {
107
+ align-items: center;
108
+ /* width: 30%;
109
+ height: 300px; */
110
+ width: 43%;
111
+ height: 255px;
112
+ /* background-color: rgb(46, 84, 182); */
113
+ background-color: rgb(24, 28, 38);
114
+ padding: 15px;
115
+ border-radius: 15px;
116
+ box-shadow: 0 8px 15px rgba(0, 0, 0, 0.4);
117
+ margin-bottom: 20px;
118
+ position: relative;
119
+ overflow: hidden;
120
+ transition: transform 0.3s ease;
121
+ cursor: pointer;
122
+ }
123
+ .tech-card h4 {
124
+ color: rgb(98, 162, 250);
125
+ font-size: 1.3rem;
126
+ text-align: center;
127
+ margin-bottom: 5px;
128
+ }
129
+ /*
130
+ .tech-card > p {
131
+ text-align: justify;
132
+ }
133
+ */
134
+ .tech-card:hover {
135
+ transform: scale(1.02);
136
+ }
137
+ .tech-card::before {
138
+ content: '';
139
+ position: absolute;
140
+ top: 0;
141
+ left: 0;
142
+ width: 100%;
143
+ height: 100%;
144
+ background: rgba(24, 28, 38, 0.4);
145
+ /* blur color */
146
+ filter: blur(20px);
147
+ z-index: -1;
148
+ opacity: 0;
149
+ transition: opacity 0.3s ease;
150
+ }
151
+ .tech-card:hover::before {
152
+ opacity: 1;
153
+ }
154
+ @keyframes glow {
155
+ 0% {
156
+ box-shadow: 0 0 20px rgba(24, 28, 38, 0.4), 0 0 40px rgba(24, 28, 38, 0.3), 0 0 60px rgba(24, 28, 38, 0.2);
157
+ }
158
+
159
+ 100% {
160
+ box-shadow: 0 0 20px rgba(24, 28, 38, 0.4), 0 0 40px rgba(24, 28, 38, 0.3), 0 0 60px rgba(24, 28, 38, 0.2);
161
+ }
162
+ }
163
+ .tech-card:hover {
164
+ animation: glow 1.5s infinite ease-in-out;
165
+ }
166
+ /* ------ Developers Section ------ */
167
+ .developers {
168
+ margin-top: 30px;
169
+ }
170
+ .developers h2 {
171
+ font-size: 2em;
172
+ margin-bottom: 20px;
173
+ color: rgb(98, 162, 250);
174
+ cursor: pointer;
175
+ }
176
+ .developers h2:hover {
177
+ text-shadow: 0 0 20px rgba(98, 162, 250, 0.4);
178
+ }
179
+ .developer-card {
180
+ display: flex;
181
+ align-items: center;
182
+ background-color: rgb(24, 28, 38);
183
+ padding: 15px;
184
+ border-radius: 15px;
185
+ box-shadow: 0 8px 15px rgba(0, 0, 0, 0.4);
186
+ margin-bottom: 20px;
187
+ }
188
+ .profile img {
189
+ width: 100px;
190
+ height: 100px;
191
+ border-radius: 50%;
192
+ border: 2px solid rgb(98, 162, 250);
193
+ }
194
+ .details {
195
+ margin-left: 20px;
196
+ display: flex;
197
+ width: 80%;
198
+ margin: auto;
199
+ justify-content: space-between;
200
+ }
201
+ .details p {
202
+ font-size: 1.2em;
203
+ margin-bottom: 10px;
204
+ }
205
+ .developer {
206
+ display: inline-block;
207
+ width: 200px;
208
+ }
209
+ .developer-name {
210
+ cursor: pointer;
211
+ width: 200px;
212
+ }
213
+ .developer-name:hover {
214
+ text-shadow: 0 0 20px rgba(255, 255, 255, 0.4);
215
+ }
216
+ .social-icons {
217
+ display: flex;
218
+
219
+ }
220
+ .social-icons a {
221
+ text-decoration: none;
222
+ margin: 0px 15px;
223
+ }
224
+ .fa-brands.fa-linkedin.fa-2xl {
225
+ color: gray
226
+ }
227
+ .fa-brands.fa-linkedin.fa-2xl:hover {
228
+ color: rgb(98, 162, 250);
229
+ text-shadow: 0 0 20px rgba(98, 162, 250, 0.9);
230
+ }
231
+ .fa-brands.fa-github.fa-2xl {
232
+ color: gray;
233
+ }
234
+ .fa-brands.fa-github.fa-2xl:hover {
235
+ /* color: rgb(98, 162, 250); */
236
+ color: rgb(255, 255, 255);
237
+ text-shadow: 0 0 20px rgba(98, 162, 250, 0.9);
238
+ }
239
+ /* ----------- Media Query Section ------------ */
240
+ @media screen and (max-width: 768px) {
241
+ .main-container {
242
+ max-width: 600px;
243
+ margin: 0 auto;
244
+ padding: 20px;
245
+ }
246
+ .row {
247
+ display: block;
248
+ }
249
+ .tech-card {
250
+ margin: auto;
251
+ width: 100%;
252
+ bottom: 10px;
253
+ margin-bottom: 20px;
254
+ height: 215px;
255
+ }
256
+ }
257
+ @media screen and (max-width: 600px) {
258
+ .about p,
259
+ .about ul {
260
+ font-size: 16px;
261
+ margin-bottom: 10px;
262
+ }
263
+ }
264
+ @media screen and (max-width: 500px) {
265
+ /* .main-container {
266
+ max-width: 600px;
267
+ margin: 0 auto;
268
+ padding: 20px;
269
+ } */
270
+ header h1 {
271
+ font-size: 35px;
272
+ }
273
+ header h2 {
274
+ font-size: 28px;
275
+ }
276
+ .about h3 {
277
+ font-size: 1.5em;
278
+ }
279
+ .technologies h2 {
280
+ font-size: 1.5em;
281
+ }
282
+ .developers h2 {
283
+ font-size: 1.5em;
284
+ }
285
+ .details p {
286
+ font-size: 17px;
287
+ margin-bottom: 10px;
288
+ display: block;
289
+ }
290
+ .developer {
291
+ width: 50%;
292
+ margin: auto;
293
+ display: block;
294
+ margin-top: 10px;
295
+ }
296
+ }
297
+ @media screen and (max-width: 420px) {
298
+ .about p,
299
+ .about ul {
300
+ font-size: 15px;
301
+ }
302
+ p {
303
+ font-size: 15px;
304
+ }
305
+ header h1 {
306
+ font-size: 28px;
307
+ }
308
+ header h2 {
309
+ font-size: 24px;
310
+ }
311
+ .about h3 {
312
+ font-size: 1.2em;
313
+ }
314
+ .technologies h2 {
315
+ font-size: 1.2em;
316
+ }
317
+ .developers h2 {
318
+ font-size: 1.2em;
319
+ }
320
+ .details {
321
+ display: block;
322
+ }
323
+ }
324
+ @media screen and (max-width: 375px) {
325
+ header h1 {
326
+ font-size: 24px;
327
+ }
328
+ header h2 {
329
+ font-size: 20px;
330
+ }
331
+ .about h3 {
332
+ font-size: 1.1em;
333
+ }
334
+ .technologies h2 {
335
+ font-size: 1.1em;
336
+ }
337
+ .developers h2 {
338
+ font-size: 1.1em;
339
+ }
340
+ .details {
341
+ display: block;
342
+ }
343
+ }
344
+ </style>
345
+ </head>
346
+ <body>
347
+ <!-- Main Container -->
348
+ <div class="main-container">
349
+ <!-- Header Section -->
350
+ <header>
351
+ <h1 class="hover">About Project</h1>
352
+ </header>
353
+ <!-- About Project Section -->
354
+ <section class="about">
355
+ <p>
356
+ In the digital era, information overload is a major challenge. Our <strong>AI for Smart Document
357
+ Summarization
358
+ and Automated Question Generation </strong> aims to simplify content processing by leveraging
359
+ advanced natural
360
+ language processing (NLP) techniques.
361
+ This project is designed for students, researchers, content creators, and professionals who need quick
362
+ insights from large volumes of data. By automating summarization and question generation, our AI
363
+ solution enhances productivity and knowledge retention effortlessly.
364
+ </p>
365
+ <p><strong>How It Works:</strong></p>
366
+ <ul>
367
+ <li>
368
+ Input – Paste text, enter a URL, or upload a document.
369
+ </li>
370
+ <li>
371
+ Processing – The AI extracts essential insights, removes redundancies, and formulates a structured
372
+ summary.
373
+ </li>
374
+ <li>
375
+ Output – Receive a well-structured summary and a set of AI-generated questions for further analysis.
376
+ </li>
377
+ </ul>
378
+
379
+ <p><strong>Key features include:</strong></p>
380
+ <ul>
381
+ <li>
382
+ Smart Summarization – Input any text, URL, or document, and our AI-powered system generates a
383
+ concise and meaningful summary.
384
+ </li>
385
+ <li>
386
+ Automated Question Generation – Transform lengthy articles into relevant questions, making it ideal
387
+ for study materials, research, and knowledge assessment.
388
+ </li>
389
+ </ul>
390
+ <p>
391
+ Developed using <strong>Streamlit</strong>, this project demonstrates cutting-edge technology with an
392
+ interactive and user-friendly interface.
393
+ </p>
394
+ </section>
395
+ <!-- Technologies Used Section -->
396
+ <section class="technologies">
397
+ <h2>Technologies Used</h2>
398
+ <div class="technology-card">
399
+ <!-- Row 1 -->
400
+ <div class="row">
401
+ <div class="tech-card">
402
+ <h4>Python</h4>
403
+ <p>
404
+ Used as the core programming language for implementing document summarization and automated question generation, leveraging NLP models and AI algorithms for efficient text processing.
405
+ </p>
406
+ </div>
407
+ <div class="tech-card">
408
+ <h4>HTML</h4>
409
+ <p>
410
+ Used to structure the web pages, including sections like "About Project," "Technologies Used," and user interaction areas, ensuring a clean and semantic layout.
411
+ </p>
412
+ </div>
413
+ </div>
414
+ <!-- Row 2 -->
415
+ <div class="row">
416
+ <div class="tech-card">
417
+ <h4>CSS</h4>
418
+ <p>
419
+ Utilized for styling the web interface, improving visual aesthetics, adding animations, and ensuring a responsive design across different devices.
420
+ </p>
421
+ </div>
422
+ <div class="tech-card">
423
+ <h4>JavaScript</h4>
424
+ <p>
425
+ Implemented interactive elements like real-time updates, animations, and user-friendly components to enhance the overall experience.
426
+ </p>
427
+ </div>
428
+ </div>
429
+ <!-- Row 3 -->
430
+ <div class="row">
431
+ <div class="tech-card">
432
+ <h4>Streamlit</h4>
433
+ <p>
434
+ Used to develop the web-based interface, allowing users to input text, URLs, or documents for summarization and question generation seamlessly.
435
+ </p>
436
+ </div>
437
+ <div class="tech-card">
438
+ <h4>Facebook-BART</h4>
439
+ <p>
440
+ A powerful transformer-based model used for text summarization and natural language understanding, ensuring high-quality summaries with contextual relevance.
441
+ </p>
442
+ </div>
443
+ </div>
444
+ <!-- Row 4 -->
445
+ <div class="row">
446
+ <div class="tech-card">
447
+ <h4>Transformers (Hugging Face)</h4>
448
+ <p>
449
+ Leverages pre-trained transformer models for advanced natural language processing tasks, optimizing summarization and question generation.
450
+ </p>
451
+ </div>
452
+ <div class="tech-card">
453
+ <h4>PDFMiner</h4>
454
+ <p>
455
+ Used for extracting text from PDF files, enabling document-based summarization and automated question generation without losing content integrity.
456
+ </p>
457
+ </div>
458
+ </div><!-- Row 5 -->
459
+ <div class="row">
460
+ <div class="tech-card">
461
+ <h4>Web Summarizer & Q&A Module</h4>
462
+ <p>
463
+ An AI-powered system designed to extract key insights from online content, generating concise summaries and relevant questions for better comprehension.
464
+ </p>
465
+ </div>
466
+ <div class="tech-card">
467
+ <h4>Text Processing</h4>
468
+ <p>
469
+ Handles raw text input efficiently, ensuring proper parsing, cleaning, and analysis before summarization and question generation.
470
+ </p>
471
+ </div>
472
+ </div>
473
+ <!-- Row 6 -->
474
+ <div class="row">
475
+ <div class="tech-card">
476
+ <h4>PDF Support</h4>
477
+ <p>
478
+ Enables users to upload and process PDF documents, extracting text to generate summaries and structured questions automatically.
479
+ </p>
480
+ </div>
481
+ <!-- <div class="tech-card">
482
+ <h4>Text Processing</h4>
483
+ <p>
484
+ Handles raw text input efficiently, ensuring proper parsing, cleaning, and analysis before summarization and question generation.
485
+ </p>
486
+ </div> -->
487
+ </div>
488
+ </div>
489
+ </section>
490
+ <!-- Developers Section -->
491
+ <section class="developers">
492
+ <h2>Developers</h2>
493
+ <div class="developer-card">
494
+ <div class="details">
495
+ <!-- Nishant -->
496
+ <div class="developer">
497
+ <p class="developer-name"><strong>Nishant Maity</strong></p>
498
+ <div class="social-icons">
499
+ <a href="https://www.linkedin.com/in/nishant-maity/" target="_blank">
500
+ <i class="fa-brands fa-linkedin fa-2xl"></i>
501
+ </a>
502
+ <a href="https://github.com/Nishant43S" target="_blank">
503
+ <i class="fa-brands fa-github fa-2xl"></i>
504
+ </a>
505
+ </div>
506
+ </div>
507
+ <!-- Yash -->
508
+ <div class="developer">
509
+ <p class="developer-name"><strong>Yash Sahu</strong></p>
510
+ <div class="social-icons">
511
+ <a href="https://www.linkedin.com/in/yashsahu02" target="_blank">
512
+ <i class="fa-brands fa-linkedin fa-2xl"></i>
513
+ </a>
514
+ <a href="https://github.com/yashsahu02" target="_blank">
515
+ <i class="fa-brands fa-github fa-2xl"></i>
516
+ </a>
517
+ </div>
518
+ </div>
519
+ </div>
520
+ </div>
521
+ </section>
522
+ </div>
523
+ <!-- FontAwesome for icons -->
524
+ <script src="https://kit.fontawesome.com/a076d05399.js" crossorigin="anonymous"></script>
525
+ </body>
526
+ </html>
data/requirements.txt ADDED
Binary file (18.6 kB). View file
 
data/text.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import re
3
+ from cleantext import clean
4
+ import streamlit.components.v1 as component
5
+ from transformers import pipeline
6
+ from functions import Copy_Text
7
+ from functions import *
8
+
9
+ # page settings
10
+ st.set_page_config(
11
+ layout="wide",
12
+ initial_sidebar_state="collapsed"
13
+ )
14
+
15
+ ### insert external css
16
def insert_css(css_file: str) -> None:
    """Inject the contents of a CSS file into the Streamlit page.

    The file is read in full and emitted through ``st.markdown`` wrapped in a
    ``<style>`` tag, with ``unsafe_allow_html=True`` so the tag is rendered
    instead of being escaped.

    Args:
        css_file: Path to the stylesheet to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Pin the encoding: the platform default is not UTF-8 everywhere, and a
    # stylesheet containing non-ASCII characters would otherwise fail to decode.
    with open(css_file, encoding="utf-8") as f:
        css = f.read()
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
19
+
20
+ # app settings css
21
+ insert_css("css_files/app.css")
22
+
23
+ # sidebar
24
+ app_sidebar = st.sidebar
25
+ with app_sidebar:
26
+ select_mode = st.selectbox(
27
+ label="Select Mode",
28
+ options=["Summarizer","Que/Ans"],
29
+ key="mode selector",
30
+ index=0
31
+ )
32
+
33
+ if select_mode == "Que/Ans":
34
+ st.write("### Que/Ans Settings")
35
+
36
+ max_answer_length = st.slider(
37
+ label="Max answer",
38
+ min_value=1,
39
+ max_value=10,
40
+ key="max answer",
41
+ value=4
42
+ )
43
+
44
+ max_answer_length = max_answer_length*10
45
+
46
+ Best_size = st.slider(
47
+ label="n best size",
48
+ min_value=1,
49
+ max_value=10,
50
+ key="best size",
51
+ value=5
52
+ )
53
+
54
+ # initialize session state
55
+ if 'summary' not in st.session_state:
56
+ st.session_state.summary = []
57
+
58
+ app_col = st.columns([2,8,2])
59
+
60
+ with app_col[1]:
61
+
62
+ if select_mode == "Summarizer":
63
+ st.write("## Text Summarizer")
64
+ elif select_mode == "Que/Ans":
65
+ st.write("## πŸ“š Text Question Answering")
66
+
67
+ #################### question answering ####################
68
+
69
+ if select_mode == "Que/Ans":
70
+ app_c = st.columns([2,8,2])
71
+ with app_c[0]:
72
+ pass
73
+ with app_c[1]:
74
+ # Inject custom CSS to place the chat input at the bottom
75
+ st.markdown(
76
+ """
77
+ <style>
78
+ /* Fix the chat input box at the bottom */
79
+ div[data-testid="stChatInput"] {
80
+ position: fixed;
81
+ bottom: 0;
82
+ margin-bottom: 36px;
83
+
84
+ }
85
+ </style>
86
+ """,
87
+ unsafe_allow_html=True
88
+ )
89
+ # Load model
90
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
91
+
92
+ # Initialize session state
93
+ if "messages" not in st.session_state:
94
+ st.session_state.messages = []
95
+
96
+ # User inputs context
97
+ context = st.text_area("πŸ“œ Enter Text Hear", "", height=200)
98
+ context = Text_Cleaning(context)
99
+
100
+ # Display chat history
101
+ for message in st.session_state.messages:
102
+ with st.chat_message(message["role"]):
103
+ st.markdown(message["content"])
104
+
105
+ if context:
106
+ user_input = st.chat_input("πŸ’¬ Ask a question ",)
107
+ if user_input:
108
+ with st.chat_message("user"):
109
+ st.markdown(user_input)
110
+
111
+ st.session_state.messages.append({"role": "user", "content": user_input})
112
+
113
+ with st.spinner("πŸ€” Thinking..."):
114
+ response = qa_pipeline({"question": user_input, "context": context},
115
+ max_answer_len=max_answer_length, n_best_size=Best_size)
116
+ answer = response["answer"]
117
+
118
+ with st.chat_message("assistant"):
119
+ st.markdown(f"{answer}")
120
+
121
+ st.session_state.messages.append({"role": "assistant", "content": f"{answer}"})
122
+
123
+ # Clear chat history button
124
+ if st.button("πŸ—‘οΈ Clear Chat"):
125
+ st.session_state.messages = []
126
+ st.rerun()
127
+
128
+
129
+ ############ summarizer ###########
130
+
131
+ app_sum_col = st.columns([2,8,2])
132
+
133
+
134
+ # add session state
135
+ if 'summary' not in st.session_state:
136
+ st.session_state.summary = []
137
+
138
+ with app_sum_col[1]:
139
+ if select_mode == "Summarizer":
140
+ Text_input = st.text_area(label="πŸ“œ Enter Text Hear",key="Summarizer input",height=220)
141
+ Text_input = Text_Cleaning(Text_input)
142
+
143
+ if Text_input.strip() != "":
144
+ st.session_state.summary = []
145
+
146
+ value_func = lambda x: x * 0.3
147
+ # max length
148
+ max_tokens = st.slider(
149
+ label="Max Length",
150
+ key="max length",
151
+ min_value=1,
152
+ max_value=len(Text_input.split()),
153
+ value=int(value_func(len(Text_input.split())))
154
+ )
155
+
156
+ if st.button(label="πŸ“„ Generate Summary "):
157
+ try:
158
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
159
+ st.session_state.summary = summarizer(
160
+ Text_input,
161
+ max_length=max_tokens+20,
162
+ min_length=max_tokens,
163
+ do_sample=False
164
+ )
165
+
166
+ except Exception as e:
167
+ st.warning(f"Error...\n{e}",icon="⚠️")
168
+
169
+ if st.session_state.summary:
170
+ with st.spinner("Generating Summary..."):
171
+ st.write("### Summary")
172
+ generated_summary = st.session_state.summary[0]['summary_text']
173
+ st.write(generated_summary)
174
+ Copy_Text(generated_summary)
data/webscraper.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import re
5
+ from cleantext import clean
6
+ import streamlit.components.v1 as component
7
+ from transformers import pipeline
8
+ from functions import Copy_Text
9
+ from functions import *
10
+
11
+ ### import animation
12
def particle(Js_file):
    """Render an HTML/JS animation file as an embedded Streamlit component.

    The file is read in full and passed to ``component.html`` with a fixed
    height of 400.

    Args:
        Js_file: Path to the HTML file (markup plus scripts) to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Pin the encoding so the markup decodes the same way on every platform.
    with open(Js_file, encoding="utf-8") as f:
        markup = f.read()
    component.html(markup, height=400)
15
+
16
+ ### insert external css
17
def insert_css(css_file: str) -> None:
    """Inject the contents of a CSS file into the Streamlit page.

    The file is read in full and emitted through ``st.markdown`` wrapped in a
    ``<style>`` tag, with ``unsafe_allow_html=True`` so the tag is rendered
    instead of being escaped.

    Args:
        css_file: Path to the stylesheet to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Pin the encoding: the platform default is not UTF-8 everywhere, and a
    # stylesheet containing non-ASCII characters would otherwise fail to decode.
    with open(css_file, encoding="utf-8") as f:
        css = f.read()
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
20
+
21
+
22
+ # page settings
23
+ st.set_page_config(
24
+ layout="wide",
25
+ initial_sidebar_state="collapsed"
26
+ )
27
+
28
+ # sidebar
29
+ app_sidebar = st.sidebar
30
+ with app_sidebar:
31
+ select_mode = st.selectbox(
32
+ label="Select Mode",
33
+ options=["Summarizer","Que/Ans"],
34
+ key="mode selector",
35
+ index=0
36
+ )
37
+
38
+ if select_mode == "Que/Ans":
39
+ st.write("### Que/Ans Settings")
40
+
41
+ max_answer_length = st.slider(
42
+ label="Max answer",
43
+ min_value=1,
44
+ max_value=10,
45
+ key="max answer",
46
+ value=4
47
+ )
48
+
49
+ max_answer_length = max_answer_length*10
50
+
51
+ Best_size = st.slider(
52
+ label="n best size",
53
+ min_value=1,
54
+ max_value=10,
55
+ key="best size",
56
+ value=5
57
+ )
58
+
59
+ # Initialize session state
60
+ if 'scraped_paragraphs' not in st.session_state:
61
+ st.session_state.scraped_paragraphs = []
62
+ if 'summarizer_mode' not in st.session_state:
63
+ st.session_state.summarizer_mode = False
64
+ if 'summary' not in st.session_state:
65
+ st.session_state.summary = []
66
+
67
+ app_col = st.columns([2,8,2],gap="small")
68
+
69
+ with app_col[0]:
70
+ pass
71
+
72
+ with app_col[2]:
73
+ pass
74
+
75
+ with app_col[1]:
76
+ # Title
77
+ st.write("## GenAi Scraper")
78
+
79
+ # Input URL
80
+ url_input = st.text_input(label="Enter Website URL",key="url input",placeholder="https://www.example.com")
81
+
82
+ # number of paragraphs
83
+ num_paragraphs = st.slider("Select number of paragraphs to scrape", 1, 30, 5)
84
+
85
+ scrap_btn = st.button("Scrape Paragraphs",key="scrap button")
86
+
87
+ if url_input.strip() == "" and not scrap_btn:
88
+ # animation
89
+ particle("animation/particles.html")
90
+
91
+ else:
92
+ if scrap_btn:
93
+ st.session_state.scraped_paragraphs = scrape_paragraphs(url_input, num_paragraphs)
94
+ st.session_state.summary = [] # Reset summary
95
+
96
+ # Display scraped paragraphs
97
+ if st.session_state.scraped_paragraphs:
98
+
99
+ st.write("### Scraped Paragraphs")
100
+
101
+ paragraph_scrap = "\n\n".join(st.session_state.scraped_paragraphs)
102
+ st.write(Text_Cleaning(paragraph_scrap))
103
+
104
+ Copy_Text(Text_Cleaning(paragraph_scrap)) ## copy text
105
+
106
+ #################### summarizer #############
107
+
108
+ if select_mode == "Summarizer":
109
+ if st.session_state.scraped_paragraphs:
110
+ # Toggle for summarization mode
111
+ st.session_state.summarizer_mode = st.toggle("Enable Summarizer Mode", st.session_state.summarizer_mode)
112
+
113
+ if st.session_state.summarizer_mode:
114
+ value_func = lambda x: x * 0.3
115
+ max_tokens = st.slider(label="Select Max Token Length", min_value=10,
116
+ max_value=sum(len(p.split()) for p in st.session_state.scraped_paragraphs),
117
+ value=int(value_func(
118
+ sum(len(p.split()) for p in st.session_state.scraped_paragraphs)
119
+ ))
120
+ )
121
+ if st.button("πŸ“„ Generate Summary"):
122
+ with st.spinner("Generating Summary..."):
123
+ try:
124
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
125
+ st.session_state.summary = summarizer(
126
+ Text_Cleaning(" ".join(st.session_state.scraped_paragraphs)),
127
+ max_length=max_tokens+20,
128
+ min_length=max_tokens,
129
+ do_sample=False
130
+ )
131
+
132
+ except Exception as e:
133
+ st.warning(f"Error...\n{e}",icon="⚠️")
134
+
135
+ # Display summary
136
+ if st.session_state.summary:
137
+ st.write("### Summary")
138
+ generated_summary = st.session_state.summary[0]['summary_text']
139
+ st.write(generated_summary)
140
+ Copy_Text(generated_summary)
141
+
142
+ ################# question answering #####################
143
+
144
+ elif select_mode == "Que/Ans":
145
+ if st.session_state.scraped_paragraphs:
146
+ if st.toggle(label="Question Answering",key="Q/A"):
147
+ # Inject custom CSS to place the chat input at the bottom
148
+ st.markdown(
149
+ """
150
+ <style>
151
+ /* Fix the chat input box at the bottom */
152
+ div[data-testid="stChatInput"] {
153
+ position: fixed;
154
+ bottom: 0;
155
+ margin-bottom: 36px;
156
+
157
+ }
158
+ </style>
159
+ """,
160
+ unsafe_allow_html=True
161
+ )
162
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
163
+
164
+ # Initialize session state
165
+ if "messages" not in st.session_state:
166
+ st.session_state.messages = []
167
+
168
+ # Context is the cleaned text of the scraped paragraphs
169
+ context = Text_Cleaning(paragraph_scrap)
170
+
171
+ # Display chat history
172
+ for message in st.session_state.messages:
173
+ with st.chat_message(message["role"]):
174
+ st.markdown(message["content"])
175
+
176
+ if context:
177
+ user_input = st.chat_input("πŸ’¬ Ask a question ",)
178
+ if user_input:
179
+ with st.chat_message("user"):
180
+ st.markdown(user_input)
181
+
182
+ st.session_state.messages.append({"role": "user", "content": user_input})
183
+
184
+ with st.spinner("πŸ€” Thinking..."):
185
+ response = qa_pipeline({"question": user_input, "context": context},
186
+ max_answer_len=max_answer_length, n_best_size=Best_size)
187
+ answer = response["answer"]
188
+
189
+ with st.chat_message("assistant"):
190
+ st.markdown(f"{answer}")
191
+
192
+ st.session_state.messages.append({"role": "assistant", "content": f"{answer}"})
193
+
194
+ # Clear chat history button
195
+ if st.button("πŸ—‘οΈ Clear Chat"):
196
+ st.session_state.messages = []
197
+ st.rerun()
198
+
199
+ # app settings css
200
+ insert_css("css_files/app.css")