Spaces:
Running
Running
Upload 14 files
Browse files
- data/.streamlit/config.toml +6 -0
- data/__init__.py +3 -0
- data/__pycache__/functions.cpython-312.pyc +0 -0
- data/about_app.py +21 -0
- data/animation/particles.html +88 -0
- data/app.py +56 -0
- data/css_files/app.css +22 -0
- data/data/Nishant Maity Latest.pdf +0 -0
- data/document.py +320 -0
- data/functions.py +129 -0
- data/html_files/about.html +526 -0
- data/requirements.txt +0 -0
- data/text.py +174 -0
- data/webscraper.py +200 -0
data/.streamlit/config.toml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[theme]
|
| 2 |
+
base="dark"
|
| 3 |
+
primaryColor="#6643b5"
|
| 4 |
+
backgroundColor="#0E1117"
|
| 5 |
+
textColor="#FAFAFA"
|
| 6 |
+
secondaryBackgroundColor="#262730"
|
data/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functions import *
|
| 2 |
+
import lxml
|
| 3 |
+
import lxml_html_clean
|
data/__pycache__/functions.cpython-312.pyc
ADDED
|
Binary file (4.75 kB). View file
|
|
|
data/about_app.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
st.set_page_config(
|
| 4 |
+
layout="wide",
|
| 5 |
+
initial_sidebar_state="collapsed"
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
### insert external css
|
| 9 |
+
def insert_css(css_file: str) -> None:
    """Inject the stylesheet stored at ``css_file`` into the current page.

    The file content is wrapped in a ``<style>`` tag and rendered through
    ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        css_file: Path to the CSS file, resolved against the working directory.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit UTF-8 so the read does not depend on the host's locale encoding.
    with open(css_file, encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
|
| 12 |
+
|
| 13 |
+
# app settings css
|
| 14 |
+
insert_css("css_files/app.css")
|
| 15 |
+
|
| 16 |
+
### insert external html file
|
| 17 |
+
def insert_html(html_file: str) -> str:
    """Return the full text of the HTML file at ``html_file``.

    Args:
        html_file: Path to the HTML file, resolved against the working directory.

    Returns:
        The file content as a string.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit UTF-8 so non-ASCII markup reads the same on every host.
    with open(html_file, encoding="utf-8") as f:
        return f.read()
|
| 20 |
+
|
| 21 |
+
st.markdown(insert_html("html_files/about.html"),unsafe_allow_html=True)
|
data/animation/particles.html
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
<script src="https://cdn.jsdelivr.net/npm/particles.js@2.0.0/particles.min.js"></script>
|
| 3 |
+
<script>
|
| 4 |
+
document.addEventListener("DOMContentLoaded", function() {
|
| 5 |
+
particlesJS('particles-js', {
|
| 6 |
+
"particles": {
|
| 7 |
+
"number": {
|
| 8 |
+
"value": 80,
|
| 9 |
+
"density": {
|
| 10 |
+
"enable": true,
|
| 11 |
+
"value_area": 800
|
| 12 |
+
}
|
| 13 |
+
},
|
| 14 |
+
"color": {
|
| 15 |
+
"value": "#ffffff"
|
| 16 |
+
},
|
| 17 |
+
"shape": {
|
| 18 |
+
"type": "circle",
|
| 19 |
+
"stroke": {
|
| 20 |
+
"width": 0,
|
| 21 |
+
"color": "#000000"
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"opacity": {
|
| 25 |
+
"value": 0.5,
|
| 26 |
+
"random": false,
|
| 27 |
+
"anim": {
|
| 28 |
+
"enable": false,
|
| 29 |
+
"speed": 1,
|
| 30 |
+
"opacity_min": 0.1,
|
| 31 |
+
"sync": false
|
| 32 |
+
}
|
| 33 |
+
},
|
| 34 |
+
"size": {
|
| 35 |
+
"value": 3,
|
| 36 |
+
"random": true,
|
| 37 |
+
"anim": {
|
| 38 |
+
"enable": false,
|
| 39 |
+
"speed": 40,
|
| 40 |
+
"size_min": 0.1,
|
| 41 |
+
"sync": false
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"line_linked": {
|
| 45 |
+
"enable": true,
|
| 46 |
+
"distance": 150,
|
| 47 |
+
"color": "#ffffff",
|
| 48 |
+
"opacity": 0.4,
|
| 49 |
+
"width": 1
|
| 50 |
+
},
|
| 51 |
+
"move": {
|
| 52 |
+
"enable": true,
|
| 53 |
+
"speed": 6,
|
| 54 |
+
"direction": "none",
|
| 55 |
+
"random": false,
|
| 56 |
+
"straight": false,
|
| 57 |
+
"out_mode": "out",
|
| 58 |
+
"bounce": false,
|
| 59 |
+
"attract": {
|
| 60 |
+
"enable": false,
|
| 61 |
+
"rotateX": 600,
|
| 62 |
+
"rotateY": 1200
|
| 63 |
+
}
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
"interactivity": {
|
| 67 |
+
"detect_on": "canvas",
|
| 68 |
+
"events": {
|
| 69 |
+
"onhover": {
|
| 70 |
+
"enable": true,
|
| 71 |
+
"mode": "repulse"
|
| 72 |
+
},
|
| 73 |
+
"onclick": {
|
| 74 |
+
"enable": true,
|
| 75 |
+
"mode": "push"
|
| 76 |
+
},
|
| 77 |
+
"resize": true
|
| 78 |
+
}
|
| 79 |
+
},
|
| 80 |
+
"retina_detect": true
|
| 81 |
+
});
|
| 82 |
+
});
|
| 83 |
+
</script>
|
| 84 |
+
<div id="particles-js" style="position: absolute; width: 100%; height: 100%;"></div>
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
|
data/app.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
### page setup
|
| 4 |
+
|
| 5 |
+
# Each st.Page wraps one script of the multipage app; the web scraper is the
# landing page (default=True).
web_qna = st.Page(
    page="webscraper.py",
    title="web Scraper",
    icon=":material/globe:",
    default=True
)

text_qna = st.Page(
    page="text.py",
    title="Text Q&A , Summarizer",
    icon=":material/description:",
)

document_qna = st.Page(
    page="document.py",
    title="Document Q&A , Summarizer",
    icon=":material/picture_as_pdf:",
)

about_app = st.Page(
    page="about_app.py",
    title="About App",
    icon=":material/person:"
)

# Register the pages in the sidebar navigation and run the selected one.
pg = st.navigation(
    pages=[web_qna, text_qna, document_qna, about_app],
    expanded=False, position="sidebar"
)
pg.run()

app_sidebar = st.sidebar

with app_sidebar:
    # project Link
    st.link_button(
        label="Project Link",
        url="https://github.com/Nishant43S/Gen-Ai-Summarizer-Question-Answering-App.git",
        icon=":material/code_off:",
        use_container_width=True
    )
|
| 49 |
+
|
| 50 |
+
### insert external css
|
| 51 |
+
def insert_css(css_file: str) -> None:
    """Inject the stylesheet stored at ``css_file`` into the current page.

    The file content is wrapped in a ``<style>`` tag and rendered through
    ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        css_file: Path to the CSS file, resolved against the working directory.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit UTF-8 so the read does not depend on the host's locale encoding.
    with open(css_file, encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
|
| 54 |
+
|
| 55 |
+
# app settings css
|
| 56 |
+
insert_css("css_files/app.css")
|
data/css_files/app.css
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*{
|
| 2 |
+
scrollbar-width: none;
|
| 3 |
+
}
|
| 4 |
+
|
| 5 |
+
/* main app css */
|
| 6 |
+
[class="stAppViewMain main st-emotion-cache-bm2z3a ea3mdgi8"]{
|
| 7 |
+
background: #0f1116;
|
| 8 |
+
}
|
| 9 |
+
|
| 10 |
+
[class="st-emotion-cache-luriig ezrtsby2"]{
|
| 11 |
+
background: #0f1116;
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
[data-testid="stBaseButton-header"]{
|
| 15 |
+
visibility: hidden;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
::selection{
|
| 19 |
+
background: #421b9b;
|
| 20 |
+
color: whitesmoke;
|
| 21 |
+
}
|
| 22 |
+
|
data/data/Nishant Maity Latest.pdf
ADDED
|
Binary file (82.7 kB). View file
|
|
|
data/document.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from functions import *
|
| 3 |
+
from transformers import pipeline
|
| 4 |
+
from pdfminer.high_level import extract_text
|
| 5 |
+
import os
|
| 6 |
+
import PyPDF2
|
| 7 |
+
import base64
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
#### chatbot function
|
| 12 |
+
|
| 13 |
+
@st.cache_resource(show_spinner=False)
def _load_qa_pipeline():
    """Build the question-answering pipeline once and reuse it across reruns."""
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


def Chat_Bot(text_input, Best_size, max_answer_length):
    """Render a chat UI that answers questions about ``text_input``.

    The chat history lives in ``st.session_state.messages`` as a list of
    ``{"role", "content"}`` dicts and is replayed on every rerun.

    Args:
        text_input: Context text the answers are extracted from. When it is
            empty, no chat input box is shown.
        Best_size: Forwarded to the pipeline call as ``n_best_size``.
        max_answer_length: Forwarded to the pipeline call as ``max_answer_len``.
    """
    st.markdown(
        """
        <style>
        /* Fix the chat input box at the bottom */
        div[data-testid="stChatInput"] {
            position: fixed;
            bottom: 0;
            margin-bottom: 36px;

        }
        </style>
        """,
        unsafe_allow_html=True
    )

    # Streamlit re-executes the script on every interaction; the cached loader
    # keeps the model from being rebuilt for each chat message.
    qa_pipeline = _load_qa_pipeline()

    # Initialize session state for chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    context = text_input

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if context:
        user_input = st.chat_input("💬 Ask a question based on the context:")
        if user_input:
            with st.chat_message("user"):
                st.markdown(user_input)

            st.session_state.messages.append({"role": "user", "content": user_input})

            with st.spinner("🤖 Thinking..."):
                # NOTE(review): `n_best_size` is not among the call arguments I
                # know for the question-answering pipeline and may be ignored —
                # confirm against the transformers documentation.
                response = qa_pipeline(
                    {"question": user_input, "context": context},
                    max_answer_len=max_answer_length, n_best_size=Best_size
                )
                answer = response["answer"]

            with st.chat_message("assistant"):
                st.markdown(f"{answer}")

            st.session_state.messages.append({"role": "assistant", "content": f"{answer}"})

    # Clear chat history button
    if st.button("🗑️ Clear Chat"):
        st.session_state.messages = []
        st.rerun()
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# page settings
|
| 71 |
+
st.set_page_config(
|
| 72 |
+
layout="wide",
|
| 73 |
+
initial_sidebar_state="collapsed"
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
### insert external css
|
| 77 |
+
def insert_css(css_file: str) -> None:
    """Inject the stylesheet stored at ``css_file`` into the current page.

    The file content is wrapped in a ``<style>`` tag and rendered through
    ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        css_file: Path to the CSS file, resolved against the working directory.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit UTF-8 so the read does not depend on the host's locale encoding.
    with open(css_file, encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
|
| 80 |
+
|
| 81 |
+
# app settings css
|
| 82 |
+
insert_css("css_files/app.css")
|
| 83 |
+
|
| 84 |
+
def extract_pdf_text(pdf_file):
    """Return the text of ``pdf_file`` as extracted by pdfminer's ``extract_text``.

    Args:
        pdf_file: The PDF to read; passed straight through to ``extract_text``.
    """
    return extract_text(pdf_file)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
#### displaying uploaded pdf file
|
| 91 |
+
def display_pdf_file(uploaded_file):
    """Save the uploaded PDF under ``data/`` and render it inline on the page.

    The file is written to disk, read back, base64-encoded and embedded in an
    ``<iframe>`` through ``st.markdown``. Any failure is reported with
    ``st.warning`` instead of propagating.

    Args:
        uploaded_file: Uploaded file object; must expose ``name`` and
            ``getbuffer()``.
    """
    #### saving the uploaded file
    def save_uploadfile(save_file):
        """Write the upload into ``data/`` and return the path it was saved to."""
        # The name comes from the client: keep only its final component so it
        # cannot point outside the data directory.
        target = os.path.join("data", os.path.basename(save_file.name))
        os.makedirs("data", exist_ok=True)
        with open(target, "wb") as f:
            f.write(save_file.getbuffer())
        st.toast("file uploaded: {}".format(save_file.name))
        return target

    ### display pdf on screen
    def displayPDF(pdf_file):
        """Embed the PDF at ``pdf_file`` in the page as a base64 data URI."""
        with open(pdf_file, "rb") as f:
            base64_pdf = base64.b64encode(f.read()).decode("utf-8")

        pdf_display = f"""
        <iframe
            src="data:application/pdf;base64,{base64_pdf}"
            width="950" height="1000"
            type="application/pdf"
        >
        </iframe>
        """

        st.markdown(pdf_display, unsafe_allow_html=True)

    try:
        ### save and display file
        displayPDF(save_uploadfile(uploaded_file))
    except Exception as e:
        # st.warning takes one positional body argument; the exception text has
        # to be formatted into it rather than passed as a second argument.
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
# --- PDF Page Text Extractor Function ---
|
| 128 |
+
def extract_text_from_pdf(pdf_file, page_num):
    """Extract the text of one page of a PDF.

    Args:
        pdf_file: The PDF to read; passed to ``PyPDF2.PdfReader``.
        page_num: 1-based page number.

    Returns:
        A ``(text, total_pages)`` tuple. ``text`` is ``None`` when ``page_num``
        is out of range. When reading fails, an error is shown with
        ``st.error`` and ``(None, 0)`` is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        total_pages = len(reader.pages)
        if not 1 <= page_num <= total_pages:
            return None, total_pages
        page = reader.pages[page_num - 1]  # Adjusting for 0-based index
        return page.extract_text(), total_pages
    except Exception as e:
        st.error(f"Error extracting text: {e}")
        return None, 0
|
| 141 |
+
|
| 142 |
+
def pdf_Summarizer(file):
    """Show the PDF and a page picker in two tabs; return the picked page's text.

    Args:
        file: Uploaded PDF; handed to ``display_pdf_file``, ``PyPDF2.PdfReader``
            and ``extract_text_from_pdf``.

    Returns:
        The text of the selected page, or ``""`` when the document has no pages
        or the text could not be extracted.
    """
    Display_col, Summarizer_col = st.tabs(["Pdf Display", "PDF Summarizer"])
    with Display_col:
        display_pdf_file(file)

    text = None
    with Summarizer_col:
        temp_reader = PyPDF2.PdfReader(file)
        total_pages = len(temp_reader.pages)
        st.write(f"### Total Pages: {total_pages}")

        # A page picker with min_value=1 cannot be built for an empty document.
        if total_pages >= 1:
            ## columns
            Input_col = st.columns([4, 10])
            with Input_col[0]:
                page_number = st.number_input(
                    "Select page number",
                    min_value=1, max_value=total_pages,
                    value=1, step=1)
            st.write("Page Number {}".format(page_number))
            text, _ = extract_text_from_pdf(file, page_number)

    # extract_text_from_pdf yields None on failure; hand the caller a string so
    # downstream text cleaning does not receive None.
    return text or ""
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
app_sidebar = st.sidebar

with app_sidebar:
    # Mode switch: decides which of the two page bodies below is rendered.
    select_mode = st.selectbox(
        label="Select Mode",
        options=["Summarizer", "Que/Ans"],
        key="mode selector",
        index=0
    )

    # The question-answering settings exist only in Que/Ans mode, so
    # `max_answer_length` and `Best_size_` are defined only in that mode.
    if select_mode == "Que/Ans":
        st.write("### Que/Ans Settings")

        max_answer_length = st.slider(
            label="Max answer",
            min_value=1,
            max_value=10,
            key="max answer",
            value=4
        )

        # The slider runs 1-10; the value passed on is ten times that.
        max_answer_length = max_answer_length*10

        Best_size_ = st.slider(
            label="n best size",
            min_value=1,
            max_value=10,
            key="best size",
            value=5
        )
|
| 193 |
+
|
| 194 |
+
@st.cache_resource(show_spinner=False)
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def Summarizer_Model(context, Max_Length):
    """Summarize ``context`` with the ``facebook/bart-large-cnn`` pipeline.

    Args:
        context: Text to summarize.
        Max_Length: Passed as ``min_length``; ``max_length`` is set to
            ``Max_Length + 20``.

    Returns:
        The summary text, or ``None`` when the model call fails (a warning is
        shown with ``st.warning`` in that case).
    """
    try:
        # Cached loader: the model is not rebuilt on every button press.
        summarizer = _load_summarizer()
        Summary = summarizer(
            context,
            max_length=Max_Length+20,
            min_length=Max_Length,
            do_sample=False
        )
        return Summary[0]['summary_text']

    except Exception as e:
        st.warning(f"Error...\n{e}", icon="⚠️")
        return None
|
| 207 |
+
|
| 208 |
+
def _default_summary_length(word_count):
    """Return the initial summary-length slider value: 30% of the words, at least 1."""
    return max(1, int(word_count * 0.3))


app_col = st.columns([2, 8, 2])

with app_col[1]:
    # NOTE(review): the leading "π" in these headings looks like a mis-decoded
    # emoji — confirm the intended glyph.
    if select_mode == "Summarizer":
        st.write("## π Document Summarizer")
    elif select_mode == "Que/Ans":
        st.write("## π Document Question Answering")

### question answering
que_col = st.columns([2, 8, 2])

with que_col[1]:
    if select_mode == "Que/Ans":
        ## input file
        File_input = st.file_uploader(
            label="Drop Your File hear",
            type=["txt", "pdf"],
            key="file uploader"
        )

        if File_input is not None:
            # Plain-text uploads are decoded directly; anything else is treated as a PDF.
            if File_input.type == "text/plain":
                text = File_input.read().decode("utf-8")
            else:
                text = extract_pdf_text(File_input)
            Chat_Bot(
                text_input=Text_Cleaning(text),
                Best_size=Best_size_,
                max_answer_length=max_answer_length
            )

# session state
if 'input_text' not in st.session_state:
    st.session_state.input_text = []

if 'pdf_text' not in st.session_state:
    st.session_state.pdf_text = []

if 'summary_text' not in st.session_state:
    st.session_state.summary_text = []

summ_col = st.columns([2, 8, 2])

with summ_col[1]:
    if select_mode == "Summarizer":
        ## input file
        File_input = st.file_uploader(
            label="Drop Your File hear",
            type=["txt", "pdf"],
            key="file uploader"
        )
        if File_input is not None:
            if File_input.type == "text/plain":
                text = File_input.read().decode("utf-8")
                st.session_state.input_text = st.text_area(label="Uploaded document Text",value=Text_Cleaning(text),height=200)
                Text_input = Text_Cleaning(st.session_state.input_text)
                word_count = len(st.session_state.input_text.split())

                # The slider needs min_value < max_value and a default inside
                # that range; very short text cannot satisfy that.
                if word_count < 2:
                    st.warning("The document has too little text to summarize.", icon="⚠️")
                else:
                    max_length = st.slider(
                        label="Max Length",min_value=1,
                        max_value=word_count,
                        value=_default_summary_length(word_count)
                    )

                    if st.button(label="π Generate Summary"):
                        with st.spinner("Generating Summary"):
                            Generated_Summary = Summarizer_Model(context=Text_input,Max_Length=max_length)
                        # Summarizer_Model returns None after showing its own warning.
                        if Generated_Summary:
                            st.write(Generated_Summary)
                            Copy_Text(Generated_Summary)

            else:
                st.session_state.summary_text = []
                # pdf_Summarizer may yield no text; normalise to a string before cleaning.
                st.session_state.pdf_text = pdf_Summarizer(File_input) or ""

                ## text area
                Text_Area_Input = st.text_area(
                    "Pdf Text",value=Text_Cleaning(st.session_state.pdf_text),
                    key="text area",height=450
                )
                pdf_word_count = len(Text_Area_Input.split())

                if pdf_word_count < 2:
                    st.warning("The selected page has too little text to summarize.", icon="⚠️")
                else:
                    Max_Pdf_Summary_len = st.slider(
                        label="MAx Length",
                        min_value=1,
                        max_value=pdf_word_count,
                        value=_default_summary_length(pdf_word_count),
                        key="pdf summarizer Slider"
                    )

                    if st.button("π Generate Summary",key="pdf Summary"):
                        # generating summary
                        with st.spinner("Generating Summary"):
                            st.session_state.summary_text = Summarizer_Model(
                                context=Text_Area_Input,Max_Length=Max_Pdf_Summary_len
                            )

                        if st.session_state.summary_text:
                            st.write(st.session_state.summary_text)
                            Copy_Text(st.session_state.summary_text)
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
|
data/functions.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# creating function file
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import requests
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
import re
|
| 6 |
+
from cleantext import clean
|
| 7 |
+
import streamlit.components.v1 as component
|
| 8 |
+
|
| 9 |
+
def Copy_Text(text):
    """Render a small copy-to-clipboard button for ``text``.

    The text is placed in a ``<p id="text">`` element inside an HTML component;
    clicking the icon copies that element's ``innerText`` and briefly shows a
    "Copied!" tooltip.

    Args:
        text: The text to make copyable. ``None`` is treated as empty text.
    """
    import html  # local import: only this function needs it

    # Escape before interpolating so "<", "&" or quotes in the text cannot break
    # the markup or inject script; innerText still returns the original characters.
    safe_text = html.escape("" if text is None else str(text))

    Html_Code = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Copy Button</title>
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
        <style>
            p {{
                font-size: 18px;
                margin-bottom: 10px;
            }}
            .copy-link {{
                color: #6643b5;
                text-decoration: none;
                margin-top: 32px;
                margin-left: 13px;
                font-size: 20px;
                display: inline-flex;
                align-items: center;
                gap: 5px;
                position: relative;
                transition: background 0.3s;
                cursor: pointer;
            }}
            .copy-link:hover {{
                color: #8594e4;
            }}
            .tooltip {{
                position: absolute;
                top: -30px;
                left: 50%;
                transform: translateX(-50%);
                background: black;
                color: white;
                padding: 5px 10px;
                font-size: 12px;
                border-radius: 5px;
                opacity: 0;
                transition: opacity 0.3s, transform 0.3s;
            }}
            .show-tooltip {{
                opacity: 1;
                transform: translate(-50%, -10px);
            }}
        </style>
    </head>
    <body>
        <div class="container">
            <a href="#" class="copy-link" onclick="copyText(event)">
                <i class="fa-regular fa-copy"></i>
                <span class="tooltip" id="tooltip">Copied!</span>
            </a>
            <br>
            <br>
            <p id="text">{safe_text}</p>
        </div>
        <script>
            function copyText(event) {{
                event.preventDefault();
                const text = document.getElementById("text").innerText;
                const textarea = document.createElement("textarea");
                textarea.value = text;
                document.body.appendChild(textarea);
                textarea.select();
                document.execCommand("copy");
                document.body.removeChild(textarea);

                const tooltip = document.getElementById("tooltip");
                tooltip.classList.add("show-tooltip");
                setTimeout(() => {{
                    tooltip.classList.remove("show-tooltip");
                }}, 1000);
            }}
        </script>
    </body>
    </html>
    """
    component.html(Html_Code, height=60, width=60)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def scrape_paragraphs(url, num_paragraphs, timeout=10):
    """Fetch ``url`` and return the text of its first ``num_paragraphs`` ``<p>`` tags.

    Args:
        url: Address of the page to fetch.
        num_paragraphs: Maximum number of paragraphs to return.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of paragraph strings. The list is empty when the response status
        is not 200 or when the request or parsing fails; in the failure case a
        warning is also shown with ``st.warning``.
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive host.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.text, 'lxml')
        return [p.get_text() for p in soup.find_all('p')[:num_paragraphs]]
    except Exception as e:
        st.warning(f"Error...\n{e}", icon="⚠️")
        return []
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
### text cleaning
|
| 111 |
+
def Text_Cleaning(text: str) -> str:
    """Return a cleaned copy of ``text``.

    Steps, in order: strip backticks and carets, run the text through
    ``cleantext.clean`` (unicode fixing, ASCII transliteration, line breaks
    kept), then remove bracketed numeric markers such as ``[12]``.

    Args:
        text: The text to clean. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned text.
    """
    # Callers pass extraction results that can be None or empty; re.sub would
    # raise TypeError on None, so return early instead.
    if not text:
        return ""

    without_marks = re.sub(r'[`^]', '', text)

    normalized = clean(
        text=without_marks, fix_unicode=True,
        to_ascii=True,
        no_line_breaks=False,
        keep_two_line_breaks=True
    )

    # Drop bracketed reference numbers such as "[3]".
    return re.sub(r'\[\d+\]', '', normalized)
|
data/html_files/about.html
ADDED
|
@@ -0,0 +1,526 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title>About Project</title>
|
| 8 |
+
<!-- <link rel="stylesheet" href="style.css"> -->
|
| 9 |
+
<!-- link for poppins font -->
|
| 10 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 11 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 12 |
+
<link
|
| 13 |
+
href="https://fonts.googleapis.com/css2?family=Poppins:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;0,800;0,900;1,100;1,200;1,300;1,400;1,500;1,600;1,700;1,800;1,900&display=swap"
|
| 14 |
+
rel="stylesheet">
|
| 15 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.7.2/css/all.min.css"
|
| 16 |
+
integrity="sha512-Evv84Mr4kqVGRNSgIGL/F/aIDqQb7xQ2vcrdIwxfjThSH8CSR7PBEakCr51Ck+w+/U6swU2Im1vVX0SVk9ABhg=="
|
| 17 |
+
crossorigin="anonymous" referrerpolicy="no-referrer" />
|
| 18 |
+
<!-- Internal CSS -->
|
| 19 |
+
<style>
|
| 20 |
+
* {
|
| 21 |
+
margin: 0;
|
| 22 |
+
padding: 0;
|
| 23 |
+
box-sizing: border-box;
|
| 24 |
+
/* cursor: pointer; */
|
| 25 |
+
}
|
| 26 |
+
body {
|
| 27 |
+
font-family: 'Poppins', sans-serif;
|
| 28 |
+
background-color: rgb(14, 17, 23);
|
| 29 |
+
color: rgb(250, 250, 250);
|
| 30 |
+
line-height: 1.6;
|
| 31 |
+
}
|
| 32 |
+
.main-container {
|
| 33 |
+
max-width: 900px;
|
| 34 |
+
margin: 0 auto;
|
| 35 |
+
padding: 20px;
|
| 36 |
+
}
|
| 37 |
+
/* ------ Header Section -------*/
|
| 38 |
+
header {
|
| 39 |
+
/* text-align: center; */
|
| 40 |
+
margin-bottom: 30px;
|
| 41 |
+
}
|
| 42 |
+
.hover {
|
| 43 |
+
text-shadow: 0 0 20px rgba(98, 162, 250, 0.4);
|
| 44 |
+
}
|
| 45 |
+
header h1 {
|
| 46 |
+
font-size: 3em;
|
| 47 |
+
color: rgb(98, 162, 250);
|
| 48 |
+
cursor: pointer;
|
| 49 |
+
}
|
| 50 |
+
header h2 {
|
| 51 |
+
font-size: 2.5em;
|
| 52 |
+
color: white;
|
| 53 |
+
cursor: pointer;
|
| 54 |
+
}
|
| 55 |
+
header h2:hover {
|
| 56 |
+
text-shadow: 0 0 20px rgba(255, 255, 255, 0.4);
|
| 57 |
+
}
|
| 58 |
+
/* ------ About Section ------ */
|
| 59 |
+
.about {
|
| 60 |
+
position: relative;
|
| 61 |
+
background-color: rgb(24, 28, 38);
|
| 62 |
+
padding: 20px;
|
| 63 |
+
border-radius: 15px;
|
| 64 |
+
margin-bottom: 40px;
|
| 65 |
+
box-shadow: 0 8px 15px rgba(0, 0, 0, 0.4);
|
| 66 |
+
cursor: pointer;
|
| 67 |
+
/* width: 500px; */
|
| 68 |
+
}
|
| 69 |
+
.about:hover {
|
| 70 |
+
transform: scale(1.02);
|
| 71 |
+
border: 2px solid rgb(98, 162, 250);
|
| 72 |
+
}
|
| 73 |
+
.about h3 {
|
| 74 |
+
font-size: 1.8em;
|
| 75 |
+
margin-bottom: 15px;
|
| 76 |
+
color: rgb(98, 162, 250);
|
| 77 |
+
}
|
| 78 |
+
.about p,
|
| 79 |
+
.about ul {
|
| 80 |
+
font-size: 1.1em;
|
| 81 |
+
margin-bottom: 15px;
|
| 82 |
+
}
|
| 83 |
+
.about ul {
|
| 84 |
+
padding-left: 20px;
|
| 85 |
+
list-style-type: disc;
|
| 86 |
+
}
|
| 87 |
+
/* ------ Technologies Section ------ */
|
| 88 |
+
.technologies {
|
| 89 |
+
margin-top: 30px;
|
| 90 |
+
}
|
| 91 |
+
.technologies h2 {
|
| 92 |
+
font-size: 2em;
|
| 93 |
+
margin-bottom: 20px;
|
| 94 |
+
color: rgb(98, 162, 250);
|
| 95 |
+
cursor: pointer;
|
| 96 |
+
}
|
| 97 |
+
.technologies h2:hover {
|
| 98 |
+
/* text-decoration: underline; */
|
| 99 |
+
text-shadow: 0 0 20px rgba(98, 162, 250, 0.4);
|
| 100 |
+
}
|
| 101 |
+
.row {
|
| 102 |
+
display: flex;
|
| 103 |
+
justify-content: space-around;
|
| 104 |
+
align-items: center;
|
| 105 |
+
}
|
| 106 |
+
.tech-card {
|
| 107 |
+
align-items: center;
|
| 108 |
+
/* width: 30%;
|
| 109 |
+
height: 300px; */
|
| 110 |
+
width: 43%;
|
| 111 |
+
height: 255px;
|
| 112 |
+
/* background-color: rgb(46, 84, 182); */
|
| 113 |
+
background-color: rgb(24, 28, 38);
|
| 114 |
+
padding: 15px;
|
| 115 |
+
border-radius: 15px;
|
| 116 |
+
box-shadow: 0 8px 15px rgba(0, 0, 0, 0.4);
|
| 117 |
+
margin-bottom: 20px;
|
| 118 |
+
position: relative;
|
| 119 |
+
overflow: hidden;
|
| 120 |
+
transition: transform 0.3s ease;
|
| 121 |
+
cursor: pointer;
|
| 122 |
+
}
|
| 123 |
+
.tech-card h4 {
|
| 124 |
+
color: rgb(98, 162, 250);
|
| 125 |
+
font-size: 1.3rem;
|
| 126 |
+
text-align: center;
|
| 127 |
+
margin-bottom: 5px;
|
| 128 |
+
}
|
| 129 |
+
/*
|
| 130 |
+
.tech-card > p {
|
| 131 |
+
text-align: justify;
|
| 132 |
+
}
|
| 133 |
+
*/
|
| 134 |
+
.tech-card:hover {
|
| 135 |
+
transform: scale(1.02);
|
| 136 |
+
}
|
| 137 |
+
.tech-card::before {
|
| 138 |
+
content: '';
|
| 139 |
+
position: absolute;
|
| 140 |
+
top: 0;
|
| 141 |
+
left: 0;
|
| 142 |
+
width: 100%;
|
| 143 |
+
height: 100%;
|
| 144 |
+
background: rgba(24, 28, 38, 0.4);
|
| 145 |
+
/* blur color */
|
| 146 |
+
filter: blur(20px);
|
| 147 |
+
z-index: -1;
|
| 148 |
+
opacity: 0;
|
| 149 |
+
transition: opacity 0.3s ease;
|
| 150 |
+
}
|
| 151 |
+
.tech-card:hover::before {
|
| 152 |
+
opacity: 1;
|
| 153 |
+
}
|
| 154 |
+
@keyframes glow {
|
| 155 |
+
0% {
|
| 156 |
+
box-shadow: 0 0 20px rgba(24, 28, 38, 0.4), 0 0 40px rgba(24, 28, 38, 0.3), 0 0 60px rgba(24, 28, 38, 0.2);
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
100% {
|
| 160 |
+
box-shadow: 0 0 20px rgba(24, 28, 38, 0.4), 0 0 40px rgba(24, 28, 38, 0.3), 0 0 60px rgba(24, 28, 38, 0.2);
|
| 161 |
+
}
|
| 162 |
+
}
|
| 163 |
+
.tech-card:hover {
|
| 164 |
+
animation: glow 1.5s infinite ease-in-out;
|
| 165 |
+
}
|
| 166 |
+
/* ------ Developers Section ------ */
|
| 167 |
+
.developers {
|
| 168 |
+
margin-top: 30px;
|
| 169 |
+
}
|
| 170 |
+
.developers h2 {
|
| 171 |
+
font-size: 2em;
|
| 172 |
+
margin-bottom: 20px;
|
| 173 |
+
color: rgb(98, 162, 250);
|
| 174 |
+
cursor: pointer;
|
| 175 |
+
}
|
| 176 |
+
.developers h2:hover {
|
| 177 |
+
text-shadow: 0 0 20px rgba(98, 162, 250, 0.4);
|
| 178 |
+
}
|
| 179 |
+
.developer-card {
|
| 180 |
+
display: flex;
|
| 181 |
+
align-items: center;
|
| 182 |
+
background-color: rgb(24, 28, 38);
|
| 183 |
+
padding: 15px;
|
| 184 |
+
border-radius: 15px;
|
| 185 |
+
box-shadow: 0 8px 15px rgba(0, 0, 0, 0.4);
|
| 186 |
+
margin-bottom: 20px;
|
| 187 |
+
}
|
| 188 |
+
.profile img {
|
| 189 |
+
width: 100px;
|
| 190 |
+
height: 100px;
|
| 191 |
+
border-radius: 50%;
|
| 192 |
+
border: 2px solid rgb(98, 162, 250);
|
| 193 |
+
}
|
| 194 |
+
.details {
|
| 195 |
+
margin-left: 20px;
|
| 196 |
+
display: flex;
|
| 197 |
+
width: 80%;
|
| 198 |
+
margin: auto;
|
| 199 |
+
justify-content: space-between;
|
| 200 |
+
}
|
| 201 |
+
.details p {
|
| 202 |
+
font-size: 1.2em;
|
| 203 |
+
margin-bottom: 10px;
|
| 204 |
+
}
|
| 205 |
+
.developer {
|
| 206 |
+
display: inline-block;
|
| 207 |
+
width: 200px;
|
| 208 |
+
}
|
| 209 |
+
.developer-name {
|
| 210 |
+
cursor: pointer;
|
| 211 |
+
width: 200px;
|
| 212 |
+
}
|
| 213 |
+
.developer-name:hover {
|
| 214 |
+
text-shadow: 0 0 20px rgba(255, 255, 255, 0.4);
|
| 215 |
+
}
|
| 216 |
+
.social-icons {
|
| 217 |
+
display: flex;
|
| 218 |
+
|
| 219 |
+
}
|
| 220 |
+
.social-icons a {
|
| 221 |
+
text-decoration: none;
|
| 222 |
+
margin: 0px 15px;
|
| 223 |
+
}
|
| 224 |
+
.fa-brands.fa-linkedin.fa-2xl {
|
| 225 |
+
color: gray
|
| 226 |
+
}
|
| 227 |
+
.fa-brands.fa-linkedin.fa-2xl:hover {
|
| 228 |
+
color: rgb(98, 162, 250);
|
| 229 |
+
text-shadow: 0 0 20px rgba(98, 162, 250, 0.9);
|
| 230 |
+
}
|
| 231 |
+
.fa-brands.fa-github.fa-2xl {
|
| 232 |
+
color: gray;
|
| 233 |
+
}
|
| 234 |
+
.fa-brands.fa-github.fa-2xl:hover {
|
| 235 |
+
/* color: rgb(98, 162, 250); */
|
| 236 |
+
color: rgb(255, 255, 255);
|
| 237 |
+
text-shadow: 0 0 20px rgba(98, 162, 250, 0.9);
|
| 238 |
+
}
|
| 239 |
+
/* ----------- Media Query Section ------------ */
|
| 240 |
+
@media screen and (max-width: 768px) {
|
| 241 |
+
.main-container {
|
| 242 |
+
max-width: 600px;
|
| 243 |
+
margin: 0 auto;
|
| 244 |
+
padding: 20px;
|
| 245 |
+
}
|
| 246 |
+
.row {
|
| 247 |
+
display: block;
|
| 248 |
+
}
|
| 249 |
+
.tech-card {
|
| 250 |
+
margin: auto;
|
| 251 |
+
width: 100%;
|
| 252 |
+
bottom: 10px;
|
| 253 |
+
margin-bottom: 20px;
|
| 254 |
+
height: 215px;
|
| 255 |
+
}
|
| 256 |
+
}
|
| 257 |
+
@media screen and (max-width: 600px) {
|
| 258 |
+
.about p,
|
| 259 |
+
.about ul {
|
| 260 |
+
font-size: 16px;
|
| 261 |
+
margin-bottom: 10px;
|
| 262 |
+
}
|
| 263 |
+
}
|
| 264 |
+
@media screen and (max-width: 500px) {
|
| 265 |
+
/* .main-container {
|
| 266 |
+
max-width: 600px;
|
| 267 |
+
margin: 0 auto;
|
| 268 |
+
padding: 20px;
|
| 269 |
+
} */
|
| 270 |
+
header h1 {
|
| 271 |
+
font-size: 35px;
|
| 272 |
+
}
|
| 273 |
+
header h2 {
|
| 274 |
+
font-size: 28px;
|
| 275 |
+
}
|
| 276 |
+
.about h3 {
|
| 277 |
+
font-size: 1.5em;
|
| 278 |
+
}
|
| 279 |
+
.technologies h2 {
|
| 280 |
+
font-size: 1.5em;
|
| 281 |
+
}
|
| 282 |
+
.developers h2 {
|
| 283 |
+
font-size: 1.5em;
|
| 284 |
+
}
|
| 285 |
+
.details p {
|
| 286 |
+
font-size: 17px;
|
| 287 |
+
margin-bottom: 10px;
|
| 288 |
+
display: block;
|
| 289 |
+
}
|
| 290 |
+
.developer {
|
| 291 |
+
width: 50%;
|
| 292 |
+
margin: auto;
|
| 293 |
+
display: block;
|
| 294 |
+
margin-top: 10px;
|
| 295 |
+
}
|
| 296 |
+
}
|
| 297 |
+
@media screen and (max-width: 420px) {
|
| 298 |
+
.about p,
|
| 299 |
+
.about ul {
|
| 300 |
+
font-size: 15px;
|
| 301 |
+
}
|
| 302 |
+
p {
|
| 303 |
+
font-size: 15px;
|
| 304 |
+
}
|
| 305 |
+
header h1 {
|
| 306 |
+
font-size: 28px;
|
| 307 |
+
}
|
| 308 |
+
header h2 {
|
| 309 |
+
font-size: 24px;
|
| 310 |
+
}
|
| 311 |
+
.about h3 {
|
| 312 |
+
font-size: 1.2em;
|
| 313 |
+
}
|
| 314 |
+
.technologies h2 {
|
| 315 |
+
font-size: 1.2em;
|
| 316 |
+
}
|
| 317 |
+
.developers h2 {
|
| 318 |
+
font-size: 1.2em;
|
| 319 |
+
}
|
| 320 |
+
.details {
|
| 321 |
+
display: block;
|
| 322 |
+
}
|
| 323 |
+
}
|
| 324 |
+
@media screen and (max-width: 375px) {
|
| 325 |
+
header h1 {
|
| 326 |
+
font-size: 24px;
|
| 327 |
+
}
|
| 328 |
+
header h2 {
|
| 329 |
+
font-size: 20px;
|
| 330 |
+
}
|
| 331 |
+
.about h3 {
|
| 332 |
+
font-size: 1.1em;
|
| 333 |
+
}
|
| 334 |
+
.technologies h2 {
|
| 335 |
+
font-size: 1.1em;
|
| 336 |
+
}
|
| 337 |
+
.developers h2 {
|
| 338 |
+
font-size: 1.1em;
|
| 339 |
+
}
|
| 340 |
+
.details {
|
| 341 |
+
display: block;
|
| 342 |
+
}
|
| 343 |
+
}
|
| 344 |
+
</style>
|
| 345 |
+
</head>
|
| 346 |
+
<body>
|
| 347 |
+
<!-- Main Container -->
|
| 348 |
+
<div class="main-container">
|
| 349 |
+
<!-- Header Section -->
|
| 350 |
+
<header>
|
| 351 |
+
<h1 class="hover">About Project</h1>
|
| 352 |
+
</header>
|
| 353 |
+
<!-- About Project Section -->
|
| 354 |
+
<section class="about">
|
| 355 |
+
<p>
|
| 356 |
+
In the digital era, information overload is a major challenge. Our <strong>AI for Smart Document
|
| 357 |
+
Summarization
|
| 358 |
+
and Automated Question Generation </strong> aims to simplify content processing by leveraging
|
| 359 |
+
advanced natural
|
| 360 |
+
language processing (NLP) techniques.
|
| 361 |
+
This project is designed for students, researchers, content creators, and professionals who need quick
|
| 362 |
+
insights from large volumes of data. By automating summarization and question generation, our AI
|
| 363 |
+
solution enhances productivity and knowledge retention effortlessly.
|
| 364 |
+
</p>
|
| 365 |
+
<p><strong>How It Works:</strong></p>
|
| 366 |
+
<ul>
|
| 367 |
+
<li>
|
| 368 |
+
Input – Paste text, enter a URL, or upload a document.
|
| 369 |
+
</li>
|
| 370 |
+
<li>
|
| 371 |
+
Processing – The AI extracts essential insights, removes redundancies, and formulates a structured
|
| 372 |
+
summary.
|
| 373 |
+
</li>
|
| 374 |
+
<li>
|
| 375 |
+
Output – Receive a well-structured summary and a set of AI-generated questions for further analysis.
|
| 376 |
+
</li>
|
| 377 |
+
</ul>
|
| 378 |
+
<p>
|
| 379 |
+
<p><strong>Key features include:</strong></p>
|
| 380 |
+
<ul>
|
| 381 |
+
<li>
|
| 382 |
+
Smart Summarization – Input any text, URL, or document, and our AI-powered system generates a
|
| 383 |
+
concise and meaningful summary.
|
| 384 |
+
</li>
|
| 385 |
+
<li>
|
| 386 |
+
Automated Question Generation – Transform lengthy articles into relevant questions, making it ideal
|
| 387 |
+
for study materials, research, and knowledge assessment.
|
| 388 |
+
</li>
|
| 389 |
+
</ul>
|
| 390 |
+
<p>
|
| 391 |
+
Developed using <strong>Streamlit</strong>, this project demonstrates cutting-edge technology with an
|
| 392 |
+
interactive and user-friendly interface.
|
| 393 |
+
</p>
|
| 394 |
+
</section>
|
| 395 |
+
<!-- Technologies Used Section -->
|
| 396 |
+
<section class="technologies">
|
| 397 |
+
<h2>Technologies Used</h2>
|
| 398 |
+
<div class="technology-card">
|
| 399 |
+
<!-- Row 1 -->
|
| 400 |
+
<div class="row">
|
| 401 |
+
<div class="tech-card">
|
| 402 |
+
<h4>Python</h4>
|
| 403 |
+
<p>
|
| 404 |
+
Used as the core programming language for implementing document summarization and automated question generation, leveraging NLP models and AI algorithms for efficient text processing.
|
| 405 |
+
</p>
|
| 406 |
+
</div>
|
| 407 |
+
<div class="tech-card">
|
| 408 |
+
<h4>HTML</h4>
|
| 409 |
+
<p>
|
| 410 |
+
Used to structure the web pages, including sections like "About Project," "Technologies Used," and user interaction areas, ensuring a clean and semantic layout.
|
| 411 |
+
</p>
|
| 412 |
+
</div>
|
| 413 |
+
</div>
|
| 414 |
+
<!-- Row 2 -->
|
| 415 |
+
<div class="row">
|
| 416 |
+
<div class="tech-card">
|
| 417 |
+
<h4>CSS</h4>
|
| 418 |
+
<p>
|
| 419 |
+
Utilized for styling the web interface, improving visual aesthetics, adding animations, and ensuring a responsive design across different devices.
|
| 420 |
+
</p>
|
| 421 |
+
</div>
|
| 422 |
+
<div class="tech-card">
|
| 423 |
+
<h4>JavaScript</h4>
|
| 424 |
+
<p>
|
| 425 |
+
Implemented interactive elements like real-time updates, animations, and user-friendly components to enhance the overall experience.
|
| 426 |
+
</p>
|
| 427 |
+
</div>
|
| 428 |
+
</div>
|
| 429 |
+
<!-- Row 3 -->
|
| 430 |
+
<div class="row">
|
| 431 |
+
<div class="tech-card">
|
| 432 |
+
<h4>Streamlit</h4>
|
| 433 |
+
<p>
|
| 434 |
+
Used to develop the web-based interface, allowing users to input text, URLs, or documents for summarization and question generation seamlessly.
|
| 435 |
+
</p>
|
| 436 |
+
</div>
|
| 437 |
+
<div class="tech-card">
|
| 438 |
+
<h4>Facebook-BART</h4>
|
| 439 |
+
<p>
|
| 440 |
+
A powerful transformer-based model used for text summarization and natural language understanding, ensuring high-quality summaries with contextual relevance.
|
| 441 |
+
</p>
|
| 442 |
+
</div>
|
| 443 |
+
</div>
|
| 444 |
+
<!-- Row 4 -->
|
| 445 |
+
<div class="row">
|
| 446 |
+
<div class="tech-card">
|
| 447 |
+
<h4>Transformers (Hugging Face)</h4>
|
| 448 |
+
<p>
|
| 449 |
+
Leverages pre-trained transformer models for advanced natural language processing tasks, optimizing summarization and question generation.
|
| 450 |
+
</p>
|
| 451 |
+
</div>
|
| 452 |
+
<div class="tech-card">
|
| 453 |
+
<h4>PDFMiner</h4>
|
| 454 |
+
<p>
|
| 455 |
+
Used for extracting text from PDF files, enabling document-based summarization and automated question generation without losing content integrity.
|
| 456 |
+
</p>
|
| 457 |
+
</div>
|
| 458 |
+
</div><!-- Row 5 -->
|
| 459 |
+
<div class="row">
|
| 460 |
+
<div class="tech-card">
|
| 461 |
+
<h4>Web Summarizer & Q&A Module</h4>
|
| 462 |
+
<p>
|
| 463 |
+
An AI-powered system designed to extract key insights from online content, generating concise summaries and relevant questions for better comprehension.
|
| 464 |
+
</p>
|
| 465 |
+
</div>
|
| 466 |
+
<div class="tech-card">
|
| 467 |
+
<h4>Text Processing</h4>
|
| 468 |
+
<p>
|
| 469 |
+
Handles raw text input efficiently, ensuring proper parsing, cleaning, and analysis before summarization and question generation.
|
| 470 |
+
</p>
|
| 471 |
+
</div>
|
| 472 |
+
</div>
|
| 473 |
+
<!-- Row 6 -->
|
| 474 |
+
<div class="row">
|
| 475 |
+
<div class="tech-card">
|
| 476 |
+
<h4>PDF Support</h4>
|
| 477 |
+
<p>
|
| 478 |
+
Enables users to upload and process PDF documents, extracting text to generate summaries and structured questions automatically.
|
| 479 |
+
</p>
|
| 480 |
+
</div>
|
| 481 |
+
<!-- <div class="tech-card">
|
| 482 |
+
<h4>Text Processing</h4>
|
| 483 |
+
<p>
|
| 484 |
+
Handles raw text input efficiently, ensuring proper parsing, cleaning, and analysis before summarization and question generation.
|
| 485 |
+
</p>
|
| 486 |
+
</div> -->
|
| 487 |
+
</div>
|
| 488 |
+
</div>
|
| 489 |
+
</section>
|
| 490 |
+
<!-- Developers Section -->
|
| 491 |
+
<section class="developers">
|
| 492 |
+
<h2>Developers</h2>
|
| 493 |
+
<div class="developer-card">
|
| 494 |
+
<div class="details">
|
| 495 |
+
<!-- Nishant -->
|
| 496 |
+
<div class="developer">
|
| 497 |
+
<p class="developer-name"><strong>Nishant Maity</strong></p>
|
| 498 |
+
<div class="social-icons">
|
| 499 |
+
<a href="https://www.linkedin.com/in/nishant-maity/" target="_blank">
|
| 500 |
+
<i class="fa-brands fa-linkedin fa-2xl"></i>
|
| 501 |
+
</a>
|
| 502 |
+
<a href="https://github.com/Nishant43S" target="_blank">
|
| 503 |
+
<i class="fa-brands fa-github fa-2xl"></i>
|
| 504 |
+
</a>
|
| 505 |
+
</div>
|
| 506 |
+
</div>
|
| 507 |
+
<!-- Yash -->
|
| 508 |
+
<div class="developer">
|
| 509 |
+
<p class="developer-name"><strong>Yash Sahu</strong></p>
|
| 510 |
+
<div class="social-icons">
|
| 511 |
+
<a href="https://www.linkedin.com/in/yashsahu02" target="_blank">
|
| 512 |
+
<i class="fa-brands fa-linkedin fa-2xl"></i>
|
| 513 |
+
</a>
|
| 514 |
+
<a href="https://github.com/yashsahu02" target="_blank">
|
| 515 |
+
<i class="fa-brands fa-github fa-2xl"></i>
|
| 516 |
+
</a>
|
| 517 |
+
</div>
|
| 518 |
+
</div>
|
| 519 |
+
</div>
|
| 520 |
+
</div>
|
| 521 |
+
</section>
|
| 522 |
+
</div>
|
| 523 |
+
<!-- FontAwesome for icons -->
|
| 524 |
+
<script src="https://kit.fontawesome.com/a076d05399.js" crossorigin="anonymous"></script>
|
| 525 |
+
</body>
|
| 526 |
+
</html>
|
data/requirements.txt
ADDED
|
Binary file (18.6 kB). View file
|
|
|
data/text.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import re
|
| 3 |
+
from cleantext import clean
|
| 4 |
+
import streamlit.components.v1 as component
|
| 5 |
+
from transformers import pipeline
|
| 6 |
+
from functions import Copy_Text
|
| 7 |
+
from functions import *
|
| 8 |
+
|
| 9 |
+
# page settings
|
| 10 |
+
st.set_page_config(
|
| 11 |
+
layout="wide",
|
| 12 |
+
initial_sidebar_state="collapsed"
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
### insert external css
|
| 16 |
+
def insert_css(css_file: str) -> None:
    """Inject the contents of a CSS file into the current Streamlit page.

    The file is read as UTF-8 text, wrapped in a ``<style>`` tag and rendered
    through ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        css_file: Path to the stylesheet to embed.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding: the platform default (e.g. cp1252 on Windows) can
    # fail on, or silently corrupt, non-ASCII characters in the stylesheet.
    with open(css_file, encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
|
| 19 |
+
|
| 20 |
+
# app settings css
|
| 21 |
+
insert_css("css_files/app.css")
|
| 22 |
+
|
| 23 |
+
# sidebar
|
| 24 |
+
app_sidebar = st.sidebar
|
| 25 |
+
with app_sidebar:
|
| 26 |
+
select_mode = st.selectbox(
|
| 27 |
+
label="Select Mode",
|
| 28 |
+
options=["Summarizer","Que/Ans"],
|
| 29 |
+
key="mode selector",
|
| 30 |
+
index=0
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
if select_mode == "Que/Ans":
|
| 34 |
+
st.write("### Que/Ans Settings")
|
| 35 |
+
|
| 36 |
+
max_answer_length = st.slider(
|
| 37 |
+
label="Max answer",
|
| 38 |
+
min_value=1,
|
| 39 |
+
max_value=10,
|
| 40 |
+
key="max answer",
|
| 41 |
+
value=4
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
max_answer_length = max_answer_length*10
|
| 45 |
+
|
| 46 |
+
Best_size = st.slider(
|
| 47 |
+
label="n best size",
|
| 48 |
+
min_value=1,
|
| 49 |
+
max_value=10,
|
| 50 |
+
key="best size",
|
| 51 |
+
value=5
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
# initilize session state
|
| 55 |
+
if 'summary' not in st.session_state:
|
| 56 |
+
st.session_state.summary = []
|
| 57 |
+
|
| 58 |
+
app_col = st.columns([2,8,2])
|
| 59 |
+
|
| 60 |
+
with app_col[1]:
|
| 61 |
+
|
| 62 |
+
if select_mode == "Summarizer":
|
| 63 |
+
st.write("## Text Summarizer")
|
| 64 |
+
elif select_mode == "Que/Ans":
|
| 65 |
+
st.write("## π Text Question Answering")
|
| 66 |
+
|
| 67 |
+
#################### question answering ####################
|
| 68 |
+
|
| 69 |
+
if select_mode == "Que/Ans":
|
| 70 |
+
app_c = st.columns([2,8,2])
|
| 71 |
+
with app_c[0]:
|
| 72 |
+
pass
|
| 73 |
+
with app_c[1]:
|
| 74 |
+
# Inject custom CSS to place the chat input at the bottom
|
| 75 |
+
st.markdown(
|
| 76 |
+
"""
|
| 77 |
+
<style>
|
| 78 |
+
/* Fix the chat input box at the bottom */
|
| 79 |
+
div[data-testid="stChatInput"] {
|
| 80 |
+
position: fixed;
|
| 81 |
+
bottom: 0;
|
| 82 |
+
margin-bottom: 36px;
|
| 83 |
+
|
| 84 |
+
}
|
| 85 |
+
</style>
|
| 86 |
+
""",
|
| 87 |
+
unsafe_allow_html=True
|
| 88 |
+
)
|
| 89 |
+
# Load model
|
| 90 |
+
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
|
| 91 |
+
|
| 92 |
+
# Initialize session state
|
| 93 |
+
if "messages" not in st.session_state:
|
| 94 |
+
st.session_state.messages = []
|
| 95 |
+
|
| 96 |
+
# User inputs context
|
| 97 |
+
context = st.text_area("π Enter Text Hear", "", height=200)
|
| 98 |
+
context = Text_Cleaning(context)
|
| 99 |
+
|
| 100 |
+
# Display chat history
|
| 101 |
+
for message in st.session_state.messages:
|
| 102 |
+
with st.chat_message(message["role"]):
|
| 103 |
+
st.markdown(message["content"])
|
| 104 |
+
|
| 105 |
+
if context:
|
| 106 |
+
user_input = st.chat_input("π¬ Ask a question ",)
|
| 107 |
+
if user_input:
|
| 108 |
+
with st.chat_message("user"):
|
| 109 |
+
st.markdown(user_input)
|
| 110 |
+
|
| 111 |
+
st.session_state.messages.append({"role": "user", "content": user_input})
|
| 112 |
+
|
| 113 |
+
with st.spinner("π€ Thinking..."):
|
| 114 |
+
response = qa_pipeline({"question": user_input, "context": context},
|
| 115 |
+
max_answer_len=max_answer_length, n_best_size=Best_size)
|
| 116 |
+
answer = response["answer"]
|
| 117 |
+
|
| 118 |
+
with st.chat_message("assistant"):
|
| 119 |
+
st.markdown(f"{answer}")
|
| 120 |
+
|
| 121 |
+
st.session_state.messages.append({"role": "assistant", "content": f"{answer}"})
|
| 122 |
+
|
| 123 |
+
# Clear chat history button
|
| 124 |
+
if st.button("ποΈ Clear Chat"):
|
| 125 |
+
st.session_state.messages = []
|
| 126 |
+
st.rerun()
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
############ summarizer ###########
|
| 130 |
+
|
| 131 |
+
app_sum_col = st.columns([2,8,2])
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
# add session state
|
| 135 |
+
if 'summary' not in st.session_state:
|
| 136 |
+
st.session_state.summary = []
|
| 137 |
+
|
| 138 |
+
with app_sum_col[1]:
|
| 139 |
+
if select_mode == "Summarizer":
|
| 140 |
+
Text_input = st.text_area(label="π Enter Text Hear",key="Summarizer input",height=220)
|
| 141 |
+
Text_input = Text_Cleaning(Text_input)
|
| 142 |
+
|
| 143 |
+
if Text_input.strip() != "":
|
| 144 |
+
st.session_state.summary = []
|
| 145 |
+
|
| 146 |
+
# Suggested summary length: 30% of the number of words in the input.
value_func = lambda x: x * 0.3
# Count the words once instead of re-splitting the text for every argument.
word_count = len(Text_input.split())
# max length
max_tokens = st.slider(
    label="Max Length",
    key="max length",
    min_value=1,
    max_value=word_count,
    # For inputs shorter than four words int(0.3 * n) is 0, which lies below
    # the declared min_value; clamp so the default always stays inside the
    # slider's range.
    value=max(1, int(value_func(word_count)))
)
|
| 155 |
+
|
| 156 |
+
if st.button(label="π Generate Summary "):
|
| 157 |
+
try:
|
| 158 |
+
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 159 |
+
st.session_state.summary = summarizer(
|
| 160 |
+
Text_input,
|
| 161 |
+
max_length=max_tokens+20,
|
| 162 |
+
min_length=max_tokens,
|
| 163 |
+
do_sample=False
|
| 164 |
+
)
|
| 165 |
+
|
| 166 |
+
except Exception as e:
|
| 167 |
+
st.warning(f"Error...\n{e}",icon="β οΈ")
|
| 168 |
+
|
| 169 |
+
if st.session_state.summary:
|
| 170 |
+
with st.spinner("Generating Summary..."):
|
| 171 |
+
st.write("### Summary")
|
| 172 |
+
generated_summary = st.session_state.summary[0]['summary_text']
|
| 173 |
+
st.write(generated_summary)
|
| 174 |
+
Copy_Text(generated_summary)
|
data/webscraper.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import requests
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
import re
|
| 5 |
+
from cleantext import clean
|
| 6 |
+
import streamlit.components.v1 as component
|
| 7 |
+
from transformers import pipeline
|
| 8 |
+
from functions import Copy_Text
|
| 9 |
+
from functions import *
|
| 10 |
+
|
| 11 |
+
### import animation
|
| 12 |
+
def particle(Js_file):
    """Embed an HTML animation file in the page as a Streamlit component.

    The file is read as UTF-8 text and rendered via ``component.html`` in a
    frame that is 400 pixels high.

    Args:
        Js_file: Path to the HTML file containing the animation markup.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding so the markup is decoded the same way on every
    # platform instead of depending on the locale default.
    with open(Js_file, encoding="utf-8") as f:
        component.html(f.read(), height=400)
|
| 15 |
+
|
| 16 |
+
### insert external css
|
| 17 |
+
def insert_css(css_file: str) -> None:
    """Inject the contents of a CSS file into the current Streamlit page.

    The file is read as UTF-8 text, wrapped in a ``<style>`` tag and rendered
    through ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        css_file: Path to the stylesheet to embed.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding: the platform default (e.g. cp1252 on Windows) can
    # fail on, or silently corrupt, non-ASCII characters in the stylesheet.
    with open(css_file, encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# page settings
|
| 23 |
+
st.set_page_config(
|
| 24 |
+
layout="wide",
|
| 25 |
+
initial_sidebar_state="collapsed"
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
# sidebar
|
| 29 |
+
app_sidebar = st.sidebar
|
| 30 |
+
with app_sidebar:
|
| 31 |
+
select_mode = st.selectbox(
|
| 32 |
+
label="Select Mode",
|
| 33 |
+
options=["Summarizer","Que/Ans"],
|
| 34 |
+
key="mode selector",
|
| 35 |
+
index=0
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
if select_mode == "Que/Ans":
|
| 39 |
+
st.write("### Que/Ans Settings")
|
| 40 |
+
|
| 41 |
+
max_answer_length = st.slider(
|
| 42 |
+
label="Max answer",
|
| 43 |
+
min_value=1,
|
| 44 |
+
max_value=10,
|
| 45 |
+
key="max answer",
|
| 46 |
+
value=4
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
max_answer_length = max_answer_length*10
|
| 50 |
+
|
| 51 |
+
Best_size = st.slider(
|
| 52 |
+
label="n best size",
|
| 53 |
+
min_value=1,
|
| 54 |
+
max_value=10,
|
| 55 |
+
key="best size",
|
| 56 |
+
value=5
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
# Initialize session state
|
| 60 |
+
if 'scraped_paragraphs' not in st.session_state:
|
| 61 |
+
st.session_state.scraped_paragraphs = []
|
| 62 |
+
if 'summarizer_mode' not in st.session_state:
|
| 63 |
+
st.session_state.summarizer_mode = False
|
| 64 |
+
if 'summary' not in st.session_state:
|
| 65 |
+
st.session_state.summary = []
|
| 66 |
+
|
| 67 |
+
app_col = st.columns([2,8,2],gap="small")
|
| 68 |
+
|
| 69 |
+
with app_col[0]:
|
| 70 |
+
pass
|
| 71 |
+
|
| 72 |
+
with app_col[2]:
|
| 73 |
+
pass
|
| 74 |
+
|
| 75 |
+
with app_col[1]:
|
| 76 |
+
# Title
|
| 77 |
+
st.write("## GenAi Scraper")
|
| 78 |
+
|
| 79 |
+
# Input URL
|
| 80 |
+
url_input = st.text_input(label="Enter Website URL",key="url input",placeholder="https://www.example.com")
|
| 81 |
+
|
| 82 |
+
# number of paragraphs
|
| 83 |
+
num_paragraphs = st.slider("Select number of paragraphs to scrape", 1, 30, 5)
|
| 84 |
+
|
| 85 |
+
scrap_btn = st.button("Scrape Paragraphs",key="scrap button")
|
| 86 |
+
|
| 87 |
+
if url_input.strip() == "" and not scrap_btn:
|
| 88 |
+
# animation
|
| 89 |
+
particle("animation/particles.html")
|
| 90 |
+
|
| 91 |
+
else:
|
| 92 |
+
if scrap_btn:
|
| 93 |
+
st.session_state.scraped_paragraphs = scrape_paragraphs(url_input, num_paragraphs)
|
| 94 |
+
st.session_state.summary = [] # Reset summary
|
| 95 |
+
|
| 96 |
+
# Display scraped paragraphs
|
| 97 |
+
if st.session_state.scraped_paragraphs:
|
| 98 |
+
|
| 99 |
+
st.write("### Scraped Paragraphs")
|
| 100 |
+
|
| 101 |
+
paragraph_scrap = "\n\n".join(st.session_state.scraped_paragraphs)
|
| 102 |
+
st.write(Text_Cleaning(paragraph_scrap))
|
| 103 |
+
|
| 104 |
+
Copy_Text(Text_Cleaning(paragraph_scrap)) ## copy text
|
| 105 |
+
|
| 106 |
+
#################### summarizer #############
|
| 107 |
+
|
| 108 |
+
if select_mode == "Summarizer":
|
| 109 |
+
if st.session_state.scraped_paragraphs:
|
| 110 |
+
# Toggle for summarization mode
|
| 111 |
+
st.session_state.summarizer_mode = st.toggle("Enable Summarizer Mode", st.session_state.summarizer_mode)
|
| 112 |
+
|
| 113 |
+
if st.session_state.summarizer_mode:
|
| 114 |
+
# Suggested summary length: 30% of the scraped word count.
value_func = lambda x: x * 0.3
# Total words across all scraped paragraphs, computed once and reused below.
total_words = sum(len(p.split()) for p in st.session_state.scraped_paragraphs)
min_tokens = 10
max_tokens = st.slider(
    label="Select Max Token Length",
    min_value=min_tokens,
    # Very short scrapes (< 10 words) would otherwise put the upper bound
    # below the lower bound.
    max_value=max(total_words, min_tokens),
    # int(0.3 * n) drops below min_tokens when fewer than 34 words were
    # scraped; clamp so the default stays inside the declared range.
    value=max(min_tokens, int(value_func(total_words)))
)
|
| 121 |
+
if st.button("π Generate Summary"):
|
| 122 |
+
with st.spinner("Generating Summary..."):
|
| 123 |
+
try:
|
| 124 |
+
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 125 |
+
st.session_state.summary = summarizer(
|
| 126 |
+
Text_Cleaning(" ".join(st.session_state.scraped_paragraphs)),
|
| 127 |
+
max_length=max_tokens+20,
|
| 128 |
+
min_length=max_tokens,
|
| 129 |
+
do_sample=False
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
except Exception as e:
|
| 133 |
+
st.warning(f"Error...\n{e}",icon="β οΈ")
|
| 134 |
+
|
| 135 |
+
# Display summary
|
| 136 |
+
if st.session_state.summary:
|
| 137 |
+
st.write("### Summary")
|
| 138 |
+
generated_summary = st.session_state.summary[0]['summary_text']
|
| 139 |
+
st.write(generated_summary)
|
| 140 |
+
Copy_Text(generated_summary)
|
| 141 |
+
|
| 142 |
+
################# question answering #####################
|
| 143 |
+
|
| 144 |
+
elif select_mode == "Que/Ans":
|
| 145 |
+
if st.session_state.scraped_paragraphs:
|
| 146 |
+
if st.toggle(label="Question Answering",key="Q/A"):
|
| 147 |
+
# Inject custom CSS to place the chat input at the bottom
|
| 148 |
+
st.markdown(
|
| 149 |
+
"""
|
| 150 |
+
<style>
|
| 151 |
+
/* Fix the chat input box at the bottom */
|
| 152 |
+
div[data-testid="stChatInput"] {
|
| 153 |
+
position: fixed;
|
| 154 |
+
bottom: 0;
|
| 155 |
+
margin-bottom: 36px;
|
| 156 |
+
|
| 157 |
+
}
|
| 158 |
+
</style>
|
| 159 |
+
""",
|
| 160 |
+
unsafe_allow_html=True
|
| 161 |
+
)
|
| 162 |
+
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
|
| 163 |
+
|
| 164 |
+
# Initialize session state
|
| 165 |
+
if "messages" not in st.session_state:
|
| 166 |
+
st.session_state.messages = []
|
| 167 |
+
|
| 168 |
+
# User inputs context
|
| 169 |
+
context = Text_Cleaning(paragraph_scrap)
|
| 170 |
+
|
| 171 |
+
# Display chat history
|
| 172 |
+
for message in st.session_state.messages:
|
| 173 |
+
with st.chat_message(message["role"]):
|
| 174 |
+
st.markdown(message["content"])
|
| 175 |
+
|
| 176 |
+
if context:
|
| 177 |
+
user_input = st.chat_input("π¬ Ask a question ",)
|
| 178 |
+
if user_input:
|
| 179 |
+
with st.chat_message("user"):
|
| 180 |
+
st.markdown(user_input)
|
| 181 |
+
|
| 182 |
+
st.session_state.messages.append({"role": "user", "content": user_input})
|
| 183 |
+
|
| 184 |
+
with st.spinner("π€ Thinking..."):
|
| 185 |
+
response = qa_pipeline({"question": user_input, "context": context},
|
| 186 |
+
max_answer_len=max_answer_length, n_best_size=Best_size)
|
| 187 |
+
answer = response["answer"]
|
| 188 |
+
|
| 189 |
+
with st.chat_message("assistant"):
|
| 190 |
+
st.markdown(f"{answer}")
|
| 191 |
+
|
| 192 |
+
st.session_state.messages.append({"role": "assistant", "content": f"{answer}"})
|
| 193 |
+
|
| 194 |
+
# Clear chat history button
|
| 195 |
+
if st.button("ποΈ Clear Chat"):
|
| 196 |
+
st.session_state.messages = []
|
| 197 |
+
st.rerun()
|
| 198 |
+
|
| 199 |
+
# app settings css
|
| 200 |
+
insert_css("css_files/app.css")
|