Spaces:
Sleeping
Sleeping
Commit
·
0e6c3f4
1
Parent(s):
aaaf3f2
display uploaded pdf
Browse files
- Examples/test.txt +0 -0
- __pycache__/page.cpython-310.pyc +0 -0
- __pycache__/summarize.cpython-310.pyc +0 -0
- page.py +15 -2
- requirements.txt +1 -0
- summarize.py +22 -4
Examples/test.txt
DELETED
|
File without changes
|
__pycache__/page.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/page.cpython-310.pyc and b/__pycache__/page.cpython-310.pyc differ
|
|
|
__pycache__/summarize.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/summarize.cpython-310.pyc and b/__pycache__/summarize.cpython-310.pyc differ
|
|
|
page.py
CHANGED
|
@@ -1,8 +1,14 @@
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
| 2 |
from summarize import Summarizer
|
| 3 |
import pdfplumber
|
| 4 |
|
| 5 |
def createDemoPage(path):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
with st.spinner("Summarizing text..."):
|
| 7 |
summarizer = Summarizer(model = "groq")
|
| 8 |
text = readpdf("Examples/Kris.pdf")
|
|
@@ -11,8 +17,15 @@ def createDemoPage(path):
|
|
| 11 |
st.subheader("Summary")
|
| 12 |
st.write(summary)
|
| 13 |
|
| 14 |
-
st.subheader("Extracted Text")
|
| 15 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
def readpdf(path):
|
| 18 |
text = ""
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
from streamlit import session_state as ss
|
| 3 |
+
from streamlit_pdf_viewer import pdf_viewer
|
| 4 |
from summarize import Summarizer
|
| 5 |
import pdfplumber
|
| 6 |
|
| 7 |
def createDemoPage(path):
|
| 8 |
+
# For displaying pdf
|
| 9 |
+
if 'pdf_ref' not in ss:
|
| 10 |
+
ss.pdf_ref = None
|
| 11 |
+
|
| 12 |
with st.spinner("Summarizing text..."):
|
| 13 |
summarizer = Summarizer(model = "groq")
|
| 14 |
text = readpdf("Examples/Kris.pdf")
|
|
|
|
| 17 |
st.subheader("Summary")
|
| 18 |
st.write(summary)
|
| 19 |
|
| 20 |
+
#st.subheader("Extracted Text")
|
| 21 |
+
with st.expander("Extracted Text", expanded = False):
|
| 22 |
+
st.write(text)
|
| 23 |
+
|
| 24 |
+
st.subheader("Original pdf")
|
| 25 |
+
with open(path, 'rb') as pdf_ref:
|
| 26 |
+
bytes_data = pdf_ref.read()
|
| 27 |
+
pdf_viewer(input=bytes_data, width=700)
|
| 28 |
+
|
| 29 |
|
| 30 |
def readpdf(path):
|
| 31 |
text = ""
|
requirements.txt
CHANGED
|
@@ -6,3 +6,4 @@ transformers
|
|
| 6 |
torch
|
| 7 |
groq
|
| 8 |
python-dotenv
|
|
|
|
|
|
| 6 |
torch
|
| 7 |
groq
|
| 8 |
python-dotenv
|
| 9 |
+
streamlit_pdf_viewer
|
summarize.py
CHANGED
|
@@ -15,18 +15,31 @@ from transformers import AutoTokenizer
|
|
| 15 |
import transformers
|
| 16 |
import torch
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
class Summarizer:
|
| 19 |
|
| 20 |
def __init__(self, model = "groq"):
|
| 21 |
self.model = model
|
| 22 |
|
| 23 |
def run_app(self):
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
if uploaded_file is not None:
|
|
|
|
| 27 |
if uploaded_file.type == "application/pdf":
|
| 28 |
with st.spinner("Extracting text from PDF..."):
|
| 29 |
text = self.extract_text_from_pdf(uploaded_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
else:
|
| 31 |
image = Image.open(uploaded_file)
|
| 32 |
with st.spinner("Extracting text from image..."):
|
|
@@ -37,9 +50,14 @@ class Summarizer:
|
|
| 37 |
summary = self.summarize_using_groq(text)
|
| 38 |
st.subheader("Summary")
|
| 39 |
st.write(summary)
|
| 40 |
-
|
| 41 |
-
st.
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
|
| 45 |
# Function to extract text from an image
|
|
|
|
| 15 |
import transformers
|
| 16 |
import torch
|
| 17 |
|
| 18 |
+
from streamlit import session_state as ss
|
| 19 |
+
from streamlit_pdf_viewer import pdf_viewer
|
| 20 |
+
|
| 21 |
class Summarizer:
|
| 22 |
|
| 23 |
def __init__(self, model = "groq"):
|
| 24 |
self.model = model
|
| 25 |
|
| 26 |
def run_app(self):
|
| 27 |
+
# For displaying pdf
|
| 28 |
+
if 'pdf_ref' not in ss:
|
| 29 |
+
ss.pdf_ref = None
|
| 30 |
+
|
| 31 |
+
uploaded_file = st.file_uploader("Upload an Image or PDF", type=["jpg", "jpeg", "png", "pdf"], key="file")
|
| 32 |
|
| 33 |
if uploaded_file is not None:
|
| 34 |
+
|
| 35 |
if uploaded_file.type == "application/pdf":
|
| 36 |
with st.spinner("Extracting text from PDF..."):
|
| 37 |
text = self.extract_text_from_pdf(uploaded_file)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
if ss.file:
|
| 41 |
+
ss.pdf_ref = ss.file
|
| 42 |
+
|
| 43 |
else:
|
| 44 |
image = Image.open(uploaded_file)
|
| 45 |
with st.spinner("Extracting text from image..."):
|
|
|
|
| 50 |
summary = self.summarize_using_groq(text)
|
| 51 |
st.subheader("Summary")
|
| 52 |
st.write(summary)
|
| 53 |
+
|
| 54 |
+
with st.expander("Extracted Text", expanded = False):
|
| 55 |
+
st.write(text)
|
| 56 |
+
|
| 57 |
+
if ss.pdf_ref:
|
| 58 |
+
st.subheader("Original pdf")
|
| 59 |
+
binary_data = ss.pdf_ref.getvalue()
|
| 60 |
+
pdf_viewer(input=binary_data, width=700)
|
| 61 |
|
| 62 |
|
| 63 |
# Function to extract text from an image
|