Spaces:
Runtime error
Runtime error
ddovidovich committed on
Commit ·
75539bd
1
Parent(s): 35412cc
pdf fix
Browse files
- app.py +10 -4
- packages.txt +0 -1
app.py
CHANGED
|
@@ -15,7 +15,7 @@ from langchain.chains.question_answering import load_qa_chain
|
|
| 15 |
from PIL import Image
|
| 16 |
from datetime import datetime
|
| 17 |
from tempfile import NamedTemporaryFile
|
| 18 |
-
|
| 19 |
|
| 20 |
st.subheader("Upload CV in PDF or image format")
|
| 21 |
uploaded_file = st.file_uploader("Upload PDF or Images", type=["pdf","png","jpg","jpeg"])
|
|
@@ -36,9 +36,15 @@ if uploaded_file:
|
|
| 36 |
with NamedTemporaryFile(delete=False, dir='.', suffix='.pdf') as f:
|
| 37 |
f.write(uploaded_file.getbuffer())
|
| 38 |
PDFFileName = f.name
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
# with open(PDFFileName,"rb") as f:
|
| 43 |
# base64_pdf = base64.b64encode(f.read()).decode('utf-8')
|
| 44 |
# pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="800" height="800" type="application/pdf"></iframe>'
|
|
|
|
| 15 |
from PIL import Image
|
| 16 |
from datetime import datetime
|
| 17 |
from tempfile import NamedTemporaryFile
|
| 18 |
+
import pypdfium2 as pdfium
|
| 19 |
|
| 20 |
st.subheader("Upload CV in PDF or image format")
|
| 21 |
uploaded_file = st.file_uploader("Upload PDF or Images", type=["pdf","png","jpg","jpeg"])
|
|
|
|
| 36 |
with NamedTemporaryFile(delete=False, dir='.', suffix='.pdf') as f:
|
| 37 |
f.write(uploaded_file.getbuffer())
|
| 38 |
PDFFileName = f.name
|
| 39 |
+
# Open the temporary file the upload was just written to (PDFFileName),
# not a hard-coded placeholder path, so the uploaded PDF is what gets rendered.
pdf = pdfium.PdfDocument(PDFFileName)
|
| 40 |
+
n_pages = len(pdf)
|
| 41 |
+
for page_number in range(n_pages):
|
| 42 |
+
page = pdf.get_page(page_number)
|
| 43 |
+
pil_image = page.render_topil(scale=1,rotation=0,crop=(0, 0, 0, 0),colour=(255, 255, 255, 255),annotations=True,greyscale=False,optimise_mode=pdfium.OptimiseMode.NONE,)
|
| 44 |
+
st.image(pil_image,width=700)
|
| 45 |
+
# images = convert_from_path(PDFFileName)
|
| 46 |
+
# for i in range(len(images)):
|
| 47 |
+
# st.image(images[i],width=700)
|
| 48 |
# with open(PDFFileName,"rb") as f:
|
| 49 |
# base64_pdf = base64.b64encode(f.read()).decode('utf-8')
|
| 50 |
# pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="800" height="800" type="application/pdf"></iframe>'
|
packages.txt
CHANGED
|
@@ -2,4 +2,3 @@ poppler-utils
|
|
| 2 |
tesseract-ocr
|
| 3 |
chromium
|
| 4 |
chromium-driver
|
| 5 |
-
pdf2image
|
|
|
|
| 2 |
tesseract-ocr
|
| 3 |
chromium
|
| 4 |
chromium-driver
|
|
|