Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import tempfile
|
|
| 5 |
import zipfile
|
| 6 |
|
| 7 |
def extract_text(pdf_file):
|
|
|
|
| 8 |
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
| 9 |
text = ""
|
| 10 |
for page in doc:
|
|
@@ -12,6 +13,7 @@ def extract_text(pdf_file):
|
|
| 12 |
return text, None
|
| 13 |
|
| 14 |
def extract_images(pdf_file):
|
|
|
|
| 15 |
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
| 16 |
output_dir = tempfile.mkdtemp()
|
| 17 |
img_count = 0
|
|
@@ -28,7 +30,7 @@ def extract_images(pdf_file):
|
|
| 28 |
with open(image_filename, "wb") as image_file:
|
| 29 |
image_file.write(image_bytes)
|
| 30 |
img_count += 1
|
| 31 |
-
except Exception
|
| 32 |
continue
|
| 33 |
|
| 34 |
if img_count == 0:
|
|
@@ -45,6 +47,7 @@ def extract_images(pdf_file):
|
|
| 45 |
def merge_pdfs(pdf_files):
|
| 46 |
merged_pdf = fitz.open()
|
| 47 |
for pdf_file in pdf_files:
|
|
|
|
| 48 |
with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
|
| 49 |
merged_pdf.insert_pdf(doc)
|
| 50 |
temp_path = tempfile.mktemp(suffix=".pdf")
|
|
@@ -52,6 +55,7 @@ def merge_pdfs(pdf_files):
|
|
| 52 |
return "PDFs merged successfully.", temp_path
|
| 53 |
|
| 54 |
def split_pdf(pdf_file):
|
|
|
|
| 55 |
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
| 56 |
output_dir = tempfile.mkdtemp()
|
| 57 |
for page_num in range(len(doc)):
|
|
|
|
| 5 |
import zipfile
|
| 6 |
|
| 7 |
def extract_text(pdf_file):
|
| 8 |
+
pdf_file.seek(0)
|
| 9 |
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
| 10 |
text = ""
|
| 11 |
for page in doc:
|
|
|
|
| 13 |
return text, None
|
| 14 |
|
| 15 |
def extract_images(pdf_file):
|
| 16 |
+
pdf_file.seek(0)
|
| 17 |
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
| 18 |
output_dir = tempfile.mkdtemp()
|
| 19 |
img_count = 0
|
|
|
|
| 30 |
with open(image_filename, "wb") as image_file:
|
| 31 |
image_file.write(image_bytes)
|
| 32 |
img_count += 1
|
| 33 |
+
except Exception:
|
| 34 |
continue
|
| 35 |
|
| 36 |
if img_count == 0:
|
|
|
|
| 47 |
def merge_pdfs(pdf_files):
|
| 48 |
merged_pdf = fitz.open()
|
| 49 |
for pdf_file in pdf_files:
|
| 50 |
+
pdf_file.seek(0)
|
| 51 |
with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
|
| 52 |
merged_pdf.insert_pdf(doc)
|
| 53 |
temp_path = tempfile.mktemp(suffix=".pdf")
|
|
|
|
| 55 |
return "PDFs merged successfully.", temp_path
|
| 56 |
|
| 57 |
def split_pdf(pdf_file):
|
| 58 |
+
pdf_file.seek(0)
|
| 59 |
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
| 60 |
output_dir = tempfile.mkdtemp()
|
| 61 |
for page_num in range(len(doc)):
|