Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,7 @@ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
|
|
| 14 |
from olmocr.data.renderpdf import render_pdf_to_base64png
|
| 15 |
from olmocr.prompts import build_finetuning_prompt
|
| 16 |
from olmocr.prompts.anchor import get_anchor_text
|
|
|
|
| 17 |
|
| 18 |
# Set Hugging Face and Torch cache to a guaranteed-writable location
|
| 19 |
cache_dir = "/tmp/huggingface_cache"
|
|
@@ -29,6 +30,9 @@ model = Qwen2VLForConditionalGeneration.from_pretrained(
|
|
| 29 |
).eval().to(device)
|
| 30 |
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
|
| 31 |
|
|
|
|
|
|
|
|
|
|
| 32 |
def ocr_page(pdf_path, page_num):
|
| 33 |
image_b64 = render_pdf_to_base64png(pdf_path, page_num + 1, target_longest_image_dim=1024)
|
| 34 |
anchor_text = get_anchor_text(pdf_path, page_num + 1, pdf_engine="pdfreport", target_length=4000)
|
|
@@ -89,22 +93,19 @@ def create_epub_from_text(text, output_path, title, author, language, cover_imag
|
|
| 89 |
epub.write_epub(output_path, book)
|
| 90 |
|
| 91 |
def convert_pdf_to_epub(pdf_file, title, author, language):
|
| 92 |
-
|
| 93 |
-
tmp_pdf_path = pdf_file.name # Use the actual temp file path from Gradio
|
| 94 |
|
| 95 |
-
#
|
| 96 |
reader = PdfReader(tmp_pdf_path)
|
| 97 |
-
|
| 98 |
-
# Extract the first page for the cover (if needed)
|
| 99 |
-
first_page = reader.pages[0]
|
| 100 |
cover_path = "/tmp/cover.jpg"
|
| 101 |
images = convert_from_path(tmp_pdf_path, first_page=1, last_page=1)
|
| 102 |
images[0].save(cover_path, "JPEG")
|
| 103 |
|
| 104 |
-
# Run OCR
|
| 105 |
-
|
|
|
|
| 106 |
|
| 107 |
-
#
|
| 108 |
epub_path = "/tmp/output.epub"
|
| 109 |
create_epub_from_text(
|
| 110 |
text=ocr_text,
|
|
|
|
| 14 |
from olmocr.data.renderpdf import render_pdf_to_base64png
|
| 15 |
from olmocr.prompts import build_finetuning_prompt
|
| 16 |
from olmocr.prompts.anchor import get_anchor_text
|
| 17 |
+
from olmocr.pipeline import PDFToTextOCR # ✅ Import the OCR pipeline
|
| 18 |
|
| 19 |
# Set Hugging Face and Torch cache to a guaranteed-writable location
|
| 20 |
cache_dir = "/tmp/huggingface_cache"
|
|
|
|
| 30 |
).eval().to(device)
|
| 31 |
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
|
| 32 |
|
| 33 |
+
# Initialize olmocr OCR pipeline
|
| 34 |
+
ocr_pipeline = PDFToTextOCR()
|
| 35 |
+
|
| 36 |
def ocr_page(pdf_path, page_num):
|
| 37 |
image_b64 = render_pdf_to_base64png(pdf_path, page_num + 1, target_longest_image_dim=1024)
|
| 38 |
anchor_text = get_anchor_text(pdf_path, page_num + 1, pdf_engine="pdfreport", target_length=4000)
|
|
|
|
| 93 |
epub.write_epub(output_path, book)
|
| 94 |
|
| 95 |
def convert_pdf_to_epub(pdf_file, title, author, language):
|
| 96 |
+
tmp_pdf_path = pdf_file.name
|
|
|
|
| 97 |
|
| 98 |
+
# Read the first page for cover
|
| 99 |
reader = PdfReader(tmp_pdf_path)
|
|
|
|
|
|
|
|
|
|
| 100 |
cover_path = "/tmp/cover.jpg"
|
| 101 |
images = convert_from_path(tmp_pdf_path, first_page=1, last_page=1)
|
| 102 |
images[0].save(cover_path, "JPEG")
|
| 103 |
|
| 104 |
+
# Run OCR using olmocr pipeline
|
| 105 |
+
ocr_result = ocr_pipeline(tmp_pdf_path)
|
| 106 |
+
ocr_text = "\n\n".join([page.text for page in ocr_result.pages])
|
| 107 |
|
| 108 |
+
# Create EPUB
|
| 109 |
epub_path = "/tmp/output.epub"
|
| 110 |
create_epub_from_text(
|
| 111 |
text=ocr_text,
|