Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -67,21 +67,29 @@ ocr_id = {
|
|
| 67 |
def pdf_pil(file_path,page_num):
    """Render one page of ``data.pdf`` and return it as a PIL image.

    Args:
        file_path: Accepted for interface compatibility but not read here;
            the document is always opened from the hard-coded ``"data.pdf"``.
            NOTE(review): presumably a caller stages the uploaded file under
            that name -- confirm before switching this to ``file_path``.
        page_num: 1-based page number; anything ``int()`` accepts.

    Returns:
        The image produced by ``bitmap.to_pil()``. Previously the function
        ended in a bare ``return``, so the rendered image was thrown away
        and callers always received ``None``.
    """
    pdf = pdfium.PdfDocument("data.pdf")

    # Callers count pages from 1; get_page() is given the value minus one.
    page = pdf.get_page(int(page_num) - 1)

    bitmap = page.render(
        scale=1,     # 72dpi resolution
        rotation=0,  # no additional rotation
    )
    return bitmap.to_pil()
|
| 82 |
|
| 83 |
def ocrpdf(file_path,pdf_lang,page_num):
|
| 84 |
-
img1=pdf_pil(file_path,page_num)
|
| 85 |
print("DONE 1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
|
| 86 |
lang=[f"{ocr_id[pdf_lang]}"]
|
| 87 |
print("DONE 2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
|
|
|
|
| 67 |
def pdf_pil(file_path,page_num):
    """Render one page of ``data.pdf`` to a PNG file and return its name.

    Progress messages are printed after each stage so a failure can be
    located from the application log.

    Args:
        file_path: Accepted for interface compatibility but not read here;
            the document is always opened from the hard-coded ``"data.pdf"``.
            NOTE(review): presumably a caller stages the uploaded file under
            that name -- confirm before switching this to ``file_path``.
        page_num: 1-based page number; anything ``int()`` accepts. It is
            also embedded verbatim in the output file name.

    Returns:
        The relative file name ``image_<page_num>.png`` that the rendered
        page was saved to (in the current working directory).
    """
    # Build the output name once; it is both the save target and the result.
    output_name = f"image_{page_num}.png"

    pdf = pdfium.PdfDocument("data.pdf")
    try:
        print ("\n PDF read !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")

        # Callers count pages from 1; get_page() is given the value minus one.
        page = pdf.get_page(int(page_num) - 1)
        print ("\n Page read !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")

        bitmap = page.render(
            scale=1,     # 72dpi resolution
            rotation=0,  # no additional rotation
        )
        print ("\n Page rendered !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")

        pil_image = bitmap.to_pil()
        print ("\n Page to PIL !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")

        # Save while the document is still open, before the handle is released.
        pil_image.save(output_name)
        print ("\n Page saved !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
    finally:
        # Release the pdfium document handle even when a stage above raises,
        # instead of leaving it to garbage collection.
        pdf.close()

    return output_name
|
| 90 |
|
| 91 |
def ocrpdf(file_path,pdf_lang,page_num):
|
| 92 |
+
img1 = pdf_pil(file_path,page_num)
|
| 93 |
print("DONE 1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
|
| 94 |
lang=[f"{ocr_id[pdf_lang]}"]
|
| 95 |
print("DONE 2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
|