Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -77,11 +77,11 @@ def pdf_pil(file_path,page_num):
|
|
| 77 |
|
| 78 |
return (f"image_{page_num}.png")
|
| 79 |
|
| 80 |
-
def ocrpdf(file_path,pdf_lang,page_num,sent_wid,):
|
| 81 |
img1 = pdf_pil(file_path,page_num)
|
| 82 |
lang=[f"{ocr_id[pdf_lang]}"]
|
| 83 |
reader = easyocr.Reader(lang)
|
| 84 |
-
bounds = reader.readtext(img1,width_ths=
|
| 85 |
|
| 86 |
this = ""
|
| 87 |
for bound in bounds:
|
|
@@ -97,7 +97,7 @@ def scrape(instring):
|
|
| 97 |
</div>''')
|
| 98 |
return gr.HTML.update(f'''{html_src}''')
|
| 99 |
|
| 100 |
-
def scrape00(instring, page_num,pdf_lang):
|
| 101 |
response = requests.get(instring, stream=True)
|
| 102 |
|
| 103 |
if response.status_code == 200:
|
|
@@ -119,7 +119,7 @@ def scrape00(instring, page_num,pdf_lang):
|
|
| 119 |
sum_out = summarizer(text)
|
| 120 |
except Exception:
|
| 121 |
try:
|
| 122 |
-
text = ocrpdf("data.pdf",pdf_lang,page_num)
|
| 123 |
sum_out = summarizer(text)
|
| 124 |
except Exception:
|
| 125 |
sum_out = "Error"
|
|
|
|
| 77 |
|
| 78 |
return (f"image_{page_num}.png")
|
| 79 |
|
| 80 |
+
def ocrpdf(file_path,pdf_lang,page_num,sent_wid,contrast_det):
|
| 81 |
img1 = pdf_pil(file_path,page_num)
|
| 82 |
lang=[f"{ocr_id[pdf_lang]}"]
|
| 83 |
reader = easyocr.Reader(lang)
|
| 84 |
+
bounds = reader.readtext(img1,width_ths=sent_wid,contrast_ths=contrast_det)
|
| 85 |
|
| 86 |
this = ""
|
| 87 |
for bound in bounds:
|
|
|
|
| 97 |
</div>''')
|
| 98 |
return gr.HTML.update(f'''{html_src}''')
|
| 99 |
|
| 100 |
+
def scrape00(instring, page_num,pdf_lang,sent_wid,contrast_det):
|
| 101 |
response = requests.get(instring, stream=True)
|
| 102 |
|
| 103 |
if response.status_code == 200:
|
|
|
|
| 119 |
sum_out = summarizer(text)
|
| 120 |
except Exception:
|
| 121 |
try:
|
| 122 |
+
text = ocrpdf("data.pdf",pdf_lang,page_num,sent_wid,contrast_det)
|
| 123 |
sum_out = summarizer(text)
|
| 124 |
except Exception:
|
| 125 |
sum_out = "Error"
|