Spaces:
Sleeping
Sleeping
Luis J Camargo committed on
Commit ·
8d56937
1
Parent(s): aa9595c
attempt for text
Browse files
app.py
CHANGED
|
@@ -57,6 +57,22 @@ class PaddleOCRModelManager(object):
|
|
| 57 |
finally:
|
| 58 |
self._queue.task_done()
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
def create_model():
|
| 62 |
"""Initialize PaddleOCR-VL with the fine-tuned Tachiwin model"""
|
|
@@ -97,10 +113,10 @@ def inference(img):
|
|
| 97 |
extracted_texts = []
|
| 98 |
|
| 99 |
for page in result:
|
| 100 |
-
if
|
| 101 |
-
for block in page
|
| 102 |
-
if 'content'
|
| 103 |
-
extracted_texts.append(block
|
| 104 |
|
| 105 |
if not extracted_texts:
|
| 106 |
return "No text could be extracted from the image."
|
|
@@ -136,6 +152,7 @@ examples = [
|
|
| 136 |
['mir.jpg'],
|
| 137 |
['ote.jpg'],
|
| 138 |
['otm.jpg'],
|
|
|
|
| 139 |
]
|
| 140 |
|
| 141 |
example_labels = """
|
|
@@ -147,6 +164,7 @@ example_labels = """
|
|
| 147 |
| maj.jpg | Mazatec, Jalapa de Díaz | Kui xi já maña̱ xi ngakjá ku̱a̱kúya ni xi ts'e̱ Nti̱a̱ná. Kj'a̱í ni xi ku̱a̱kúyanu̱u, kui xi ts'i̱ínkatsúnnu̱u. Najmi ts'i̱ínkie yjoho̱ nga Nda̱ Nti̱a̱ná xi ts'asjejihi̱n. B'a̱ ts'ín ki̱tsa̱ ts'i̱ín nibánehe̱ ra̱ yjoho̱ nga n'e̱kje. Nkjin xi i̱ncha ts'i̱ín ni xi i̱ncha ts'ín jóo̱, ni xi tu̱ subahá maná. |
|
| 148 |
| mir.jpg | Isthmus Mixe | Cab jaduhṉ yhahixøꞌøy coo jaꞌa naam̱dägøꞌøbä tiúnät wiindsǿṉ maa jaꞌa Diostøjcän, coo jaduhṉ ñäꞌä niguiumayǿøjät. |
|
| 149 |
| otm.jpg | Eastern Highland Otomi | ma'ueque ma mbʉihʉ. Nɛ gätho gahʉ dyʉ mbäją gahʉ bi 'dac ma ts |
|
|
|
|
| 150 |
"""
|
| 151 |
|
| 152 |
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"
|
|
|
|
| 57 |
finally:
|
| 58 |
self._queue.task_done()
|
| 59 |
|
| 60 |
+
def download_model():
|
| 61 |
+
"""Download the fine-tuned Tachiwin model from Hugging Face"""
|
| 62 |
+
model_repo = "tachiwin/PaddleOCR-VL-Tachiwin" # Update this!
|
| 63 |
+
model_dir = "./tachiwin_model"
|
| 64 |
+
|
| 65 |
+
print(f"Downloading Tachiwin model from {model_repo}...")
|
| 66 |
+
|
| 67 |
+
snapshot_download(
|
| 68 |
+
repo_id=model_repo,
|
| 69 |
+
local_dir=model_dir,
|
| 70 |
+
local_dir_use_symlinks=False
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
print(f"Model downloaded successfully to {model_dir}")
|
| 74 |
+
return model_dir
|
| 75 |
+
|
| 76 |
|
| 77 |
def create_model():
|
| 78 |
"""Initialize PaddleOCR-VL with the fine-tuned Tachiwin model"""
|
|
|
|
| 113 |
extracted_texts = []
|
| 114 |
|
| 115 |
for page in result:
|
| 116 |
+
if hasattr(page, 'parsing_res_list'):
|
| 117 |
+
for block in page.parsing_res_list:
|
| 118 |
+
if hasattr(block, 'content') and block.content:
|
| 119 |
+
extracted_texts.append(block.content)
|
| 120 |
|
| 121 |
if not extracted_texts:
|
| 122 |
return "No text could be extracted from the image."
|
|
|
|
| 152 |
['mir.jpg'],
|
| 153 |
['ote.jpg'],
|
| 154 |
['otm.jpg'],
|
| 155 |
+
['lac.jpg'],
|
| 156 |
]
|
| 157 |
|
| 158 |
example_labels = """
|
|
|
|
| 164 |
| maj.jpg | Mazatec, Jalapa de Díaz | Kui xi já maña̱ xi ngakjá ku̱a̱kúya ni xi ts'e̱ Nti̱a̱ná. Kj'a̱í ni xi ku̱a̱kúyanu̱u, kui xi ts'i̱ínkatsúnnu̱u. Najmi ts'i̱ínkie yjoho̱ nga Nda̱ Nti̱a̱ná xi ts'asjejihi̱n. B'a̱ ts'ín ki̱tsa̱ ts'i̱ín nibánehe̱ ra̱ yjoho̱ nga n'e̱kje. Nkjin xi i̱ncha ts'i̱ín ni xi i̱ncha ts'ín jóo̱, ni xi tu̱ subahá maná. |
|
| 165 |
| mir.jpg | Isthmus Mixe | Cab jaduhṉ yhahixøꞌøy coo jaꞌa naam̱dägøꞌøbä tiúnät wiindsǿṉ maa jaꞌa Diostøjcän, coo jaduhṉ ñäꞌä niguiumayǿøjät. |
|
| 166 |
| otm.jpg | Eastern Highland Otomi | ma'ueque ma mbʉihʉ. Nɛ gätho gahʉ dyʉ mbäją gahʉ bi 'dac ma ts |
|
| 167 |
+
| lac.jpg | Lacandon | wa quin chen u'yicob a t'ʌnex, wa yʌn in wu'yicob a ba' cu ya'aric C'uj? Tin t'ʌn, mʌ' in wu'yicob a t'ʌnex, yʌn in wu'yicob a ba' cu ya'aric C'uj. Yʌn in man in wa'aricob a ba' caj in wirajob yejer a ba' caj in wu'yajob ―baxuc tu ya'araj Pedro ti' u jach ts'urirob. Jeroj tune', chich t'ʌn Pedro yejer Juan ten u jach ts'urirob u winiquirob judío, caj ts'oquij caj cha'b u binob ten u jach ts'urirob. |
|
| 168 |
"""
|
| 169 |
|
| 170 |
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"
|
lac.jpg
ADDED
|