Spaces:
Runtime error
Runtime error
tonic
committed on
Commit
·
bc83a37
1
Parent(s):
0b2fb36
adding interface logic and audio returns
Browse files
app.py
CHANGED
|
@@ -183,7 +183,6 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
|
|
| 183 |
final_text = text
|
| 184 |
if image is not None:
|
| 185 |
ocr_prediction = ocr_processor.process_image(image)
|
| 186 |
-
# getting text from ocr object
|
| 187 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
| 188 |
final_text += " "
|
| 189 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
|
@@ -191,13 +190,11 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
|
|
| 191 |
if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
|
| 192 |
pil_image = Image.open(file)
|
| 193 |
ocr_prediction = ocr_processor.process_image(pil_image)
|
| 194 |
-
# getting text from ocr object
|
| 195 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
| 196 |
final_text += " "
|
| 197 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
| 198 |
elif file.name.lower().endswith('.pdf'):
|
| 199 |
ocr_prediction = ocr_processor.process_pdf(file.name)
|
| 200 |
-
# getting text from ocr object
|
| 201 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
| 202 |
final_text += " "
|
| 203 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
|
@@ -290,12 +287,10 @@ def main():
|
|
| 290 |
translatefrom=input_language, translateto=target_language
|
| 291 |
)
|
| 292 |
|
| 293 |
-
# Prepare outputs for Gradio
|
| 294 |
processed_text_output = final_text
|
| 295 |
audio_output_native_phrases = [native for _, native in audio_outputs]
|
| 296 |
audio_output_target_phrases = [target for target, _ in audio_outputs]
|
| 297 |
|
| 298 |
-
# Assuming there are exactly 3 top phrases for simplicity
|
| 299 |
longest_phrases_outputs = top_phrases[:3]
|
| 300 |
translated_phrases_outputs = translations[:3]
|
| 301 |
audio_outputs_native = audio_output_native_phrases[:3]
|
|
|
|
| 183 |
final_text = text
|
| 184 |
if image is not None:
|
| 185 |
ocr_prediction = ocr_processor.process_image(image)
|
|
|
|
| 186 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
| 187 |
final_text += " "
|
| 188 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
|
|
|
| 190 |
if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
|
| 191 |
pil_image = Image.open(file)
|
| 192 |
ocr_prediction = ocr_processor.process_image(pil_image)
|
|
|
|
| 193 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
| 194 |
final_text += " "
|
| 195 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
| 196 |
elif file.name.lower().endswith('.pdf'):
|
| 197 |
ocr_prediction = ocr_processor.process_pdf(file.name)
|
|
|
|
| 198 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
| 199 |
final_text += " "
|
| 200 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
|
|
|
| 287 |
translatefrom=input_language, translateto=target_language
|
| 288 |
)
|
| 289 |
|
|
|
|
| 290 |
processed_text_output = final_text
|
| 291 |
audio_output_native_phrases = [native for _, native in audio_outputs]
|
| 292 |
audio_output_target_phrases = [target for target, _ in audio_outputs]
|
| 293 |
|
|
|
|
| 294 |
longest_phrases_outputs = top_phrases[:3]
|
| 295 |
translated_phrases_outputs = translations[:3]
|
| 296 |
audio_outputs_native = audio_output_native_phrases[:3]
|