Spaces:

MultiTransformer
/

AyaTonic

Runtime error

App Files Community

tonic committed on Feb 26, 2024

Commit

bc83a37

1 Parent(s): 0b2fb36

adding interface logic and audio returns

Browse files

Files changed (1) hide show

app.py +0 -5

app.py CHANGED Viewed

@@ -183,7 +183,6 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
     final_text = text
     if image is not None:
         ocr_prediction = ocr_processor.process_image(image)
-        # gettig text from ocr object
         for idx in range(len((list(ocr_prediction)[0][1]))):
             final_text += " "
             final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
@@ -191,13 +190,11 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
         if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
             pil_image = Image.open(file)
             ocr_prediction = ocr_processor.process_image(pil_image)
-            # gettig text from ocr object
             for idx in range(len((list(ocr_prediction)[0][1]))):
                 final_text += " "
                 final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
         elif file.name.lower().endswith('.pdf'):
             ocr_prediction = ocr_processor.process_pdf(file.name)
-            # gettig text from ocr object
             for idx in range(len((list(ocr_prediction)[0][1]))):
                 final_text += " "
                 final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
@@ -290,12 +287,10 @@ def main():
                 translatefrom=input_language, translateto=target_language
             )
-            # Prepare outputs for Gradio
             processed_text_output = final_text
             audio_output_native_phrases = [native for _, native in audio_outputs]
             audio_output_target_phrases = [target for target, _ in audio_outputs]
-            # Assuming there are exactly 3 top phrases for simplicity
             longest_phrases_outputs = top_phrases[:3]
             translated_phrases_outputs = translations[:3]
             audio_outputs_native = audio_output_native_phrases[:3]

     final_text = text
     if image is not None:
         ocr_prediction = ocr_processor.process_image(image)
         for idx in range(len((list(ocr_prediction)[0][1]))):
             final_text += " "
             final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
         if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
             pil_image = Image.open(file)
             ocr_prediction = ocr_processor.process_image(pil_image)
             for idx in range(len((list(ocr_prediction)[0][1]))):
                 final_text += " "
                 final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
         elif file.name.lower().endswith('.pdf'):
             ocr_prediction = ocr_processor.process_pdf(file.name)
             for idx in range(len((list(ocr_prediction)[0][1]))):
                 final_text += " "
                 final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
                 translatefrom=input_language, translateto=target_language
             )
             processed_text_output = final_text
             audio_output_native_phrases = [native for _, native in audio_outputs]
             audio_output_target_phrases = [target for target, _ in audio_outputs]
             longest_phrases_outputs = top_phrases[:3]
             translated_phrases_outputs = translations[:3]
             audio_outputs_native = audio_output_native_phrases[:3]