Spaces:

histlearn
/

rule-based-captioning

Sleeping

histlearn committed on Jun 23, 2024

Commit

dec4175

verified ·

1 Parent(s): c48f486

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -191,7 +191,7 @@ def reordenar_sentenca(sentenca):
     sentenca_normalizada = ""
     for i in range(len(frase)):
         sentenca_normalizada += frase[i] + " "
-    return sentenca_normalizada.strip()
 # Carregar os modelos
 processor = AutoProcessor.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
@@ -228,21 +228,21 @@ def text_to_speech_gtts(text, lang='pt'):
 def process_image(image):
     _, pixel_values = prepare_image(image)
     caption_pt = generate_caption(pixel_values)
-    caption_pt = reordenar_sentenca(caption_pt)
     audio_file = text_to_speech_gtts(caption_pt)
-    return caption_pt, audio_file
 # Caminhos para as imagens de exemplo
 example_image_paths = [
-        "./example2.jpeg"
 ]
 # Interface Gradio
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="filepath"),
-    outputs=[gr.Textbox(), gr.Audio(type="filepath")],
     examples=example_image_paths,
     title="Image to Voice",
     description="Gera uma descrição em português e a converte em voz a partir de uma imagem."

     sentenca_normalizada = ""
     for i in range(len(frase)):
         sentenca_normalizada += frase[i] + " "
+    return sentenca_normalizada.strip(), tags
 # Carregar os modelos
 processor = AutoProcessor.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
 def process_image(image):
     _, pixel_values = prepare_image(image)
     caption_pt = generate_caption(pixel_values)
+    caption_pt, tags = reordenar_sentenca(caption_pt)
     audio_file = text_to_speech_gtts(caption_pt)
+    tags_text = " | ".join(tags)
+    return caption_pt, tags_text, audio_file
 # Caminhos para as imagens de exemplo
 example_image_paths = [
+    "./example2.jpeg"
 ]
 # Interface Gradio
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="filepath"),
+    outputs=[gr.Textbox(label="Caption"), gr.Textbox(label="Morphological Analysis"), gr.Audio(type="filepath")],
     examples=example_image_paths,
     title="Image to Voice",
     description="Gera uma descrição em português e a converte em voz a partir de uma imagem."