gopalagra committed
Commit a01c6e8 · verified · 1 Parent(s): 61d0d7a

Update app.py

Files changed (1)
  1. app.py +6 -8
app.py CHANGED
@@ -315,6 +315,7 @@ def generate_caption_translate_speak(image, target_lang):
     # Step 1.5: Safety Check
     if not is_caption_safe(english_caption):
         beep = make_beep_sound()
+        # Return warning text + auto-playing beep
         return "⚠️ Warning: Unsafe or inappropriate content detected!", "", beep
 
     # Step 2: Translate
@@ -340,21 +341,20 @@ def vqa_answer(image, question):
     out = vqa_model.generate(**inputs, max_new_tokens=50)
     answer = vqa_processor.decode(out[0], skip_special_tokens=True)
 
+    # Safety check
     if not is_caption_safe(answer):
         beep = make_beep_sound()
+        # Return warning + beep sound
         return "⚠️ Warning: Unsafe or inappropriate content detected!", beep
 
     return answer, None
 
 
-# ----------------------
-# Gradio UI
-# ----------------------
 # ----------------------
 # Gradio UI
 # ----------------------
 with gr.Blocks(title="BLIP Vision App") as demo:
-    gr.Markdown("## 🖼️ BLIP: Image Captioning + Translation + Speech + VQA (with Safety Filter + Beep Alert)")
+    gr.Markdown("## 🖼️ BLIP: Image Captioning + Translation + Speech + VQA (with Safety Filter + Auto Beep Alert)")
 
     with gr.Tab("Caption + Translate + Speak"):
         with gr.Row():
@@ -362,7 +362,7 @@ with gr.Blocks(title="BLIP Vision App") as demo:
             lang_in = gr.Dropdown(["Hindi", "French", "Spanish"], label="Translate To", value="Hindi")
         eng_out = gr.Textbox(label="English Caption")
         trans_out = gr.Textbox(label="Translated Caption")
-        audio_out = gr.Audio(label="Audio Output", type="filepath", autoplay=True)  # 👈 added autoplay
+        audio_out = gr.Audio(label="Audio Output", type="filepath", autoplay=True)  # autoplay enabled
         btn1 = gr.Button("Generate Caption, Translate & Speak")
         btn1.click(generate_caption_translate_speak, inputs=[img_in, lang_in], outputs=[eng_out, trans_out, audio_out])
 
@@ -371,12 +371,10 @@ with gr.Blocks(title="BLIP Vision App") as demo:
         img_vqa = gr.Image(type="pil", label="Upload Image")
         q_in = gr.Textbox(label="Ask a Question about the Image")
         ans_out = gr.Textbox(label="Answer")
-        beep_out = gr.Audio(label="Alert Sound", type="filepath", autoplay=True)  # 👈 added autoplay
+        beep_out = gr.Audio(label="Alert Sound", type="filepath", autoplay=True)  # autoplay enabled
         btn2 = gr.Button("Ask")
         btn2.click(vqa_answer, inputs=[img_vqa, q_in], outputs=[ans_out, beep_out])
 
 demo.launch()
 
-
-
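For context, the safety path above depends on two helpers defined earlier in app.py and not shown in this diff: is_caption_safe (used as a boolean predicate) and make_beep_sound (its return value is fed to a gr.Audio(type="filepath") output, so it must return a path to an audio file). A minimal sketch of what such helpers could look like, inferred purely from how they are called; the blocklist and the beep parameters are illustrative assumptions, not the app's actual values:

```python
import math
import struct
import tempfile
import wave

# Hypothetical blocklist -- the real app's safety criteria are not visible in this diff.
_BLOCKED_WORDS = {"violence", "weapon", "blood"}

def is_caption_safe(text: str) -> bool:
    """Return True when no blocked word appears in the text (assumed behavior)."""
    lowered = text.lower()
    return not any(word in lowered for word in _BLOCKED_WORDS)

def make_beep_sound(freq_hz: float = 880.0, duration_s: float = 0.5, rate: int = 16000) -> str:
    """Write a short sine-wave beep to a temporary WAV file and return its path,
    matching the gr.Audio(type="filepath") outputs in the UI."""
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    frames = bytearray()
    for i in range(int(rate * duration_s)):
        sample = int(32767 * 0.5 * math.sin(2 * math.pi * freq_hz * i / rate))
        frames += struct.pack("<h", sample)  # 16-bit little-endian PCM
    with wave.open(tmp.name, "wb") as wav:
        wav.setnchannels(1)   # mono
        wav.setsampwidth(2)   # 2 bytes per sample
        wav.setframerate(rate)
        wav.writeframes(bytes(frames))
    return tmp.name
```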
 
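The behavior the commit leans on is that gr.Audio(autoplay=True) plays whatever new value a handler returns into it, so the beep sounds without the user pressing play; returning None for that slot (the safe path in vqa_answer) simply leaves the component empty. A stripped-down sketch of the same wiring, using the hypothetical helpers above:

```python
import gradio as gr

def check(text: str):
    # The return tuple must line up with the outputs list of .click() below.
    if not is_caption_safe(text):
        return "⚠️ Warning: Unsafe or inappropriate content detected!", make_beep_sound()
    return text, None  # None clears the audio component, so nothing plays

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Text to check")
    out = gr.Textbox(label="Result")
    alert = gr.Audio(label="Alert Sound", type="filepath", autoplay=True)
    gr.Button("Check").click(check, inputs=[inp], outputs=[out, alert])

demo.launch()
```

Browsers generally allow audio autoplay only after the user has interacted with the page; the button click that triggers the handler counts as that interaction, which is why this pattern plays reliably.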