Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -315,6 +315,7 @@ def generate_caption_translate_speak(image, target_lang):
|
|
| 315 |
# Step 1.5: Safety Check
|
| 316 |
if not is_caption_safe(english_caption):
|
| 317 |
beep = make_beep_sound()
|
|
|
|
| 318 |
return "⚠️ Warning: Unsafe or inappropriate content detected!", "", beep
|
| 319 |
|
| 320 |
# Step 2: Translate
|
|
@@ -340,21 +341,20 @@ def vqa_answer(image, question):
|
|
| 340 |
out = vqa_model.generate(**inputs, max_new_tokens=50)
|
| 341 |
answer = vqa_processor.decode(out[0], skip_special_tokens=True)
|
| 342 |
|
|
|
|
| 343 |
if not is_caption_safe(answer):
|
| 344 |
beep = make_beep_sound()
|
|
|
|
| 345 |
return "⚠️ Warning: Unsafe or inappropriate content detected!", beep
|
| 346 |
|
| 347 |
return answer, None
|
| 348 |
|
| 349 |
|
| 350 |
-
# ----------------------
|
| 351 |
-
# Gradio UI
|
| 352 |
-
# ----------------------
|
| 353 |
# ----------------------
|
| 354 |
# Gradio UI
|
| 355 |
# ----------------------
|
| 356 |
with gr.Blocks(title="BLIP Vision App") as demo:
|
| 357 |
-
gr.Markdown("## 🖼️ BLIP: Image Captioning + Translation + Speech + VQA (with Safety Filter + Beep Alert)")
|
| 358 |
|
| 359 |
with gr.Tab("Caption + Translate + Speak"):
|
| 360 |
with gr.Row():
|
|
@@ -362,7 +362,7 @@ with gr.Blocks(title="BLIP Vision App") as demo:
|
|
| 362 |
lang_in = gr.Dropdown(["Hindi", "French", "Spanish"], label="Translate To", value="Hindi")
|
| 363 |
eng_out = gr.Textbox(label="English Caption")
|
| 364 |
trans_out = gr.Textbox(label="Translated Caption")
|
| 365 |
-
audio_out = gr.Audio(label="Audio Output", type="filepath", autoplay=True) #
|
| 366 |
btn1 = gr.Button("Generate Caption, Translate & Speak")
|
| 367 |
btn1.click(generate_caption_translate_speak, inputs=[img_in, lang_in], outputs=[eng_out, trans_out, audio_out])
|
| 368 |
|
|
@@ -371,12 +371,10 @@ with gr.Blocks(title="BLIP Vision App") as demo:
|
|
| 371 |
img_vqa = gr.Image(type="pil", label="Upload Image")
|
| 372 |
q_in = gr.Textbox(label="Ask a Question about the Image")
|
| 373 |
ans_out = gr.Textbox(label="Answer")
|
| 374 |
-
beep_out = gr.Audio(label="Alert Sound", type="filepath", autoplay=True) #
|
| 375 |
btn2 = gr.Button("Ask")
|
| 376 |
btn2.click(vqa_answer, inputs=[img_vqa, q_in], outputs=[ans_out, beep_out])
|
| 377 |
|
| 378 |
demo.launch()
|
| 379 |
|
| 380 |
|
| 381 |
-
|
| 382 |
-
|
|
|
|
| 315 |
# Step 1.5: Safety Check
|
| 316 |
if not is_caption_safe(english_caption):
|
| 317 |
beep = make_beep_sound()
|
| 318 |
+
# Return warning text + auto-playing beep
|
| 319 |
return "⚠️ Warning: Unsafe or inappropriate content detected!", "", beep
|
| 320 |
|
| 321 |
# Step 2: Translate
|
|
|
|
| 341 |
out = vqa_model.generate(**inputs, max_new_tokens=50)
|
| 342 |
answer = vqa_processor.decode(out[0], skip_special_tokens=True)
|
| 343 |
|
| 344 |
+
# Safety check
|
| 345 |
if not is_caption_safe(answer):
|
| 346 |
beep = make_beep_sound()
|
| 347 |
+
# Return warning + beep sound
|
| 348 |
return "⚠️ Warning: Unsafe or inappropriate content detected!", beep
|
| 349 |
|
| 350 |
return answer, None
|
| 351 |
|
| 352 |
|
|
|
|
|
|
|
|
|
|
| 353 |
# ----------------------
|
| 354 |
# Gradio UI
|
| 355 |
# ----------------------
|
| 356 |
with gr.Blocks(title="BLIP Vision App") as demo:
|
| 357 |
+
gr.Markdown("## 🖼️ BLIP: Image Captioning + Translation + Speech + VQA (with Safety Filter + Auto Beep Alert)")
|
| 358 |
|
| 359 |
with gr.Tab("Caption + Translate + Speak"):
|
| 360 |
with gr.Row():
|
|
|
|
| 362 |
lang_in = gr.Dropdown(["Hindi", "French", "Spanish"], label="Translate To", value="Hindi")
|
| 363 |
eng_out = gr.Textbox(label="English Caption")
|
| 364 |
trans_out = gr.Textbox(label="Translated Caption")
|
| 365 |
+
audio_out = gr.Audio(label="Audio Output", type="filepath", autoplay=True) # autoplay enabled
|
| 366 |
btn1 = gr.Button("Generate Caption, Translate & Speak")
|
| 367 |
btn1.click(generate_caption_translate_speak, inputs=[img_in, lang_in], outputs=[eng_out, trans_out, audio_out])
|
| 368 |
|
|
|
|
| 371 |
img_vqa = gr.Image(type="pil", label="Upload Image")
|
| 372 |
q_in = gr.Textbox(label="Ask a Question about the Image")
|
| 373 |
ans_out = gr.Textbox(label="Answer")
|
| 374 |
+
beep_out = gr.Audio(label="Alert Sound", type="filepath", autoplay=True) # autoplay enabled
|
| 375 |
btn2 = gr.Button("Ask")
|
| 376 |
btn2.click(vqa_answer, inputs=[img_vqa, q_in], outputs=[ans_out, beep_out])
|
| 377 |
|
| 378 |
demo.launch()
|
| 379 |
|
| 380 |
|
|
|
|
|
|