gopalagra committed
Commit a01c6e8 · verified · 1 Parent(s): 61d0d7a

Update app.py

Files changed (1)
  1. app.py +6 -8
app.py CHANGED
@@ -315,6 +315,7 @@ def generate_caption_translate_speak(image, target_lang):
     # Step 1.5: Safety Check
     if not is_caption_safe(english_caption):
         beep = make_beep_sound()
+        # Return warning text + auto-playing beep
         return "⚠️ Warning: Unsafe or inappropriate content detected!", "", beep
 
     # Step 2: Translate
@@ -340,21 +341,20 @@ def vqa_answer(image, question):
     out = vqa_model.generate(**inputs, max_new_tokens=50)
     answer = vqa_processor.decode(out[0], skip_special_tokens=True)
 
+    # Safety check
     if not is_caption_safe(answer):
         beep = make_beep_sound()
+        # Return warning + beep sound
         return "⚠️ Warning: Unsafe or inappropriate content detected!", beep
 
     return answer, None
 
 
-# ----------------------
-# Gradio UI
-# ----------------------
 # ----------------------
 # Gradio UI
 # ----------------------
 with gr.Blocks(title="BLIP Vision App") as demo:
-    gr.Markdown("## 🖼️ BLIP: Image Captioning + Translation + Speech + VQA (with Safety Filter + Beep Alert)")
+    gr.Markdown("## 🖼️ BLIP: Image Captioning + Translation + Speech + VQA (with Safety Filter + Auto Beep Alert)")
 
     with gr.Tab("Caption + Translate + Speak"):
         with gr.Row():
@@ -362,7 +362,7 @@ with gr.Blocks(title="BLIP Vision App") as demo:
             lang_in = gr.Dropdown(["Hindi", "French", "Spanish"], label="Translate To", value="Hindi")
         eng_out = gr.Textbox(label="English Caption")
         trans_out = gr.Textbox(label="Translated Caption")
-        audio_out = gr.Audio(label="Audio Output", type="filepath", autoplay=True)  # 👈 added autoplay
+        audio_out = gr.Audio(label="Audio Output", type="filepath", autoplay=True)  # autoplay enabled
         btn1 = gr.Button("Generate Caption, Translate & Speak")
         btn1.click(generate_caption_translate_speak, inputs=[img_in, lang_in], outputs=[eng_out, trans_out, audio_out])
 
@@ -371,12 +371,10 @@ with gr.Blocks(title="BLIP Vision App") as demo:
         img_vqa = gr.Image(type="pil", label="Upload Image")
         q_in = gr.Textbox(label="Ask a Question about the Image")
         ans_out = gr.Textbox(label="Answer")
-        beep_out = gr.Audio(label="Alert Sound", type="filepath", autoplay=True)  # 👈 added autoplay
+        beep_out = gr.Audio(label="Alert Sound", type="filepath", autoplay=True)  # autoplay enabled
         btn2 = gr.Button("Ask")
         btn2.click(vqa_answer, inputs=[img_vqa, q_in], outputs=[ans_out, beep_out])
 
 demo.launch()
 
-
-
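For context, the safety path above depends on two helpers defined earlier in app.py and not shown in this diff: is_caption_safe (used as a boolean predicate) and make_beep_sound (its return value is fed to a gr.Audio(type="filepath") output, so it must return a path to an audio file). A minimal sketch of what such helpers could look like, inferred purely from how they are called; the blocklist and the beep parameters are illustrative assumptions, not the app's actual values:

```python
import math
import struct
import tempfile
import wave

# Hypothetical blocklist -- the real app's safety criteria are not visible in this diff.
_BLOCKED_WORDS = {"violence", "weapon", "blood"}

def is_caption_safe(text: str) -> bool:
    """Return True when no blocked word appears in the text (assumed behavior)."""
    lowered = text.lower()
    return not any(word in lowered for word in _BLOCKED_WORDS)

def make_beep_sound(freq_hz: float = 880.0, duration_s: float = 0.5, rate: int = 16000) -> str:
    """Write a short sine-wave beep to a temporary WAV file and return its path,
    matching the gr.Audio(type="filepath") outputs in the UI."""
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    frames = bytearray()
    for i in range(int(rate * duration_s)):
        sample = int(32767 * 0.5 * math.sin(2 * math.pi * freq_hz * i / rate))
        frames += struct.pack("<h", sample)  # 16-bit little-endian PCM
    with wave.open(tmp.name, "wb") as wav:
        wav.setnchannels(1)   # mono
        wav.setsampwidth(2)   # 2 bytes per sample
        wav.setframerate(rate)
        wav.writeframes(bytes(frames))
    return tmp.name
```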
 
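The behavior the commit leans on is that gr.Audio(autoplay=True) plays whatever new value a handler returns into it, so the beep sounds without the user pressing play; returning None for that slot (the safe path in vqa_answer) simply leaves the component empty. A stripped-down sketch of the same wiring, using the hypothetical helpers above:

```python
import gradio as gr

def check(text: str):
    # The return tuple must line up with the outputs list of .click() below.
    if not is_caption_safe(text):
        return "⚠️ Warning: Unsafe or inappropriate content detected!", make_beep_sound()
    return text, None  # None clears the audio component, so nothing plays

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Text to check")
    out = gr.Textbox(label="Result")
    alert = gr.Audio(label="Alert Sound", type="filepath", autoplay=True)
    gr.Button("Check").click(check, inputs=[inp], outputs=[out, alert])

demo.launch()
```

Browsers generally allow audio autoplay only after the user has interacted with the page; the button click that triggers the handler counts as that interaction, which is why this pattern plays reliably.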