gopalagra committed on
Commit
9cf0535
·
verified ·
1 Parent(s): 0a27bcd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -19
app.py CHANGED
@@ -229,7 +229,7 @@ import tempfile
229
  import base64
230
 
231
  # ----------------------
232
- # Device
233
  # ----------------------
234
  device = "cuda" if torch.cuda.is_available() else "cpu"
235
 
@@ -237,10 +237,16 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
237
  # Simple BEEP sound (base64)
238
  # ----------------------
239
  BEEP_BASE64 = """
240
- SUQzAwAAAAAAF1RTU0UAAAAPAAADTGF2ZjU4LjMyLjEwNAAAAAAAAAAAAAAA//uQxAADB...
 
 
 
 
 
 
241
  """
242
 
243
- # Convert base64 to temp mp3 file
244
  def load_beep():
245
  audio_bytes = base64.b64decode(BEEP_BASE64)
246
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
@@ -248,7 +254,6 @@ def load_beep():
248
  tmp.close()
249
  return tmp.name
250
 
251
-
252
  # ----------------------
253
  # Load models
254
  # ----------------------
@@ -274,7 +279,6 @@ moderation_model = pipeline("text-classification", model="unitary/toxic-bert")
274
 
275
  print("✅ All models loaded!")
276
 
277
-
278
  # ----------------------
279
  # Safety check
280
  # ----------------------
@@ -282,17 +286,17 @@ def is_caption_safe(caption):
282
  try:
283
  result = moderation_model(caption)
284
  if isinstance(result, list) and "label" in result[0]:
285
- if result[0]["label"] == "toxic" and result[0]["score"] > 0.5:
286
  return False
287
  except:
288
  pass
289
 
 
290
  unsafe_words = ["gun", "kill", "dead", "weapon", "blood"]
291
  return not any(w in caption.lower() for w in unsafe_words)
292
 
293
-
294
  # ----------------------
295
- # Auto Caption + Translate + BEEP
296
  # ----------------------
297
  def auto_process(image, target_lang):
298
  if image is None:
@@ -302,6 +306,7 @@ def auto_process(image, target_lang):
302
  inputs = caption_processor(images=image, return_tensors="pt").to(device)
303
  with torch.no_grad():
304
  output = caption_model.generate(**inputs, max_new_tokens=40)
 
305
  caption = caption_processor.decode(output[0], skip_special_tokens=True)
306
 
307
  # Safety
@@ -311,11 +316,8 @@ def auto_process(image, target_lang):
311
  # Translate
312
  translated = translation_models[target_lang](caption)[0]["translation_text"]
313
 
314
- # Always play BEEP once caption is ready
315
- beep_file = load_beep()
316
-
317
- return caption, translated, beep_file
318
-
319
 
320
  # ----------------------
321
  # VQA
@@ -335,26 +337,25 @@ def vqa_answer(image, question):
335
 
336
  return ans
337
 
338
-
339
  # ----------------------
340
- # UI
341
  # ----------------------
342
  with gr.Blocks(title="BLIP App") as demo:
343
- gr.Markdown("## 🖼️ Auto-Caption + Translation + Automatic Beep")
344
 
345
  with gr.Tab("Auto Caption"):
346
  img = gr.Image(type="pil", label="Upload Image")
347
  lang = gr.Dropdown(["Hindi", "French", "Spanish"], value="Hindi", label="Translate To")
348
  out_eng = gr.Textbox(label="English Caption")
349
  out_trans = gr.Textbox(label="Translated")
350
- out_audio = gr.Audio(label="Beep", autoplay=True)
351
 
352
- # 🔥 Auto-run when image is uploaded
353
  img.change(auto_process, inputs=[img, lang], outputs=[out_eng, out_trans, out_audio])
354
  lang.change(auto_process, inputs=[img, lang], outputs=[out_eng, out_trans, out_audio])
355
 
356
  with gr.Tab("VQA"):
357
- img_vqa = gr.Image(type="pil")
358
  q = gr.Textbox(label="Ask a question")
359
  ans = gr.Textbox(label="Answer")
360
  ask_btn = gr.Button("Ask")
 
229
  import base64
230
 
231
  # ----------------------
232
+ # Device setup
233
  # ----------------------
234
  device = "cuda" if torch.cuda.is_available() else "cpu"
235
 
 
237
  # Simple BEEP sound (base64)
238
  # ----------------------
239
  BEEP_BASE64 = """
240
+ SUQzAwAAAAAAFlRFTkMAAAAPAAADdAAAABJBTUFEAAAAGwAAAG1kYXQAAAAA/////wABAAAC
241
+ AgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
242
+ ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICA
243
+ gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC
244
+ AgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
245
+ ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICA
246
+ gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgAAAA==
247
  """
248
 
249
+
250
  def load_beep():
251
  audio_bytes = base64.b64decode(BEEP_BASE64)
252
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
 
254
  tmp.close()
255
  return tmp.name
256
 
 
257
  # ----------------------
258
  # Load models
259
  # ----------------------
 
279
 
280
  print("✅ All models loaded!")
281
 
 
282
  # ----------------------
283
  # Safety check
284
  # ----------------------
 
286
  try:
287
  result = moderation_model(caption)
288
  if isinstance(result, list) and "label" in result[0]:
289
+ if result[0]["label"].lower() == "toxic" and result[0]["score"] > 0.5:
290
  return False
291
  except:
292
  pass
293
 
294
+ # extra simple keyword check
295
  unsafe_words = ["gun", "kill", "dead", "weapon", "blood"]
296
  return not any(w in caption.lower() for w in unsafe_words)
297
 
 
298
  # ----------------------
299
+ # Auto Caption + Translate + Optional BEEP
300
  # ----------------------
301
  def auto_process(image, target_lang):
302
  if image is None:
 
306
  inputs = caption_processor(images=image, return_tensors="pt").to(device)
307
  with torch.no_grad():
308
  output = caption_model.generate(**inputs, max_new_tokens=40)
309
+
310
  caption = caption_processor.decode(output[0], skip_special_tokens=True)
311
 
312
  # Safety
 
316
  # Translate
317
  translated = translation_models[target_lang](caption)[0]["translation_text"]
318
 
319
+ # SAFE No beep
320
+ return caption, translated, None
 
 
 
321
 
322
  # ----------------------
323
  # VQA
 
337
 
338
  return ans
339
 
 
340
  # ----------------------
341
+ # Gradio UI
342
  # ----------------------
343
  with gr.Blocks(title="BLIP App") as demo:
344
+ gr.Markdown("## 🖼️ Auto-Caption + Translation + Safety Beep")
345
 
346
  with gr.Tab("Auto Caption"):
347
  img = gr.Image(type="pil", label="Upload Image")
348
  lang = gr.Dropdown(["Hindi", "French", "Spanish"], value="Hindi", label="Translate To")
349
  out_eng = gr.Textbox(label="English Caption")
350
  out_trans = gr.Textbox(label="Translated")
351
+ out_audio = gr.Audio(label="Audio", type="filepath", autoplay=True)
352
 
353
+ # Auto-run on image or language change
354
  img.change(auto_process, inputs=[img, lang], outputs=[out_eng, out_trans, out_audio])
355
  lang.change(auto_process, inputs=[img, lang], outputs=[out_eng, out_trans, out_audio])
356
 
357
  with gr.Tab("VQA"):
358
+ img_vqa = gr.Image(type="pil", label="Upload Image")
359
  q = gr.Textbox(label="Ask a question")
360
  ans = gr.Textbox(label="Answer")
361
  ask_btn = gr.Button("Ask")