WaysAheadGlobal committed (verified)
Commit 1c5a277 · 1 Parent(s): a841bb3

Update app.py

Files changed (1): app.py (+39 -56)
app.py CHANGED
@@ -1,67 +1,50 @@
 import gradio as gr
 import cv2
-import tempfile
 from PIL import Image
-from transformers import Blip2Processor, Blip2ForConditionalGeneration
-import torch
-import os

-# Load BLIP-2 model (FLAN-T5 - CPU friendly)
-processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
-model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")

-def describe_image(image):
-    image = image.convert("RGB")
-    inputs = processor(images=image, return_tensors="pt")
-    generated_ids = model.generate(**inputs, max_new_tokens=50)
-    caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
-    return caption

-def extract_video_frames(video_path, interval=30):
-    cap = cv2.VideoCapture(video_path)
-    frames = []
-    count = 0
-    success = True
-    while success:
-        success, frame = cap.read()
-        if not success:
             break
-        if count % interval == 0:
-            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            frames.append((count, Image.fromarray(frame_rgb)))
-        count += 1
-    cap.release()
-    return frames

-def handle_upload(file):
-    name = file.name.lower()
-    if name.endswith((".jpg", ".jpeg", ".png")):
-        image = Image.open(file)
-        caption = describe_image(image)
-        return f"🖼️ Image Caption:\n{caption}"
-
-    elif name.endswith((".mp4", ".mov", ".avi", ".mkv")):
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
-            tmp.write(file.read())
-            tmp_path = tmp.name

-        frames = extract_video_frames(tmp_path, interval=30)  # 1 fps
-        captions = []
-        for idx, frame in frames:
-            caption = describe_image(frame)
-            captions.append(f"🕒 Frame {idx}: {caption}")

-        os.remove(tmp_path)
-        return "\n".join(captions)
-
-    else:
-        return "❌ Unsupported file type. Please upload an image or video."

-# Gradio UI
-gr.Interface(
-    fn=handle_upload,
-    inputs=gr.File(label="Upload Image or Video"),
-    outputs=gr.Textbox(label="Scene Descriptions"),
-    title="🧠 Scene Understanding AI – BLIP-2 (Image + Video)",
-    description="Upload a photo or video. The AI will describe the scene(s) using BLIP-2 (FLAN-T5). Works on CPU."
-).launch()
+# app.py
+
 import gradio as gr
+import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration
 import cv2
 from PIL import Image

+# Load BLIP captioning model
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

+device = torch.device("cpu")
+model.to(device)

+# Live webcam captioning generator
+def webcam_caption():
+    cap = cv2.VideoCapture(0)  # open webcam
+    while True:
+        ret, frame = cap.read()
+        if not ret:
             break

+        # Convert OpenCV frame (BGR) to RGB PIL Image
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        image = Image.fromarray(frame_rgb)
+
+        # Generate caption
+        inputs = processor(images=image, return_tensors="pt").to(device)
+        out = model.generate(**inputs, max_new_tokens=50)
+        caption = processor.decode(out[0], skip_special_tokens=True)
+
+        yield frame_rgb, caption
+
+    cap.release()

+# Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("## 🎥 Live Webcam BLIP Captioning (CPU)")
+    video = gr.Image(label="Webcam Stream")
+    text = gr.Textbox(label="Caption")

+    demo.load(
+        fn=webcam_caption,
+        inputs=None,
+        outputs=[video, text],
+        every=2  # call generator every 2 sec (adjust if you want)
+    )

+demo.launch()
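
For reference, the captioning path introduced by this commit can be sanity-checked without a webcam or the Gradio UI. Below is a minimal sketch (not part of the commit) using the same processor/generate/decode calls that webcam_caption runs on each captured frame; the filename sample.jpg is a placeholder assumption:

# standalone caption test (sketch; same checkpoint the app loads)
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

image = Image.open("sample.jpg").convert("RGB")  # "sample.jpg" is a placeholder path
inputs = processor(images=image, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=50)
print(processor.decode(out[0], skip_special_tokens=True))  # prints the caption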