imperiusrex committed on
Commit
4478675
·
verified ·
1 Parent(s): b62a2c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -6
app.py CHANGED
@@ -6,6 +6,8 @@ import torch
6
  import spaces
7
  from ultralytics import YOLO
8
  from tqdm import tqdm
 
 
9
 
10
  # Fix for Ultralytics config write error in Hugging Face environment
11
  os.environ["YOLO_CONFIG_DIR"] = "/tmp"
@@ -13,10 +15,14 @@ os.environ["YOLO_CONFIG_DIR"] = "/tmp"
13
  # Use GPU if available
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
- # Load models onto the appropriate device
17
  extract_model = YOLO("best.pt").to(device)
18
  detect_model = YOLO("yolov8n.pt").to(device)
19
 
 
 
 
 
20
  @spaces.GPU
21
  def process_video(video_path):
22
  os.makedirs("frames", exist_ok=True)
@@ -92,9 +98,16 @@ def process_video(video_path):
92
  sharp = cv2.addWeighted(selective, 2.0, blur, -1.0, 0)
93
  cv2.imwrite("sharpened_board_color.jpg", sharp)
94
 
95
- return "sharpened_board_color.jpg"
 
 
 
 
 
 
96
 
97
 
 
98
  demo = gr.Interface(
99
  fn=process_video,
100
  inputs=[
@@ -106,13 +119,14 @@ demo = gr.Interface(
106
  )
107
  ],
108
  outputs=[
109
- gr.Image(label="Sharpened Final Board")
 
110
  ],
111
- title="📹 Classroom Board Cleaner",
112
  description=(
113
  "1️⃣ Upload your classroom video (.mp4)\n"
114
- "2️⃣ Automatic extraction, alignment, masking, fusion & sharpening\n"
115
- "3️⃣ View three stages of the cleaned board output"
116
  )
117
  )
118
 
 
6
  import spaces
7
  from ultralytics import YOLO
8
  from tqdm import tqdm
9
+ from PIL import Image
10
+ from transformers import BlipProcessor, BlipForConditionalGeneration
11
 
12
  # Fix for Ultralytics config write error in Hugging Face environment
13
  os.environ["YOLO_CONFIG_DIR"] = "/tmp"
 
15
  # Use GPU if available
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
 
18
+ # Load YOLO models onto the appropriate device
19
  extract_model = YOLO("best.pt").to(device)
20
  detect_model = YOLO("yolov8n.pt").to(device)
21
 
22
+ # Load BLIP captioning model and processor
23
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
24
+ caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
25
+
26
  @spaces.GPU
27
  def process_video(video_path):
28
  os.makedirs("frames", exist_ok=True)
 
98
  sharp = cv2.addWeighted(selective, 2.0, blur, -1.0, 0)
99
  cv2.imwrite("sharpened_board_color.jpg", sharp)
100
 
101
+ # Step 6: Generate Caption
102
+ image = Image.open("sharpened_board_color.jpg").convert("RGB")
103
+ inputs = processor(images=image, return_tensors="pt").to(device)
104
+ out = caption_model.generate(**inputs, max_new_tokens=30)
105
+ caption = processor.decode(out[0], skip_special_tokens=True)
106
+
107
+ return "sharpened_board_color.jpg", caption
108
 
109
 
110
+ # Build Gradio interface
111
  demo = gr.Interface(
112
  fn=process_video,
113
  inputs=[
 
119
  )
120
  ],
121
  outputs=[
122
+ gr.Image(label="Sharpened Final Board"),
123
+ gr.Textbox(label="Generated Caption (BLIP)")
124
  ],
125
+ title="📹 Classroom Board Cleaner + Captioning",
126
  description=(
127
  "1️⃣ Upload your classroom video (.mp4)\n"
128
+ "2️⃣ Extracts, aligns, masks, fuses, sharpens board frames\n"
129
+ "3️⃣ Generates a caption describing the cleaned board output"
130
  )
131
  )
132