imperiusrex committed on
Commit
4478675
·
verified ·
1 Parent(s): b62a2c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -6
app.py CHANGED
@@ -6,6 +6,8 @@ import torch
6
  import spaces
7
  from ultralytics import YOLO
8
  from tqdm import tqdm
 
 
9
 
10
  # Fix for Ultralytics config write error in Hugging Face environment
11
  os.environ["YOLO_CONFIG_DIR"] = "/tmp"
@@ -13,10 +15,14 @@ os.environ["YOLO_CONFIG_DIR"] = "/tmp"
13
  # Use GPU if available
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
- # Load models onto the appropriate device
17
  extract_model = YOLO("best.pt").to(device)
18
  detect_model = YOLO("yolov8n.pt").to(device)
19
 
 
 
 
 
20
  @spaces.GPU
21
  def process_video(video_path):
22
  os.makedirs("frames", exist_ok=True)
@@ -92,9 +98,16 @@ def process_video(video_path):
92
  sharp = cv2.addWeighted(selective, 2.0, blur, -1.0, 0)
93
  cv2.imwrite("sharpened_board_color.jpg", sharp)
94
 
95
- return "sharpened_board_color.jpg"
 
 
 
 
 
 
96
 
97
 
 
98
  demo = gr.Interface(
99
  fn=process_video,
100
  inputs=[
@@ -106,13 +119,14 @@ demo = gr.Interface(
106
  )
107
  ],
108
  outputs=[
109
- gr.Image(label="Sharpened Final Board")
 
110
  ],
111
- title="📹 Classroom Board Cleaner",
112
  description=(
113
  "1️⃣ Upload your classroom video (.mp4)\n"
114
- "2️⃣ Automatic extraction, alignment, masking, fusion & sharpening\n"
115
- "3️⃣ View three stages of the cleaned board output"
116
  )
117
  )
118
 
 
6
  import spaces
7
  from ultralytics import YOLO
8
  from tqdm import tqdm
9
+ from PIL import Image
10
+ from transformers import BlipProcessor, BlipForConditionalGeneration
11
 
12
  # Fix for Ultralytics config write error in Hugging Face environment
13
  os.environ["YOLO_CONFIG_DIR"] = "/tmp"
 
15
  # Use GPU if available
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
 
18
+ # Load YOLO models onto the appropriate device
19
  extract_model = YOLO("best.pt").to(device)
20
  detect_model = YOLO("yolov8n.pt").to(device)
21
 
22
+ # Load BLIP captioning model and processor
23
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
24
+ caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
25
+
26
  @spaces.GPU
27
  def process_video(video_path):
28
  os.makedirs("frames", exist_ok=True)
 
98
  sharp = cv2.addWeighted(selective, 2.0, blur, -1.0, 0)
99
  cv2.imwrite("sharpened_board_color.jpg", sharp)
100
 
101
+ # Step 6: Generate Caption
102
+ image = Image.open("sharpened_board_color.jpg").convert("RGB")
103
+ inputs = processor(images=image, return_tensors="pt").to(device)
104
+ out = caption_model.generate(**inputs, max_new_tokens=30)
105
+ caption = processor.decode(out[0], skip_special_tokens=True)
106
+
107
+ return "sharpened_board_color.jpg", caption
108
 
109
 
110
+ # Build Gradio interface
111
  demo = gr.Interface(
112
  fn=process_video,
113
  inputs=[
 
119
  )
120
  ],
121
  outputs=[
122
+ gr.Image(label="Sharpened Final Board"),
123
+ gr.Textbox(label="Generated Caption (BLIP)")
124
  ],
125
+ title="📹 Classroom Board Cleaner + Captioning",
126
  description=(
127
  "1️⃣ Upload your classroom video (.mp4)\n"
128
+ "2️⃣ Extracts, aligns, masks, fuses, sharpens board frames\n"
129
+ "3️⃣ Generates a caption describing the cleaned board output"
130
  )
131
  )
132