import gradio as gr
import os
import cv2
import numpy as np
import torch
import spaces
from ultralytics import YOLO
from tqdm import tqdm
# Fix for Ultralytics config write error in Hugging Face environment
# (the default config dir is not writable in a Space).
# NOTE(review): ultralytics is imported above this line; it may read
# YOLO_CONFIG_DIR at import time — consider setting this env var before
# `from ultralytics import YOLO`. Confirm against the ultralytics version used.
os.environ["YOLO_CONFIG_DIR"] = "/tmp"
# Use GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load models onto the appropriate device:
#   best.pt     — custom detector whose classes include "board" (board-frame filter)
#   yolov8n.pt  — stock YOLOv8 nano, used below to detect "person" for masking
extract_model = YOLO("best.pt").to(device)
detect_model = YOLO("yolov8n.pt").to(device)
@spaces.GPU
def process_video(video_path):
    """Produce a cleaned, sharpened image of the classroom board from a video.

    Pipeline:
      1. Keep only frames where ``extract_model`` detects a "board"
         (also dumped to ``frames/`` for inspection).
      2. Align every kept frame to the first one via ORB features +
         RANSAC homography.
      3. Median-fuse the aligned stack -> ``clean_board.jpg``.
      4. Mask detected persons and average only unoccluded pixels
         per location -> ``fused_board_selective.jpg``.
      5. Unsharp-mask the result -> ``sharpened_board_color.jpg``.

    Parameters
    ----------
    video_path : str
        Filesystem path to the uploaded .mp4 (Gradio `type="filepath"`).

    Returns
    -------
    str
        Path of the final sharpened image ("sharpened_board_color.jpg").

    Raises
    ------
    RuntimeError
        If no frame contains a detected board.
    """
    os.makedirs("frames", exist_ok=True)

    # Step 1: Extract board-only frames.
    cap = cv2.VideoCapture(video_path)
    frames, idx = [], 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # verbose=False for consistency with the person-detector call
            # below (avoids per-frame console spam from ultralytics).
            results = extract_model(frame, verbose=False)
            labels = [extract_model.names[int(c)] for c in results[0].boxes.cls.cpu().numpy()]
            if "board" in labels:
                frames.append(frame)
                cv2.imwrite(f"frames/frame_{idx:04d}.jpg", frame)
                idx += 1
    finally:
        # Release the capture even if model inference raises mid-loop.
        cap.release()
    if not frames:
        raise RuntimeError("No frames with 'board' found.")

    # Step 2: Align every frame to the first board frame.
    def align_frames(ref, tgt):
        # Warp `tgt` into `ref`'s coordinate frame; return None when a
        # reliable homography cannot be estimated (too few features/matches).
        orb = cv2.ORB_create(500)
        k1, d1 = orb.detectAndCompute(ref, None)
        k2, d2 = orb.detectAndCompute(tgt, None)
        if d1 is None or d2 is None:
            return None
        matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        matches = matcher.match(d1, d2)
        if len(matches) < 10:
            return None
        src = np.float32([k2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
        dst = np.float32([k1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
        H, _ = cv2.findHomography(src, dst, cv2.RANSAC)
        return None if H is None else cv2.warpPerspective(tgt, H, (ref.shape[1], ref.shape[0]))

    base = frames[0]
    # The reference frame is trivially aligned to itself, so `aligned` is
    # never empty past this point; unalignable frames are simply dropped.
    aligned = [base]
    for f in tqdm(frames[1:], desc="Aligning"):
        a = align_frames(base, f)
        if a is not None:
            aligned.append(a)

    # Step 3: Median-fuse the aligned stack (robust to transient occluders).
    stack = np.stack(aligned, axis=0).astype(np.float32)
    median_board = np.median(stack, axis=0).astype(np.uint8)
    cv2.imwrite("clean_board.jpg", median_board)

    # Step 4: Mask persons and average only unoccluded pixels per location.
    sum_img = np.zeros_like(aligned[0], dtype=np.float32)
    count = np.zeros(aligned[0].shape[:2], dtype=np.float32)
    for f in tqdm(aligned, desc="Masking persons"):
        res = detect_model(f, verbose=False)
        m = np.zeros(f.shape[:2], dtype=np.uint8)
        for box in res[0].boxes:
            if detect_model.names[int(box.cls)] == "person":
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                cv2.rectangle(m, (x1, y1), (x2, y2), 255, -1)
        inv = cv2.bitwise_not(m)
        masked = cv2.bitwise_and(f, f, mask=inv)
        sum_img += masked.astype(np.float32)
        count += (inv > 0).astype(np.float32)
    count[count == 0] = 1  # avoid divide-by-zero where every frame was masked
    selective = (sum_img / count[:, :, None]).astype(np.uint8)
    cv2.imwrite("fused_board_selective.jpg", selective)

    # Step 5: Unsharp mask (original*1.5 - blur*0.5) to boost legibility.
    blur = cv2.GaussianBlur(selective, (5, 5), 0)
    sharp = cv2.addWeighted(selective, 1.5, blur, -0.5, 0)
    cv2.imwrite("sharpened_board_color.jpg", sharp)
    return "sharpened_board_color.jpg"
# Gradio UI: single .mp4 upload in, single (final) board image out.
# The description previously promised "three stages" of output, but the
# interface exposes only the final sharpened image; the intermediate
# stages are written to disk by process_video, not shown in the UI.
demo = gr.Interface(
    fn=process_video,
    inputs=[
        gr.File(
            label="Upload Classroom Video (.mp4)",
            file_types=['.mp4'],
            file_count="single",
            type="filepath"
        )
    ],
    outputs=[
        gr.Image(label="Sharpened Final Board")
    ],
    title="📹 Classroom Board Cleaner",
    description=(
        "Upload your classroom video (.mp4). \n"
        "Automatic extraction, alignment, masking, fusion & sharpening. \n"
        "The final sharpened board is shown here; intermediate stages are "
        "saved as clean_board.jpg and fused_board_selective.jpg."
    )
)
if __name__ == "__main__":
    # Report which device inference will run on, then start the Gradio app.
    banner = (
        f"[INFO] ✅ Using GPU: {torch.cuda.get_device_name(0)}"
        if device == "cuda"
        else "[INFO] ⚠️ Using CPU (GPU not available or not assigned)"
    )
    print(banner)
    demo.launch()