# MatchVision / app.py
# Uploaded by GIGAParviz; commit ce890b6 (verified): "Upload 3 files"
import gradio as gr
import cv2
import numpy as np
# Relative paths of the sample video and target image that process_example()
# passes to process_video_and_image().
EXAMPLE_VIDEO = "example.mp4"
EXAMPLE_IMAGE = "target (2).png"
def _resolve_upload_path(upload):
    """Return the filesystem path behind an upload value, or None if absent.

    Accepts a plain path string as well as a file-like object that exposes
    its path through a ``name`` attribute.
    """
    if upload is None:
        return None
    path = str(getattr(upload, "name", upload))
    return path or None


def process_video_and_image(video_path, target_image_path):
    """Locate a target image in every frame of a video and render the matches.

    SIFT features of the target are matched against each frame with a FLANN
    matcher and Lowe's ratio test (0.70). When more than 10 good matches are
    found, a RANSAC homography is estimated and the projected outline of the
    target is drawn on the frame in green. Each frame is then rendered next
    to the target with the good matches drawn, resized to 1200x600 and
    appended to ``output_video.avi`` (XVID, 10 fps).

    Args:
        video_path: Path of the input video, or a file-like object whose
            ``name`` attribute holds that path.
        target_image_path: Path of the target image, or a file-like object
            whose ``name`` attribute holds that path.

    Returns:
        The output video path on success, otherwise a string starting with
        ``"Error: "`` that describes the failure. No exception escapes.
    """
    # NOTE(review): failures are reported as an "Error: ..." string, which the
    # UI wires into a video output component -- confirm that is the intended
    # way to surface errors to the user.
    out_video_path = "output_video.avi"
    frame_size = (1200, 600)  # (width, height) of every written frame
    cap = None
    writer = None
    try:
        video_file = _resolve_upload_path(video_path)
        if video_file is None:
            return "Error: no video provided"
        target_file = _resolve_upload_path(target_image_path)
        if target_file is None:
            return "Error: no target image provided"

        # cv2.imread signals failure by returning None instead of raising.
        target_img = cv2.imread(target_file)
        if target_img is None:
            return f"Error: could not read target image '{target_file}'"
        target_gray = cv2.cvtColor(target_img, cv2.COLOR_BGR2GRAY)

        sift = cv2.SIFT_create()
        keypoints_target, descriptors_target = sift.detectAndCompute(target_gray, None)
        if descriptors_target is None or len(descriptors_target) == 0:
            return "Error: no features found in target image"

        cap = cv2.VideoCapture(video_file)
        if not cap.isOpened():
            return f"Error: could not open video '{video_file}'"

        flann = cv2.FlannBasedMatcher(dict(algorithm=1, trees=5), dict(checks=50))

        # Corners of the target image; loop-invariant, so computed once.
        h, w = target_gray.shape
        corners = np.float32([[0, 0], [0, h], [w, h], [w, 0]]).reshape(-1, 1, 2)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            keypoints_frame, descriptors_frame = sift.detectAndCompute(frame_gray, None)

            good_matches = []
            # k=2 matching needs at least two candidate descriptors in the frame.
            if descriptors_frame is not None and len(descriptors_frame) >= 2:
                matches = flann.knnMatch(descriptors_target, descriptors_frame, k=2)
                # Ratio test; skip entries where fewer than two neighbours came back.
                good_matches = [
                    pair[0]
                    for pair in matches
                    if len(pair) == 2 and pair[0].distance < 0.70 * pair[1].distance
                ]

            if len(good_matches) > 10:
                src_pts = np.float32(
                    [keypoints_target[m.queryIdx].pt for m in good_matches]
                ).reshape(-1, 1, 2)
                dst_pts = np.float32(
                    [keypoints_frame[m.trainIdx].pt for m in good_matches]
                ).reshape(-1, 1, 2)
                matrix, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
                if matrix is not None:
                    outline = cv2.perspectiveTransform(corners, matrix)
                    frame = cv2.polylines(
                        frame,
                        [np.int32(outline)],
                        isClosed=True,
                        color=(0, 255, 0),
                        thickness=3,
                    )

            # Frames without usable features are still emitted (with no match
            # lines) so the output keeps one frame per decoded input frame.
            match_frame = cv2.drawMatches(
                target_img, keypoints_target, frame, keypoints_frame, good_matches, None
            )
            resized = cv2.resize(match_frame, frame_size)

            # Open the writer lazily so no output file is created for a video
            # that yields no frames, and stream frames instead of buffering
            # the whole video in memory.
            if writer is None:
                writer = cv2.VideoWriter(
                    out_video_path, cv2.VideoWriter_fourcc(*'XVID'), 10, frame_size
                )
                if not writer.isOpened():
                    return f"Error: could not open '{out_video_path}' for writing"
            writer.write(resized)

        if writer is None:
            return "Error: no frames could be read from the video"
        return out_video_path
    except Exception as e:
        return f"Error: {e}"
    finally:
        # Release native handles on every exit path, including early returns.
        if cap is not None:
            cap.release()
        if writer is not None:
            writer.release()
# Build the Gradio page: two uploads, a result player and two trigger buttons.
with gr.Blocks() as demo:
    gr.Markdown("## Video and Target Image Matcher")

    # Upload widgets, each limited to the listed file extensions.
    with gr.Row():
        video_input = gr.File(
            label="Upload Video File",
            file_types=[".mp4", ".avi", ".mov"],
        )
        image_input = gr.File(
            label="Upload Target Image",
            file_types=[".png", ".jpg", ".jpeg"],
        )

    example_button = gr.Button("Process Example")
    output_video = gr.Video(label="Matched Video Output")
    process_button = gr.Button("Process")

    def process_example():
        """Run the matcher on the EXAMPLE_VIDEO / EXAMPLE_IMAGE pair."""
        return process_video_and_image(EXAMPLE_VIDEO, EXAMPLE_IMAGE)

    # Both buttons send their result to the same video component.
    process_button.click(
        process_video_and_image,
        [video_input, image_input],
        output_video,
    )
    example_button.click(process_example, [], output_video)

demo.launch()