File size: 3,392 Bytes
ce890b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
import cv2
import numpy as np

EXAMPLE_VIDEO = "example.mp4"
EXAMPLE_IMAGE = "target (2).png"

def process_video_and_image(video_path, target_image_path):
    """Find a target image inside each frame of a video using SIFT + FLANN.

    For every video frame with more than 10 Lowe-ratio-filtered matches, a
    homography is estimated, the target's outline is drawn on the frame, and a
    side-by-side match visualization is appended to the output video. Frames
    without enough matches are skipped (original behavior, kept as-is).

    Args:
        video_path: Path to the input video file.
        target_image_path: Path to the target image to locate.

    Returns:
        Path to the written XVID .avi output video on success, or a string
        starting with "Error:" on failure.
        NOTE(review): error strings are fed into a gr.Video output by the
        callers — confirm whether a None/exception would be a better contract.
    """
    try:
        target_img = cv2.imread(target_image_path)
        if target_img is None:
            # imread does not raise on a missing/unreadable file; it returns None.
            return f"Error: could not read target image: {target_image_path}"
        target_gray = cv2.cvtColor(target_img, cv2.COLOR_BGR2GRAY)

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return f"Error: could not open video: {video_path}"

        sift = cv2.SIFT_create()
        keypoints_target, descriptors_target = sift.detectAndCompute(target_gray, None)
        if descriptors_target is None:
            cap.release()
            return "Error: no SIFT features found in the target image"
        # algorithm=1 is FLANN_INDEX_KDTREE, appropriate for float SIFT descriptors.
        flann = cv2.FlannBasedMatcher(dict(algorithm=1, trees=5), dict(checks=50))

        output_frames = []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            keypoints_frame, descriptors_frame = sift.detectAndCompute(frame_gray, None)

            # Need at least 2 train descriptors for k=2 nearest-neighbour search.
            if descriptors_frame is not None and len(descriptors_frame) >= 2:
                matches = flann.knnMatch(descriptors_target, descriptors_frame, k=2)
                # knnMatch can return pairs with fewer than 2 neighbours;
                # unpacking them as (m, n) would raise, so filter by length first.
                good_matches = [
                    pair[0] for pair in matches
                    if len(pair) == 2 and pair[0].distance < 0.70 * pair[1].distance
                ]

                if len(good_matches) > 10:
                    src_pts = np.float32(
                        [keypoints_target[m.queryIdx].pt for m in good_matches]
                    ).reshape(-1, 1, 2)
                    dst_pts = np.float32(
                        [keypoints_frame[m.trainIdx].pt for m in good_matches]
                    ).reshape(-1, 1, 2)
                    matrix, _mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

                    if matrix is not None:
                        # Project the target's corners into the frame and outline them.
                        h, w = target_gray.shape
                        pts = np.float32([[0, 0], [0, h], [w, h], [w, 0]]).reshape(-1, 1, 2)
                        dst = cv2.perspectiveTransform(pts, matrix)
                        frame = cv2.polylines(
                            frame, [np.int32(dst)],
                            isClosed=True, color=(0, 255, 0), thickness=3,
                        )

                    match_frame = cv2.drawMatches(
                        target_img, keypoints_target, frame, keypoints_frame,
                        good_matches, None,
                    )
                    output_frames.append(cv2.resize(match_frame, (1200, 600)))

        cap.release()

        if not output_frames:
            # Previously output_frames[0] raised IndexError here, which the
            # broad except converted into an unhelpful message.
            return "Error: no frames with enough matches were found"

        height, width, _ = output_frames[0].shape
        out_video_path = "output_video.avi"
        out = cv2.VideoWriter(out_video_path, cv2.VideoWriter_fourcc(*'XVID'), 10, (width, height))

        for frame in output_frames:
            out.write(frame)
        out.release()

        return out_video_path
    except Exception as e:
        # Top-level boundary: surface any unexpected failure to the UI.
        return f"Error: {str(e)}"

# Gradio UI: two file inputs (video + target image) and a button that runs the
# matcher, plus a one-click "Process Example" path using the bundled files.
with gr.Blocks() as demo:
    gr.Markdown("## Video and Target Image Matcher")

    with gr.Row():
        video_input = gr.File(label="Upload Video File", file_types=[".mp4", ".avi", ".mov"])
        image_input = gr.File(label="Upload Target Image", file_types=[".png", ".jpg", ".jpeg"])
        example_button = gr.Button("Process Example")

    output_video = gr.Video(label="Matched Video Output")
    process_button = gr.Button("Process")

    def process_example():
        """Run the matcher on the bundled example video/image pair."""
        return process_video_and_image(EXAMPLE_VIDEO, EXAMPLE_IMAGE)

    process_button.click(
        fn=process_video_and_image,
        inputs=[video_input, image_input],
        outputs=output_video,
    )
    example_button.click(
        fn=process_example,
        inputs=[],
        outputs=output_video,
    )

# Guard the launch so importing this module (e.g. for testing) does not
# start the web server; behavior when run as a script is unchanged.
if __name__ == "__main__":
    demo.launch()