File size: 3,379 Bytes
775f01e
 
 
 
ddaf790
9a63c38
775f01e
 
 
 
9429840
 
 
775f01e
48438ba
9a63c38
48438ba
 
 
 
 
 
775f01e
 
 
ddaf790
775f01e
9a63c38
775f01e
 
 
 
ddaf790
 
9a63c38
 
 
 
 
775f01e
9a63c38
 
 
775f01e
9a63c38
ddaf790
 
48438ba
9429840
775f01e
9a63c38
775f01e
9429840
775f01e
9a63c38
9429840
775f01e
9a63c38
9429840
 
 
48438ba
 
 
 
 
 
ddaf790
9a63c38
 
 
ddaf790
775f01e
ddaf790
 
 
 
 
 
 
9a63c38
ddaf790
 
 
775f01e
 
9a63c38
775f01e
ddaf790
 
 
 
 
 
 
9a63c38
ddaf790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
775f01e
9429840
48438ba
9429840
ddaf790
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import hashlib
import itertools
import os
import shutil

import gradio as gr
import imagehash
import pandas as pd
from PIL import Image

# -------------------------
# MD5 HASH FUNCTION
# -------------------------
def get_md5(file_path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is hashed in fixed-size chunks so that memory use stays bounded
    regardless of the file size.

    Args:
        file_path: Path of the file to hash; opened in binary mode.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The MD5 digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# -------------------------
# SIMILARITY SCORE FUNCTION
# -------------------------
def get_similarity_score(hash1, hash2):
    """Turn the difference between two hashes into a percentage score.

    Args:
        hash1: First hash; must support subtraction with *hash2*.
        hash2: Second hash.

    Returns:
        ``(1 - (hash1 - hash2) / 64) * 100`` rounded to two decimal places.
    """
    # NOTE(review): 64 is presumably the bit length of the hashes being
    # compared (an 8x8 hash) -- confirm if a different hash size is ever used.
    distance = hash1 - hash2
    matching_fraction = 1 - distance / 64
    return round(matching_fraction * 100, 2)

# -------------------------
# MAIN FUNCTION
# -------------------------
def _resolve_upload_path(file):
    """Return the on-disk path of one uploaded file entry."""
    # Entries may be plain path strings or objects exposing the path as `.name`.
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _unique_destination(folder, base_name, taken):
    """Return a path in *folder* for *base_name* whose file name is not in *taken*."""
    stem, ext = os.path.splitext(base_name)
    candidate = base_name
    counter = 1
    while candidate in taken:
        candidate = f"{stem}_{counter}{ext}"
        counter += 1
    taken.add(candidate)
    return os.path.join(folder, candidate)


def find_duplicates(files, filter_type):
    """Detect exact and near-duplicate images among the uploaded files.

    Every upload is copied into the ``temp_uploads`` folder, hashed with MD5
    (exact duplicates) and with a difference hash (similar images), and all
    pairs whose difference-hash distance is below 10 are reported as similar.
    The resulting table is also written to
    ``temp_uploads/duplicate_results.csv``.

    Args:
        files: Uploaded files. Each entry is either a path string or an
            object whose ``name`` attribute is the path of the upload.
        filter_type: ``"Exact"`` keeps only exact-duplicate rows,
            ``"Similar"`` keeps only similar rows; any other value keeps all
            rows.

    Returns:
        A tuple ``(images, table, csv_path)``: the list of loaded RGB images,
        a DataFrame with the columns ``Image 1``, ``Image 2``, ``Type`` and
        ``Similarity (%)``, and the path of the written CSV file. When
        *files* is empty the result is ``([], <empty table>, None)`` and
        nothing is written.
    """
    columns = ["Image 1", "Image 2", "Type", "Similarity (%)"]
    if not files:
        return [], pd.DataFrame(columns=columns), None

    max_distance = 10  # pairs with a hash distance below this count as similar
    md5_map = {}
    dhash_map = {}
    results = []
    images_preview = []
    used_names = set()

    # Ensure temp folder exists
    temp_folder = "temp_uploads"
    os.makedirs(temp_folder, exist_ok=True)

    for file in files:
        source_path = _resolve_upload_path(file)
        # Uploads that share a base name must not overwrite each other,
        # otherwise one image is lost and reported as a duplicate of itself.
        file_path = _unique_destination(
            temp_folder, os.path.basename(source_path), used_names
        )
        shutil.copyfile(source_path, file_path)

        # convert() returns an independent image, so the file handle can be
        # released right away instead of lingering until garbage collection.
        with Image.open(file_path) as source_image:
            img = source_image.convert("RGB")
        images_preview.append(img)

        # MD5 check: identical bytes mean an exact duplicate of an earlier file.
        md5 = get_md5(file_path)
        if md5 in md5_map:
            results.append([file_path, md5_map[md5], "Exact Duplicate", 100])
        else:
            md5_map[md5] = file_path

        # dHash
        dhash_map[file_path] = imagehash.dhash(img)

    # Compare every unordered pair of images once.
    for (name1, hash1), (name2, hash2) in itertools.combinations(
        dhash_map.items(), 2
    ):
        if hash1 - hash2 < max_distance:
            results.append(
                [name1, name2, "Similar", get_similarity_score(hash1, hash2)]
            )

    # Create DataFrame
    df = pd.DataFrame(results, columns=columns)

    # Apply filter
    if filter_type == "Exact":
        df = df[df["Type"] == "Exact Duplicate"]
    elif filter_type == "Similar":
        df = df[df["Type"] == "Similar"]

    # Save CSV
    csv_path = os.path.join(temp_folder, "duplicate_results.csv")
    df.to_csv(csv_path, index=False)

    return images_preview, df, csv_path

# -------------------------
# GRADIO UI
# -------------------------
with gr.Blocks() as demo:

    # Page title.
    gr.Markdown("## 🖼️ Image Duplicate Finder (Advanced)")

    # Inputs: the multi-image uploader and the result filter share one row.
    with gr.Row():
        file_input = gr.File(
            file_types=["image"],
            file_count="multiple",
            label="Upload Images"
        )

        filter_option = gr.Radio(
            ["All", "Exact", "Similar"],
            value="All",
            label="Filter Results"
        )

    run_btn = gr.Button("Find Duplicates 🔍")

    # Outputs: image preview, results table and the downloadable CSV.
    gr.Markdown("### 📸 Uploaded Images")
    gallery = gr.Gallery()

    gr.Markdown("### 📊 Results Table")
    table = gr.Dataframe()

    download_btn = gr.File(label="Download CSV")

    # Action: the three values returned by find_duplicates fill the gallery,
    # the table and the download component, in that order.
    run_btn.click(
        fn=find_duplicates,
        inputs=[file_input, filter_option],
        outputs=[gallery, table, download_btn]
    )

# -------------------------
# LAUNCH
# -------------------------
# Start the web server on host 0.0.0.0, port 7860.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
)