Spaces:
Build error
Build error
File size: 3,379 Bytes
775f01e ddaf790 9a63c38 775f01e 9429840 775f01e 48438ba 9a63c38 48438ba 775f01e ddaf790 775f01e 9a63c38 775f01e ddaf790 9a63c38 775f01e 9a63c38 775f01e 9a63c38 ddaf790 48438ba 9429840 775f01e 9a63c38 775f01e 9429840 775f01e 9a63c38 9429840 775f01e 9a63c38 9429840 48438ba ddaf790 9a63c38 ddaf790 775f01e ddaf790 9a63c38 ddaf790 775f01e 9a63c38 775f01e ddaf790 9a63c38 ddaf790 775f01e 9429840 48438ba 9429840 ddaf790 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 | import gradio as gr
from PIL import Image
import imagehash
import hashlib
import pandas as pd
import os
# -------------------------
# MD5 HASH FUNCTION
# -------------------------
def get_md5(file_path):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is hashed in fixed-size chunks rather than being read in one
    call, so memory use stays constant regardless of the file size.

    Args:
        file_path: Path of the file to hash; opened in binary mode.

    Returns:
        The MD5 digest as a 32-character lowercase hex string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 64 * 1024
    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
# -------------------------
# SIMILARITY SCORE FUNCTION
# -------------------------
def get_similarity_score(hash1, hash2, hash_bits=64):
    """Convert the distance between two hashes into a percentage similarity.

    The distance is taken as ``hash1 - hash2``. In this file the arguments
    are ``imagehash.dhash`` results, for which subtraction is expected to
    yield the number of differing bits (NOTE(review): confirm against the
    imagehash documentation).

    Args:
        hash1: First hash; must support ``hash1 - hash2``.
        hash2: Second hash.
        hash_bits: Total number of bits in each hash. The default of 64
            keeps the original behaviour; pass a different value when the
            hashes were produced with a non-default hash size.

    Returns:
        ``(1 - distance / hash_bits) * 100`` rounded to two decimals.

    Raises:
        ValueError: If ``hash_bits`` is not positive.
    """
    if hash_bits <= 0:
        raise ValueError("hash_bits must be a positive integer")
    diff = hash1 - hash2
    similarity = (1 - diff / hash_bits) * 100
    return round(similarity, 2)
# -------------------------
# MAIN FUNCTION
# -------------------------
_RESULT_COLUMNS = ["Image 1", "Image 2", "Type", "Similarity (%)"]

# dHash distances strictly below this value are reported as "Similar".
_MAX_SIMILAR_DISTANCE = 10


def _uploaded_path(upload):
    """Return the filesystem path of an upload given as a path or an object with ``.name``."""
    return getattr(upload, "name", upload)


def _unique_destination(folder, filename, taken):
    """Return a path in ``folder`` for ``filename`` that is not yet in ``taken``, and record it."""
    stem, ext = os.path.splitext(filename)
    candidate = filename
    counter = 1
    while candidate in taken:
        candidate = f"{stem}_{counter}{ext}"
        counter += 1
    taken.add(candidate)
    return os.path.join(folder, candidate)


def find_duplicates(files, filter_type):
    """Find exact and near-duplicate images among the uploaded files.

    Each upload is copied into the ``temp_uploads`` folder, opened as an RGB
    image, and fingerprinted twice: an MD5 of the file bytes (exact
    duplicates) and a dHash of the pixels (visually similar images).

    Args:
        files: Uploaded files, each either a filesystem path string or an
            object whose ``name`` attribute is the path. ``None`` or an
            empty list yields an empty result.
        filter_type: ``"Exact"`` keeps only exact-duplicate rows,
            ``"Similar"`` keeps only similar rows; any other value keeps
            all rows.

    Returns:
        A tuple ``(images, table, csv_path)``: the list of loaded RGB
        images, a DataFrame with the columns ``Image 1``, ``Image 2``,
        ``Type`` and ``Similarity (%)``, and the path of the CSV written
        from that DataFrame (``None`` when there were no files).
    """
    import shutil
    from itertools import combinations

    if not files:
        return [], pd.DataFrame(columns=_RESULT_COLUMNS), None

    temp_folder = "temp_uploads"
    os.makedirs(temp_folder, exist_ok=True)

    first_path_by_md5 = {}
    md5_by_path = {}
    dhash_by_path = {}
    results = []
    images_preview = []
    taken_names = set()

    for upload in files:
        source_path = _uploaded_path(upload)
        # Two uploads may share a basename; give each its own destination so
        # one does not overwrite the other or collapse into one dict key.
        file_path = _unique_destination(
            temp_folder, os.path.basename(source_path), taken_names
        )
        shutil.copyfile(source_path, file_path)

        # convert() returns an independent image, so the underlying file
        # handle can be released as soon as the conversion is done.
        with Image.open(file_path) as opened:
            img = opened.convert("RGB")
        images_preview.append(img)

        md5 = get_md5(file_path)
        md5_by_path[file_path] = md5
        if md5 in first_path_by_md5:
            results.append([file_path, first_path_by_md5[md5], "Exact Duplicate", 100])
        else:
            first_path_by_md5[md5] = file_path

        dhash_by_path[file_path] = imagehash.dhash(img)

    for (path_a, hash_a), (path_b, hash_b) in combinations(dhash_by_path.items(), 2):
        # Byte-identical files are already covered by the exact-duplicate
        # rows; do not list them a second time as merely "Similar".
        if md5_by_path[path_a] == md5_by_path[path_b]:
            continue
        if hash_a - hash_b < _MAX_SIMILAR_DISTANCE:
            similarity = get_similarity_score(hash_a, hash_b)
            results.append([path_a, path_b, "Similar", similarity])

    df = pd.DataFrame(results, columns=_RESULT_COLUMNS)

    if filter_type == "Exact":
        df = df[df["Type"] == "Exact Duplicate"]
    elif filter_type == "Similar":
        df = df[df["Type"] == "Similar"]

    # The CSV mirrors the filtered table that is returned to the caller.
    csv_path = os.path.join(temp_folder, "duplicate_results.csv")
    df.to_csv(csv_path, index=False)
    return images_preview, df, csv_path
# -------------------------
# GRADIO UI
# -------------------------
# Builds the Gradio Blocks interface and binds it to the name `demo`.
# NOTE(review): indentation was lost in this copy of the file, so which of the
# statements below belong inside the `gr.Row()` context (versus directly under
# `gr.Blocks()`) cannot be determined from here -- confirm against the original.
with gr.Blocks() as demo:
gr.Markdown("## ๐ผ๏ธ Image Duplicate Finder (Advanced)")
with gr.Row():
# Upload widget configured for multiple files with file_types=["image"].
file_input = gr.File(
file_types=["image"],
file_count="multiple",
label="Upload Images"
)
# Result filter; the choices match the filter_type values that
# find_duplicates checks ("Exact", "Similar"; anything else keeps all rows).
filter_option = gr.Radio(
["All", "Exact", "Similar"],
value="All",
label="Filter Results"
)
run_btn = gr.Button("Find Duplicates ๐")
gr.Markdown("### ๐ธ Uploaded Images")
gallery = gr.Gallery()
gr.Markdown("### ๐ Results Table")
table = gr.Dataframe()
# File component used as the output slot for the generated CSV path.
download_btn = gr.File(label="Download CSV")
# Clicking the button calls find_duplicates(files, filter_type); its three
# return values are routed, in order, to the gallery, the table and the
# CSV download component.
run_btn.click(
fn=find_duplicates,
inputs=[file_input, filter_option],
outputs=[gallery, table, download_btn]
)
# -------------------------
# LAUNCH
# -------------------------
# Start the web server, listening on all network interfaces (0.0.0.0) on
# port 7860 so the app is reachable from outside the local host/container.
demo.launch(server_name="0.0.0.0", server_port=7860)