# mscproject / app.py
# mkshari's picture
# Update app.py
# 9a63c38 verified
import gradio as gr
from PIL import Image
import imagehash
import hashlib
import pandas as pd
import os
# -------------------------
# MD5 HASH FUNCTION
# -------------------------
def get_md5(file_path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is hashed in fixed-size chunks so that a large upload is never
    held in memory all at once.

    Args:
        file_path: Path of the file to hash; it is opened in binary mode.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) would end the loop immediately and read(-1) would load the
        # whole file, so reject both instead of silently misbehaving.
        raise ValueError("chunk_size must be a positive integer")

    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
# -------------------------
# SIMILARITY SCORE FUNCTION
# -------------------------
def get_similarity_score(hash1, hash2, hash_bits=64):
    """Convert the distance between two hashes into a similarity percentage.

    Args:
        hash1: First hash. ``hash1 - hash2`` must evaluate to the distance
            between the two hashes (number of differing bits).
        hash2: Second hash.
        hash_bits: Total number of bits in each hash. The default of 64 keeps
            the original formula; presumably it matches the default size of
            ``imagehash.dhash`` -- pass another value for other hash sizes.

    Returns:
        ``(1 - distance / hash_bits) * 100`` rounded to two decimals, so
        identical hashes score 100.0.

    Raises:
        ValueError: If ``hash_bits`` is not positive.
    """
    if hash_bits <= 0:
        raise ValueError("hash_bits must be a positive integer")

    distance = hash1 - hash2
    return round((1 - distance / hash_bits) * 100, 2)
# -------------------------
# MAIN FUNCTION
# -------------------------
def _upload_path(upload):
    """Return the filesystem path of one upload (a path or an object with ``.name``)."""
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def find_duplicates(files, filter_type):
    """Find exact and near-duplicate images among the uploaded files.

    Each upload is read directly from the path Gradio stored it at. Exact
    duplicates are detected by MD5 of the file bytes; near duplicates are
    pairs whose dHash distance is below 10.

    Args:
        files: Uploaded files. Each item may be a path (``str`` /
            ``os.PathLike``) or an object exposing the path as ``.name``.
            ``None`` or an empty list yields an empty result.
        filter_type: ``"Exact"`` keeps only "Exact Duplicate" rows,
            ``"Similar"`` keeps only "Similar" rows; any other value keeps
            every row.

    Returns:
        A tuple ``(previews, table, csv_path)``:
        ``previews`` is the list of uploaded images converted to RGB,
        ``table`` is a DataFrame with the columns "Image 1", "Image 2",
        "Type" and "Similarity (%)", and ``csv_path`` is the path of the CSV
        export of ``table`` (``None`` when nothing was uploaded).
    """
    columns = ["Image 1", "Image 2", "Type", "Similarity (%)"]
    if not files:
        return [], pd.DataFrame(columns=columns), None

    first_by_md5 = {}    # MD5 digest -> path of the first file with that digest
    hashed = []          # (path, dHash) per upload, in upload order
    results = []
    images_preview = []

    for upload in files:
        # Read the upload in place: copying it to a shared folder under its
        # base name would let same-named uploads overwrite each other.
        file_path = _upload_path(upload)

        # convert() returns an independent in-memory image, so the underlying
        # file handle can be released right away.
        with Image.open(file_path) as source:
            img = source.convert("RGB")
        images_preview.append(img)

        # MD5 check
        md5 = get_md5(file_path)
        if md5 in first_by_md5:
            results.append([file_path, first_by_md5[md5], "Exact Duplicate", 100])
        else:
            first_by_md5[md5] = file_path

        # dHash
        hashed.append((file_path, imagehash.dhash(img)))

    # Compare dHash of every unordered pair. Byte-identical files have a
    # distance of 0, so they are listed here as "Similar" as well.
    for index, (path_a, hash_a) in enumerate(hashed):
        for path_b, hash_b in hashed[index + 1:]:
            if hash_a - hash_b < 10:
                score = get_similarity_score(hash_a, hash_b)
                results.append([path_a, path_b, "Similar", score])

    # Create DataFrame
    df = pd.DataFrame(results, columns=columns)

    # Apply filter
    if filter_type == "Exact":
        df = df[df["Type"] == "Exact Duplicate"]
    elif filter_type == "Similar":
        df = df[df["Type"] == "Similar"]

    # Save CSV
    # NOTE(review): this path is the same for every call, so concurrent
    # requests overwrite each other's export.
    temp_folder = "temp_uploads"
    os.makedirs(temp_folder, exist_ok=True)
    csv_path = os.path.join(temp_folder, "duplicate_results.csv")
    df.to_csv(csv_path, index=False)

    return images_preview, df, csv_path
# -------------------------
# GRADIO UI
# -------------------------
# Page layout: title, upload + filter inputs, run button, preview gallery,
# results table and the CSV download slot, wired to find_duplicates.
# NOTE(review): the emoji in the labels were mis-decoded UTF-8 and have been
# restored; the nesting under gr.Row() is assumed to cover only the two
# inputs -- confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🖼️ Image Duplicate Finder (Advanced)")

    with gr.Row():
        file_input = gr.File(
            file_types=["image"],
            file_count="multiple",
            label="Upload Images",
        )
        filter_option = gr.Radio(
            ["All", "Exact", "Similar"],
            value="All",
            label="Filter Results",
        )

    run_btn = gr.Button("Find Duplicates 🔍")

    gr.Markdown("### 📸 Uploaded Images")
    gallery = gr.Gallery()

    gr.Markdown("### 📊 Results Table")
    table = gr.Dataframe()

    download_btn = gr.File(label="Download CSV")

    # Action: the three return values of find_duplicates fill the gallery,
    # the table and the download slot, in that order.
    run_btn.click(
        fn=find_duplicates,
        inputs=[file_input, filter_option],
        outputs=[gallery, table, download_btn],
    )
# -------------------------
# LAUNCH
# -------------------------
# Start the web server, bound to address 0.0.0.0 on port 7860.
demo.launch(
    server_port=7860,
    server_name="0.0.0.0",
)