Spaces:
Build error
Build error
| import gradio as gr | |
| from PIL import Image | |
| import imagehash | |
| import hashlib | |
| import pandas as pd | |
| import os | |
| # ------------------------- | |
| # MD5 HASH FUNCTION | |
| # ------------------------- | |
def get_md5(file_path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is hashed in fixed-size chunks so that large uploads are never
    held in memory in one piece.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The 32-character lowercase hex digest of the file's bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
| # ------------------------- | |
| # SIMILARITY SCORE FUNCTION | |
| # ------------------------- | |
def get_similarity_score(hash1, hash2, hash_bits=64):
    """Convert the distance between two hashes into a similarity percentage.

    ``hash1 - hash2`` is taken as the number of differing bits, and the
    result is the share of matching bits out of *hash_bits*.

    Args:
        hash1: First hash; must support subtraction with *hash2*.
        hash2: Second hash.
        hash_bits: Total number of bits in each hash. The default of 64
            matches the value this function previously hard-coded; pass a
            different value when the hashes were built with another size.

    Returns:
        The similarity as a percentage, rounded to two decimal places.

    Raises:
        ValueError: If *hash_bits* is not positive.
    """
    if hash_bits <= 0:
        raise ValueError("hash_bits must be a positive number")
    distance = hash1 - hash2
    return round((1 - distance / hash_bits) * 100, 2)
| # ------------------------- | |
| # MAIN FUNCTION | |
| # ------------------------- | |
def find_duplicates(files, filter_type, max_distance=10):
    """Find exact and visually similar images among the uploaded files.

    Each upload is copied into the local ``temp_uploads`` folder, hashed with
    MD5 (byte-identical detection) and with a difference hash (visual
    similarity). Every pair of copied files whose difference hashes are
    closer than *max_distance* is reported as "Similar". Byte-identical
    files also have identical difference hashes, so such a pair appears both
    as an "Exact Duplicate" row and as a "Similar" row.

    Args:
        files: Uploaded files, each given either as a path or as an object
            whose ``name`` attribute is the path on disk. May be empty or
            ``None``.
        filter_type: "Exact" keeps only exact-duplicate rows, "Similar" keeps
            only similar rows; any other value keeps all rows.
        max_distance: Pairs whose hash distance is strictly below this value
            are reported as similar.

    Returns:
        A tuple ``(images, table, csv_path)``: the list of loaded RGB images,
        a DataFrame with columns "Image 1", "Image 2", "Type" and
        "Similarity (%)", and the path of the CSV export of that table
        (``None`` when no files were given).
    """
    columns = ["Image 1", "Image 2", "Type", "Similarity (%)"]
    if not files:
        return [], pd.DataFrame(columns=columns), None

    md5_map = {}  # MD5 digest -> first copied file with that content
    dhash_map = {}  # copied file path -> difference hash
    results = []
    images_preview = []

    # Ensure temp folder exists
    temp_folder = "temp_uploads"
    os.makedirs(temp_folder, exist_ok=True)

    for file in files:
        # Uploads may arrive as plain paths or as objects exposing the path
        # via ``.name``; reading from the path works for both, whereas
        # calling ``.read()`` on the upload only works for file objects.
        if isinstance(file, (str, os.PathLike)):
            source_path = os.fspath(file)
        else:
            source_path = file.name

        base_name = os.path.basename(source_path)
        file_path = os.path.join(temp_folder, base_name)
        # Two uploads can share a basename; give later ones a numeric suffix
        # so they neither overwrite the earlier copy nor share its map entry.
        stem, ext = os.path.splitext(base_name)
        suffix = 1
        while file_path in dhash_map:
            file_path = os.path.join(temp_folder, f"{stem}_{suffix}{ext}")
            suffix += 1

        # Save uploaded file locally
        with open(source_path, "rb") as src, open(file_path, "wb") as dst:
            dst.write(src.read())

        # convert() returns an independent, fully loaded image, so the
        # handle opened by Image.open can be closed right away.
        with Image.open(file_path) as opened:
            img = opened.convert("RGB")
        images_preview.append(img)

        # MD5 check
        md5 = get_md5(file_path)
        if md5 in md5_map:
            results.append([file_path, md5_map[md5], "Exact Duplicate", 100])
        else:
            md5_map[md5] = file_path

        # dHash
        dhash_map[file_path] = imagehash.dhash(img)

    # Compare dHash for every unordered pair of files
    hashed = list(dhash_map.items())
    for index, (path_a, hash_a) in enumerate(hashed):
        for path_b, hash_b in hashed[index + 1:]:
            if hash_a - hash_b < max_distance:
                similarity = get_similarity_score(hash_a, hash_b)
                results.append([path_a, path_b, "Similar", similarity])

    # Create DataFrame
    df = pd.DataFrame(results, columns=columns)

    # Apply filter
    if filter_type == "Exact":
        df = df[df["Type"] == "Exact Duplicate"]
    elif filter_type == "Similar":
        df = df[df["Type"] == "Similar"]

    # Save CSV
    csv_path = os.path.join(temp_folder, "duplicate_results.csv")
    df.to_csv(csv_path, index=False)
    return images_preview, df, csv_path
| # ------------------------- | |
| # GRADIO UI | |
| # ------------------------- | |
# Builds the page: upload + filter controls, a run button, then the image
# gallery, the results table and the CSV download produced by find_duplicates.
# NOTE(review): the emoji in the labels below were restored from mis-encoded
# text. The ones on the button and the "Results Table" heading had collapsed
# to a single character and are best guesses - confirm against the original.
# NOTE(review): indentation was lost in this listing; placing both input
# widgets inside the Row is a reconstruction - confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🖼️ Image Duplicate Finder (Advanced)")

    with gr.Row():
        file_input = gr.File(
            file_types=["image"],
            file_count="multiple",
            label="Upload Images"
        )
        filter_option = gr.Radio(
            ["All", "Exact", "Similar"],
            value="All",
            label="Filter Results"
        )

    run_btn = gr.Button("Find Duplicates 🔍")

    gr.Markdown("### 📸 Uploaded Images")
    gallery = gr.Gallery()

    gr.Markdown("### 📊 Results Table")
    table = gr.Dataframe()

    download_btn = gr.File(label="Download CSV")

    # Action: the three return values of find_duplicates feed the gallery,
    # the table and the download widget, in that order.
    run_btn.click(
        fn=find_duplicates,
        inputs=[file_input, filter_option],
        outputs=[gallery, table, download_btn]
    )

# -------------------------
# LAUNCH
# -------------------------
# Listens on all interfaces on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)