Spaces:
Sleeping
Sleeping
| import sys, json, os, requests, numpy as np | |
| from PIL import Image | |
| from io import BytesIO | |
| from imagededup.methods import PHash | |
| import tempfile | |
def download_image(url, output_path='temp_image.jpg'):
    """Download an image from *url*, normalize it to RGB, and save it as JPEG.

    Parameters
    ----------
    url : str
        HTTP(S) URL of the image to fetch.
    output_path : str
        Local path the JPEG is written to.

    Returns
    -------
    str
        The *output_path* the image was saved to.

    Raises
    ------
    requests.HTTPError
        If the server responds with a non-2xx status.
    """
    print(f"DUPLICATE DOWNLOAD Downloading image from {url}", file=sys.stderr)
    # Bound the request so a stalled server cannot hang the whole pipeline.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    image_data = response.content
    print(f"DUPLICATE DOWNLOAD Image data size: {len(image_data)} bytes", file=sys.stderr)
    print(f"DUPLICATE DOWNLOAD Saving to {output_path}", file=sys.stderr)
    image = Image.open(BytesIO(image_data))
    # JPEG has no alpha channel, so flatten RGBA onto a white background.
    if image.mode == 'RGBA':
        background = Image.new('RGB', image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[-1])
        image = background
        print(f"DUPLICATE DOWNLOAD Converted RGBA to RGB", file=sys.stderr)
    elif image.mode != 'RGB':
        # Capture the mode BEFORE converting: after convert() image.mode is
        # always 'RGB', so the original code logged "Converted RGB to RGB".
        original_mode = image.mode
        image = image.convert('RGB')
        print(f"DUPLICATE DOWNLOAD Converted {original_mode} to RGB", file=sys.stderr)
    image.save(output_path, 'JPEG', quality=95)
    print(f"DUPLICATE DOWNLOAD Saved image to {output_path}", file=sys.stderr)
    return output_path
def download_reference_images(urls, temp_dir):
    """Fetch every reference image in *urls* into *temp_dir*.

    Each file is named ``ref_image_{i}.jpg`` where ``i`` is the index of the
    URL in the original list, so a saved path can be traced back to its URL
    even when some downloads fail.  Failures are logged to stderr and
    skipped, so the returned list may be shorter than *urls*.

    Returns
    -------
    list[str]
        Paths of the images that were successfully downloaded.
    """
    saved_paths = []
    total = len(urls)
    for index, url in enumerate(urls):
        destination = os.path.join(temp_dir, f"ref_image_{index}.jpg")
        try:
            download_image(url, destination)
        except Exception as exc:
            print(f"[DUPLICATE] Failed to download reference image {index+1}: {str(exc)}", file=sys.stderr)
            continue
        saved_paths.append(destination)
        print(f"[DUPLICATE] Downloaded reference image {index+1}/{total}", file=sys.stderr)
    return saved_paths
| def detect_duplicates(target_image_path, reference_image_paths, reference_urls, similarity_threshold=0.85): | |
| """Detect duplicates using perceptual hashing""" | |
| print(f"DUPLICATE ANALYSIS Starting duplicate detection...", file=sys.stderr) | |
| try: | |
| phasher = PHash() | |
| # Encode the target image | |
| print(f"DUPLICATE ANALYSIS Encoding target image...", file=sys.stderr) | |
| target_encoding = phasher.encode_image(target_image_path) | |
| print(f"DUPLICATE ANALYSIS Target image hash: {target_encoding}", file=sys.stderr) | |
| # Encode all reference images | |
| print(f"DUPLICATE ANALYSIS Encoding reference images...", file=sys.stderr) | |
| reference_encodings = {} | |
| for i, ref_path in enumerate(reference_image_paths): | |
| try: | |
| encoding = phasher.encode_image(ref_path) | |
| reference_encodings[ref_path] = encoding | |
| print(f"DUPLICATE ANALYSIS Reference image {i+1} hash: {encoding}", file=sys.stderr) | |
| except Exception as e: | |
| print(f"DUPLICATE ANALYSIS Failed to encode reference image {i+1}: {str(e)}", file=sys.stderr) | |
| continue | |
| if not reference_encodings: | |
| return { | |
| "duplicates_found": False, | |
| "message": "No reference images could be processed", | |
| "similar_images": [], | |
| "highest_similarity": 0.0 | |
| } | |
| # Create a mapping of paths to URLs | |
| path_to_url = dict(zip(reference_image_paths, reference_urls)) | |
| # Calculate similarities manually | |
| print(f"DUPLICATE ANALYSIS Comparing images...", file=sys.stderr) | |
| similar_images = [] | |
| highest_similarity = 0.0 | |
| for ref_path, ref_encoding in reference_encodings.items(): | |
| try: | |
| # Calculate Hamming distance between hashes | |
| hamming_distance = phasher.hamming_distance(target_encoding, ref_encoding) | |
| # Convert to similarity score (lower distance = higher similarity) | |
| # PHash uses 64-bit hashes, so max distance is 64 | |
| similarity = 1 - (hamming_distance / 64.0) | |
| print(f"DUPLICATE ANALYSIS Comparing with {ref_path}: Hamming distance = {hamming_distance}, Similarity = {similarity:.4f}", file=sys.stderr) | |
| if similarity >= similarity_threshold: | |
| similar_images.append({ | |
| "reference_path": path_to_url[ref_path], # Use URL instead of path | |
| "similarity_score": round(float(similarity), 4), | |
| "hamming_distance": int(hamming_distance) | |
| }) | |
| if similarity > highest_similarity: | |
| highest_similarity = similarity | |
| except Exception as e: | |
| print(f"DUPLICATE ANALYSIS Error comparing with reference image: {str(e)}", file=sys.stderr) | |
| continue | |
| # Sort by similarity (highest first) | |
| similar_images.sort(key=lambda x: x["similarity_score"], reverse=True) | |
| print(f"DUPLICATE ANALYSIS Analysis complete. Found {len(similar_images)} similar images.", file=sys.stderr) | |
| return { | |
| "duplicates_found": len(similar_images) > 0, | |
| "message": f"Found {len(similar_images)} similar images" if similar_images else "No duplicates found", | |
| "similar_images": similar_images, | |
| "highest_similarity": round(float(highest_similarity), 4), | |
| "threshold_used": similarity_threshold | |
| } | |
| except Exception as e: | |
| print(f"DUPLICATE ANALYSIS Error in duplicate detection: {str(e)}", file=sys.stderr) | |
| return { | |
| "duplicates_found": False, | |
| "message": f"Error during duplicate detection: {str(e)}", | |
| "similar_images": [], | |
| "highest_similarity": 0.0 | |
| } | |
# Main execution.
# Usage: python detect_duplicate.py <target_image_url> <reference_url1> [reference_url2] ...
# Emits a single JSON object on stdout; all progress logging goes to stderr.
if len(sys.argv) < 3:
    print(json.dumps({"error": "Usage: python detect_duplicate.py <target_image_url> <reference_url1> [reference_url2] ..."}))
    sys.exit(1)

target_image_url = sys.argv[1]
reference_urls = sys.argv[2:]

try:
    # All downloads live inside a TemporaryDirectory, which removes itself
    # (and every file in it) when the `with` block exits -- the original
    # per-file os.remove() cleanup loop was redundant and has been dropped.
    with tempfile.TemporaryDirectory() as temp_dir:
        print(f"[DUPLICATE] Using temporary directory: {temp_dir}", file=sys.stderr)
        # Download the image under test.
        target_image_path = os.path.join(temp_dir, "target_image.jpg")
        download_image(target_image_url, target_image_path)
        # Download the reference set (best effort: failures are skipped).
        reference_image_paths = download_reference_images(reference_urls, temp_dir)
        if not reference_image_paths:
            print(json.dumps({
                "error": "No reference images could be downloaded"
            }))
            sys.exit(1)
        # Compare the target against every reference image.
        results = detect_duplicates(target_image_path, reference_image_paths, reference_urls)
        print(json.dumps({
            "success": True,
            "result": results
        }))
except Exception as e:
    # SystemExit is not an Exception subclass, so the sys.exit(1) above
    # propagates; only genuine failures are reported here.
    print(json.dumps({"error": str(e)}))
    sys.exit(1)