Spaces:
Runtime error
Runtime error
Commit
·
ad7e92c
1
Parent(s):
7b8c02f
Update main.py
Browse files
main.py
CHANGED
|
@@ -49,27 +49,27 @@ def predict(item: Item):
|
|
| 49 |
# all other images and returns a list with the pairs that have the highest
|
| 50 |
# cosine similarity score
|
| 51 |
processed_images = util.paraphrase_mining_embeddings(encoded_image)
|
| 52 |
-
NUM_SIMILAR_IMAGES = 10
|
| 53 |
|
| 54 |
# =================
|
| 55 |
# DUPLICATES
|
| 56 |
# =================
|
| 57 |
-
print('Finding duplicate images...')
|
| 58 |
# Filter list for duplicates. Results are triplets (score, image_id1, image_id2) and are sorted in decreasing order
|
| 59 |
# A duplicate image will have a score of 1.00
|
| 60 |
# It may be 0.9999 due to lossy image compression (.jpg)
|
| 61 |
-
duplicates = [image for image in processed_images if image[0] >= 0.999]
|
| 62 |
|
| 63 |
# Output the top X duplicate images
|
| 64 |
-
for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
|
| 65 |
-
print("\nScore: {:.3f}%".format(score * 100))
|
| 66 |
|
| 67 |
# Check if there are any duplicates
|
| 68 |
-
if duplicates:
|
| 69 |
# Find the top score among duplicates
|
| 70 |
-
top_score = max(duplicates, key=lambda x: x[0])[0]
|
| 71 |
-
formatted_score = round(top_score * 100, 3) # Multiplies by 100 and rounds to three decimal places
|
| 72 |
-
return ScoreResponse(score=formatted_score)
|
| 73 |
|
| 74 |
# =================
|
| 75 |
# NEAR DUPLICATES
|
|
@@ -79,7 +79,7 @@ def predict(item: Item):
|
|
| 79 |
# you will get larger clusters which have less similar images in them. Threshold 0 - 1.00
|
| 80 |
# A threshold of 1.00 means the two images are exactly the same. Since we are finding near
|
| 81 |
# duplicate images, we can set it at 0.99 or any number 0 < X < 1.00.
|
| 82 |
-
threshold = 0
|
| 83 |
near_duplicates = [image for image in processed_images if image[0] < threshold]
|
| 84 |
|
| 85 |
#for score, image_id1, image_id2 in near_duplicates[0:NUM_SIMILAR_IMAGES]:
|
|
|
|
| 49 |
# all other images and returns a list with the pairs that have the highest
|
| 50 |
# cosine similarity score
|
| 51 |
processed_images = util.paraphrase_mining_embeddings(encoded_image)
|
| 52 |
+
#NUM_SIMILAR_IMAGES = 10
|
| 53 |
|
| 54 |
# =================
|
| 55 |
# DUPLICATES
|
| 56 |
# =================
|
| 57 |
+
#print('Finding duplicate images...')
|
| 58 |
# Filter list for duplicates. Results are triplets (score, image_id1, image_id2) and are sorted in decreasing order
|
| 59 |
# A duplicate image will have a score of 1.00
|
| 60 |
# It may be 0.9999 due to lossy image compression (.jpg)
|
| 61 |
+
#duplicates = [image for image in processed_images if image[0] >= 0.999]
|
| 62 |
|
| 63 |
# Output the top X duplicate images
|
| 64 |
+
#for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
|
| 65 |
+
#print("\nScore: {:.3f}%".format(score * 100))
|
| 66 |
|
| 67 |
# Check if there are any duplicates
|
| 68 |
+
#if duplicates:
|
| 69 |
# Find the top score among duplicates
|
| 70 |
+
#top_score = max(duplicates, key=lambda x: x[0])[0]
|
| 71 |
+
#formatted_score = round(top_score * 100, 3) # Multiplies by 100 and rounds to three decimal places
|
| 72 |
+
#return ScoreResponse(score=formatted_score)
|
| 73 |
|
| 74 |
# =================
|
| 75 |
# NEAR DUPLICATES
|
|
|
|
| 79 |
# you will get larger clusters which have less similar images in them. Threshold 0 - 1.00
|
| 80 |
# A threshold of 1.00 means the two images are exactly the same. Since we are finding near
|
| 81 |
# duplicate images, we can set it at 0.99 or any number 0 < X < 1.00.
|
| 82 |
+
threshold = 1.0
|
| 83 |
near_duplicates = [image for image in processed_images if image[0] < threshold]
|
| 84 |
|
| 85 |
#for score, image_id1, image_id2 in near_duplicates[0:NUM_SIMILAR_IMAGES]:
|