indoflaven committed on
Commit
ad7e92c
·
1 Parent(s): 7b8c02f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +10 -10
main.py CHANGED
@@ -49,27 +49,27 @@ def predict(item: Item):
49
  # all other images and returns a list with the pairs that have the highest
50
  # cosine similarity score
51
  processed_images = util.paraphrase_mining_embeddings(encoded_image)
52
- NUM_SIMILAR_IMAGES = 10
53
 
54
  # =================
55
  # DUPLICATES
56
  # =================
57
- print('Finding duplicate images...')
58
  # Filter list for duplicates. Results are triplets (score, image_id1, image_id2) and is scorted in decreasing order
59
  # A duplicate image will have a score of 1.00
60
  # It may be 0.9999 due to lossy image compression (.jpg)
61
- duplicates = [image for image in processed_images if image[0] >= 0.999]
62
 
63
  # Output the top X duplicate images
64
- for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
65
- print("\nScore: {:.3f}%".format(score * 100))
66
 
67
  # Check if there are any duplicates
68
- if duplicates:
69
  # Find the top score among duplicates
70
- top_score = max(duplicates, key=lambda x: x[0])[0]
71
- formatted_score = round(top_score * 100, 3) # Multiplies by 100 and rounds to three decimal places
72
- return ScoreResponse(score=formatted_score)
73
 
74
  # =================
75
  # NEAR DUPLICATES
@@ -79,7 +79,7 @@ def predict(item: Item):
79
  # you will get larger clusters which have less similar images in it. Threshold 0 - 1.00
80
  # A threshold of 1.00 means the two images are exactly the same. Since we are finding near
81
  # duplicate images, we can set it at 0.99 or any number 0 < X < 1.00.
82
- threshold = 0.99
83
  near_duplicates = [image for image in processed_images if image[0] < threshold]
84
 
85
  #for score, image_id1, image_id2 in near_duplicates[0:NUM_SIMILAR_IMAGES]:
 
49
  # all other images and returns a list with the pairs that have the highest
50
  # cosine similarity score
51
  processed_images = util.paraphrase_mining_embeddings(encoded_image)
52
+ #NUM_SIMILAR_IMAGES = 10
53
 
54
  # =================
55
  # DUPLICATES
56
  # =================
57
+ #print('Finding duplicate images...')
58
  # Filter list for duplicates. Results are triplets (score, image_id1, image_id2) and is scorted in decreasing order
59
  # A duplicate image will have a score of 1.00
60
  # It may be 0.9999 due to lossy image compression (.jpg)
61
+ #duplicates = [image for image in processed_images if image[0] >= 0.999]
62
 
63
  # Output the top X duplicate images
64
+ #for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
65
+ #print("\nScore: {:.3f}%".format(score * 100))
66
 
67
  # Check if there are any duplicates
68
+ #if duplicates:
69
  # Find the top score among duplicates
70
+ #top_score = max(duplicates, key=lambda x: x[0])[0]
71
+ #formatted_score = round(top_score * 100, 3) # Multiplies by 100 and rounds to three decimal places
72
+ #return ScoreResponse(score=formatted_score)
73
 
74
  # =================
75
  # NEAR DUPLICATES
 
79
  # you will get larger clusters which have less similar images in it. Threshold 0 - 1.00
80
  # A threshold of 1.00 means the two images are exactly the same. Since we are finding near
81
  # duplicate images, we can set it at 0.99 or any number 0 < X < 1.00.
82
+ threshold = 1.0
83
  near_duplicates = [image for image in processed_images if image[0] < threshold]
84
 
85
  #for score, image_id1, image_id2 in near_duplicates[0:NUM_SIMILAR_IMAGES]: