indoflaven committed on
Commit
b187293
·
1 Parent(s): f7686f5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +50 -2
main.py CHANGED
@@ -13,7 +13,55 @@ def read_root():
13
  return {"Hello": "World!"}
14
 
15
  @app.put("/")
16
- def update_item(item: Item):
17
- return item
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  #def put_root():
19
  # return {"Hello": "Put!"}
 
13
  return {"Hello": "World!"}
14
 
15
  @app.put("/")
16
+ def predict(Item):
17
+ # Load the OpenAI CLIP Model
18
+ print('Loading CLIP Model...')
19
+ model = SentenceTransformer('clip-ViT-B-32')
20
+
21
+ # Next we compute the embeddings
22
+ # To encode an image, you can use the following code:
23
+ # from PIL import Image
24
+ # encoded_image = model.encode(Image.open(filepath))
25
+ #image_names = list(glob.glob('./*.jpg'))
26
+ #print("Images:", len(image_names))
27
+ encoded_image = model.encode(imageArray, batch_size=128, convert_to_tensor=True, show_progress_bar=True)
28
+
29
+ # Now we run the clustering algorithm. This function compares images against
30
+ # all other images and returns a list with the pairs that have the highest
31
+ # cosine similarity score
32
+ processed_images = util.paraphrase_mining_embeddings(encoded_image)
33
+ NUM_SIMILAR_IMAGES = 10
34
+
35
+ # =================
36
+ # DUPLICATES
37
+ # =================
38
+ print('Finding duplicate images...')
39
+ # Filter list for duplicates. Results are triplets (score, image_id1, image_id2) and are sorted in decreasing order
40
+ # A duplicate image will have a score of 1.00
41
+ # It may be 0.9999 due to lossy image compression (.jpg)
42
+ duplicates = [image for image in processed_images if image[0] >= 0.999]
43
+
44
+ # Output the top X duplicate images
45
+ for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
46
+ print("\nScore: {:.3f}%".format(score * 100))
47
+
48
+ # =================
49
+ # NEAR DUPLICATES
50
+ # =================
51
+ print('Finding near duplicate images...')
52
+ # Use a threshold parameter to identify two images as similar. By setting the threshold lower,
53
+ # you will get larger clusters which have less similar images in it. Threshold 0 - 1.00
54
+ # A threshold of 1.00 means the two images are exactly the same. Since we are finding near
55
+ # duplicate images, we can set it at 0.99 or any number 0 < X < 1.00.
56
+ threshold = 0.99
57
+ near_duplicates = [image for image in processed_images if image[0] < threshold]
58
+
59
+ for score, image_id1, image_id2 in near_duplicates[0:NUM_SIMILAR_IMAGES]:
60
+ print("\nScore: {:.3f}%".format(score * 100))
61
+
62
+ return "Score: {:.3f}%".format(score * 100)
63
+
64
+ #def update_item(item: Item):
65
+ # return item
66
  #def put_root():
67
  # return {"Hello": "Put!"}