Spaces:
Runtime error
Runtime error
Commit ·
b187293
1
Parent(s): f7686f5
Update main.py
Browse files
main.py
CHANGED
|
@@ -13,7 +13,55 @@ def read_root():
|
|
| 13 |
return {"Hello": "World!"}
|
| 14 |
|
| 15 |
@app.put("/")
|
| 16 |
-
def
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
#def put_root():
|
| 19 |
# return {"Hello": "Put!"}
|
|
|
|
| 13 |
return {"Hello": "World!"}
|
| 14 |
|
| 15 |
@app.put("/")
|
| 16 |
+
def predict(Item):
    """Report duplicate and near-duplicate images using CLIP embeddings.

    The images are encoded with the ``clip-ViT-B-32`` SentenceTransformer
    model, every image is compared against all others, and the best-scoring
    pairs are printed, first the duplicates and then the near duplicates.

    Args:
        Item: Request payload. NOTE(review): the body below never reads this
            parameter; it encodes ``imageArray`` instead, which is not
            defined anywhere in the visible code. Presumably the images are
            meant to come from ``Item`` -- confirm and wire it up.

    Returns:
        A string of the form ``"Score: 98.765%"`` holding the last score that
        was printed, or a plain message when no image pair was found at all.
    """
    # Load the OpenAI CLIP Model.
    # NOTE(review): the model is re-loaded on every call; consider loading it
    # once at start-up if this handler is called more than occasionally.
    print('Loading CLIP Model...')
    model = SentenceTransformer('clip-ViT-B-32')

    # Next we compute the embeddings.
    # To encode a single image from disk you can use:
    #     from PIL import Image
    #     encoded_image = model.encode(Image.open(filepath))
    encoded_image = model.encode(
        imageArray,
        batch_size=128,
        convert_to_tensor=True,
        show_progress_bar=True,
    )

    # Now we run the clustering algorithm. This function compares images
    # against all other images and returns a list with the pairs that have
    # the highest cosine similarity score.
    processed_images = util.paraphrase_mining_embeddings(encoded_image)
    NUM_SIMILAR_IMAGES = 10

    # Last score printed by either loop. It stays None when there was nothing
    # to print, so the return below can never hit an unbound variable.
    last_score = None

    # =================
    # DUPLICATES
    # =================
    print('Finding duplicate images...')
    # Filter list for duplicates. Results are triplets
    # (score, image_id1, image_id2) and are sorted in decreasing order.
    # A duplicate image will have a score of 1.00.
    # It may be 0.9999 due to lossy image compression (.jpg).
    duplicates = [image for image in processed_images if image[0] >= 0.999]

    # Output the top X duplicate images.
    for score, _image_id1, _image_id2 in duplicates[:NUM_SIMILAR_IMAGES]:
        print("\nScore: {:.3f}%".format(score * 100))
        last_score = score

    # =================
    # NEAR DUPLICATES
    # =================
    print('Finding near duplicate images...')
    # Use a threshold parameter to identify two images as similar. By setting
    # the threshold lower, you will get larger clusters which have less
    # similar images in them. Threshold 0 - 1.00.
    # A threshold of 1.00 means the two images are exactly the same. Since we
    # are finding near duplicate images, we can set it at 0.99 or any number
    # 0 < X < 1.00.
    threshold = 0.99
    near_duplicates = [image for image in processed_images if image[0] < threshold]

    for score, _image_id1, _image_id2 in near_duplicates[:NUM_SIMILAR_IMAGES]:
        print("\nScore: {:.3f}%".format(score * 100))
        last_score = score

    if last_score is None:
        # Fewer than two images (or no pairs returned): nothing was scored.
        return "No similar image pairs found"

    return "Score: {:.3f}%".format(last_score * 100)
|
| 63 |
+
|
| 64 |
+
#def update_item(item: Item):
|
| 65 |
+
# return item
|
| 66 |
#def put_root():
|
| 67 |
# return {"Hello": "Put!"}
|