Sankie005's picture
Upload 434 files
c446951
import cv2
import inference
from inference.core.utils.postprocess import cosine_similarity
from inference.models import Clip
clip = Clip()
prompt = "an ace of spades playing card"
text_embedding = clip.embed_text(prompt)
def render(result, image):
# get the cosine similarity between the prompt & the image
similarity = cosine_similarity(result["embeddings"][0], text_embedding[0])
# scale the result to 0-100 based on heuristic (~the best & worst values I've observed)
range = (0.15, 0.40)
similarity = (similarity-range[0])/(range[1]-range[0])
similarity = max(min(similarity, 1), 0)*100
# print the similarity
text = f"{similarity:.1f}%"
cv2.putText(image, text, (10, 310), cv2.FONT_HERSHEY_SIMPLEX, 12, (255, 255, 255), 30)
cv2.putText(image, text, (10, 310), cv2.FONT_HERSHEY_SIMPLEX, 12, (206, 6, 103), 16)
# print the prompt
cv2.putText(image, prompt, (20, 1050), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 10)
cv2.putText(image, prompt, (20, 1050), cv2.FONT_HERSHEY_SIMPLEX, 2, (206, 6, 103), 5)
# display the image
cv2.imshow("CLIP", image)
cv2.waitKey(1)
# start the stream
inference.Stream(
source="webcam",
model=clip,
output_channel_order="BGR",
use_main_thread=True,
on_prediction=render
)