# Demo script: compare frames against a fixed text prompt using CLIP
# embeddings and display the annotated frame in an OpenCV window.

import cv2
import inference
from inference.core.utils.postprocess import cosine_similarity

from inference.models import Clip

# One CLIP instance is shared: it embeds the prompt below and is also the
# model handed to the stream.
clip = Clip()

# Text description each frame is scored against; also drawn onto the frame.
prompt = "an ace of spades playing card"
# The prompt never changes, so embed it once here rather than per frame.
text_embedding = clip.embed_text(prompt)
def _draw_outlined_text(image, text, origin, scale, outline_thickness, fill_thickness):
    """Draw ``text`` on ``image`` in place: a white outline under a colored fill."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    # Thick white pass first, thinner colored pass on top; the white that
    # remains visible around the strokes acts as an outline.
    cv2.putText(image, text, origin, font, scale, (255, 255, 255), outline_thickness)
    # NOTE(review): the color tuple is read in the image's channel order
    # (presumably BGR, matching the stream configuration) -- confirm.
    cv2.putText(image, text, origin, font, scale, (206, 6, 103), fill_thickness)


def render(result, image):
    """Annotate a frame with its prompt-match score and the prompt, then show it.

    Args:
        result: Prediction for the current frame. ``result["embeddings"][0]``
            is used as the frame's embedding.
        image: Frame to annotate; it is drawn on in place and then displayed.
            The fixed text positions assume a frame at least ~1050 px tall --
            TODO confirm against the actual capture resolution.
    """
    # Cosine similarity between the frame embedding and the prompt embedding.
    similarity = cosine_similarity(result["embeddings"][0], text_embedding[0])

    # Linearly rescale so 0.15 maps to 0 and 0.40 maps to 1, clamp to [0, 1],
    # then express as a percentage. The bounds look like empirically chosen
    # heuristics -- verify before reusing with other prompts.
    low, high = 0.15, 0.40
    percent = max(min((similarity - low) / (high - low), 1), 0) * 100

    # Large score readout.
    _draw_outlined_text(image, f"{percent:.1f}%", (10, 310), 12, 30, 16)

    # Smaller caption with the prompt text.
    _draw_outlined_text(image, prompt, (20, 1050), 2, 10, 5)

    # Show the frame; waitKey pumps the HighGUI event loop so the window
    # actually repaints.
    cv2.imshow("CLIP", image)
    cv2.waitKey(1)
# Start the stream: run `clip` on webcam frames and pass each prediction to
# `render`.
inference.Stream(
    source="webcam",
    model=clip,
    # OpenCV's display and drawing calls in `render` work in BGR order.
    output_channel_order="BGR",
    # NOTE(review): presumably needed so cv2.imshow in the callback runs on
    # the main thread -- confirm against the inference Stream documentation.
    use_main_thread=True,
    on_prediction=render,
)