Commit
·
7619e31
1
Parent(s):
d3f81aa
working
Browse files- app.py +56 -0
- requirements.txt +6 -0
app.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import degirum as dg
|
| 3 |
+
from PIL import Image
|
| 4 |
+
import torch
|
| 5 |
+
import numpy as np
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
import clip
|
| 8 |
+
import cv2
|
| 9 |
+
|
| 10 |
+
# Compute the cosine similarity between two vectors.
|
| 11 |
+
def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Args:
        a: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``, or ``0.0`` when either vector has
        zero norm.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # Cosine similarity is undefined for a zero vector; dividing would yield
    # NaN (plus a RuntimeWarning) and poison any later softmax/argmax.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator
|
| 13 |
+
|
| 14 |
+
def compute_text_embeddings(text_prompts):
    """Encode each text prompt with the CLIP "RN50" text encoder.

    The model is loaded on CUDA when available, otherwise on the CPU.

    Args:
        text_prompts: Iterable of prompt strings.

    Returns:
        A list with one entry per prompt, in input order. Each entry is the
        encoder output for that prompt converted to nested Python lists.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    clip_model, _ = clip.load("RN50", device=device)

    text_embeddings = []
    # Inference only: disable autograd so no graph is recorded per prompt.
    with torch.no_grad():
        for text_prompt in text_prompts:
            tokens = clip.tokenize(text_prompt).to(device)
            text_embedding = clip_model.encode_text(tokens)
            text_embeddings.append(text_embedding.cpu().numpy().tolist())

    return text_embeddings
|
| 25 |
+
|
| 26 |
+
# The DeGirum cloud token is read from Streamlit secrets (key "DG_TOKEN") so
# that no credential is committed to source control.
# NOTE(review): a token was previously hard-coded on this line; treat it as
# leaked and revoke/rotate it.
zoo = dg.connect(
    dg.CLOUD,
    zoo_url='https://cs.degirum.com/degirum/kvk_upload_test',
    token=st.secrets["DG_TOKEN"],
)
|
| 27 |
+
|
| 28 |
+
st.title('DeGirum CLIP model Demo')

with st.sidebar:
    st.header('Specify Model Options Below')
    prompts = st.text_area(
        "Enter text prompts (comma-separated):",
        value="People Running, People Talking, People Fighting, People Laughing, People Dancing",
    )
    # Strip whitespace and drop blank entries (e.g. from a trailing comma) so
    # empty prompts are neither encoded nor listed in the results.
    prompts = [
        prompt
        for prompt in (raw.strip() for raw in prompts.split(','))
        if prompt
    ]

st.text('Upload an image. Then click on the submit button')
with st.form("model_form"):
    uploaded_file = st.file_uploader('input image')
    submitted = st.form_submit_button("Submit")
    if submitted:
        if uploaded_file is None:
            # Without this guard Image.open(None) raises on submit.
            st.warning("Please upload an image before submitting.")
        elif not prompts:
            st.warning("Please enter at least one text prompt.")
        else:
            # Text embeddings are only needed for inference, so compute them
            # after submit instead of on every script rerun.
            embeddings = compute_text_embeddings(prompts)
            model = zoo.load_model(
                'clip--224x224_float_openvino_cpu_4',
                input_image_format="RAW",
            )
            # Force 3-channel RGB: grayscale, RGBA and palette uploads would
            # otherwise fail in the RGB->BGR conversion below.
            image = Image.open(uploaded_file).convert("RGB")
            opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
            predictions = model(opencv_image).results[0]["data"]
            image_embedding = predictions.reshape(-1)
            similarities = [
                cosine_similarity(image_embedding, np.array(embedding).reshape(-1))
                for embedding in embeddings
            ]
            # NOTE(review): softmax is applied to raw cosine similarities,
            # which lie in [-1, 1], so the percentages will be close to
            # uniform. CLIP pipelines usually scale the logits before the
            # softmax -- confirm whether that is intended here.
            similarities_tensor = torch.tensor(similarities, dtype=torch.float32)
            softmax_scores = F.softmax(similarities_tensor, dim=0)
            st.image(image, caption="Uploaded Image", use_column_width=True)
            for prompt, score in zip(prompts, softmax_scores.tolist()):
                st.write(f"{prompt} - {score*100:.2f}%")
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
degirum
|
| 2 |
+
altair<5
|
| 3 |
+
clip
|
| 4 |
+
opencv-python
|
| 5 |
+
torch
|
| 6 |
+
numpy
|