Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 15 |
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
|
| 16 |
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 17 |
|
| 18 |
-
orig_clip_model, orig_clip_processor = clip.load("ViT-B/32", device=device, jit=False)
|
| 19 |
|
| 20 |
|
| 21 |
# Load the Unsplash dataset
|
|
@@ -31,15 +31,15 @@ def predict(image, labels):
|
|
| 31 |
return {k: float(v) for k, v in zip(labels, probs[0])}
|
| 32 |
|
| 33 |
|
| 34 |
-
def predict2(image, labels):
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
|
| 44 |
def rand_image():
|
| 45 |
n = dataset.num_rows
|
|
@@ -64,15 +64,11 @@ with open(emb_filename, 'rb') as emb:
|
|
| 64 |
|
| 65 |
def search(search_query):
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
with torch.no_grad():
|
| 72 |
|
| 73 |
# Encode and normalize the description using CLIP (HF CLIP)
|
| 74 |
-
inputs = processor(text=
|
| 75 |
-
text_encoded =
|
| 76 |
|
| 77 |
# # Encode and normalize the description using CLIP (original CLIP)
|
| 78 |
# text_encoded = orig_clip_model.encode_text(clip.tokenize(search_query))
|
|
@@ -135,7 +131,7 @@ with gr.Blocks(css=".caption-text {font-size: 40px !important;}") as demo:
|
|
| 135 |
"day, night, dawn, dusk"], inputs=label_text)
|
| 136 |
with gr.Row():
|
| 137 |
with gr.Column(variant="panel"):
|
| 138 |
-
im = gr.Image(interactive=False
|
| 139 |
with gr.Row():
|
| 140 |
get_btn = gr.Button("Get Random Image").style(full_width=False)
|
| 141 |
reclass_btn = gr.Button("Re-Classify Image").style(full_width=False)
|
|
|
|
| 15 |
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
|
| 16 |
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 17 |
|
| 18 |
+
#orig_clip_model, orig_clip_processor = clip.load("ViT-B/32", device=device, jit=False)
|
| 19 |
|
| 20 |
|
| 21 |
# Load the Unsplash dataset
|
|
|
|
| 31 |
return {k: float(v) for k, v in zip(labels, probs[0])}
|
| 32 |
|
| 33 |
|
| 34 |
+
# def predict2(image, labels):
|
| 35 |
+
# image = orig_clip_processor(image).unsqueeze(0).to(device)
|
| 36 |
+
# text = clip.tokenize(labels).to(device)
|
| 37 |
+
# with torch.no_grad():
|
| 38 |
+
# image_features = orig_clip_model.encode_image(image)
|
| 39 |
+
# text_features = orig_clip_model.encode_text(text)
|
| 40 |
+
# logits_per_image, logits_per_text = orig_clip_model(image, text)
|
| 41 |
+
# probs = logits_per_image.softmax(dim=-1).cpu().numpy()
|
| 42 |
+
# return {k: float(v) for k, v in zip(labels, probs[0])}
|
| 43 |
|
| 44 |
def rand_image():
|
| 45 |
n = dataset.num_rows
|
|
|
|
| 64 |
|
| 65 |
def search(search_query):
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
with torch.no_grad():
|
| 68 |
|
| 69 |
# Encode and normalize the description using CLIP (HF CLIP)
|
| 70 |
+
inputs = processor(text=search_query, images=None, return_tensors="pt", padding=True)
|
| 71 |
+
text_encoded = model.get_text_features(**inputs)
|
| 72 |
|
| 73 |
# # Encode and normalize the description using CLIP (original CLIP)
|
| 74 |
# text_encoded = orig_clip_model.encode_text(clip.tokenize(search_query))
|
|
|
|
| 131 |
"day, night, dawn, dusk"], inputs=label_text)
|
| 132 |
with gr.Row():
|
| 133 |
with gr.Column(variant="panel"):
|
| 134 |
+
im = gr.Image(interactive=False).style(height=height)
|
| 135 |
with gr.Row():
|
| 136 |
get_btn = gr.Button("Get Random Image").style(full_width=False)
|
| 137 |
reclass_btn = gr.Button("Re-Classify Image").style(full_width=False)
|