Spaces:

flax-community
/

koclip

Build error

App Files Files Community

tree-park committed on Jul 19, 2021

Commit

84c806e

1 Parent(s): b21e1da

Add most relevant part

Browse files

Files changed (3) hide show

app.py +2 -1
most_relevant_part.py +80 -0
text2image.py +1 -1

app.py CHANGED Viewed

@@ -2,8 +2,9 @@ import streamlit as st
 import image2text
 import text2image
-PAGES = {"Text to Image": text2image, "Image to Text": image2text}
 st.sidebar.title("Navigation")
 model = st.sidebar.selectbox("Choose a model", ["koclip-base", "koclip-large"])

 import image2text
 import text2image
+import most_relevant_part
+PAGES = {"Text to Image": text2image, "Image to Text": image2text, "Most Relevant Part of Image": most_relevant_part}
 st.sidebar.title("Navigation")
 model = st.sidebar.selectbox("Choose a model", ["koclip-base", "koclip-large"])

most_relevant_part.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import os
+import requests
+import streamlit as st
+from PIL import Image
+import jax
+import jax.numpy as jnp
+import numpy as np
+from utils import load_model
+def split_image(im, grid=3):
+    """Split an image into exactly ``grid`` x ``grid`` tiles.
+
+    Args:
+        im: Anything ``np.array`` can turn into an array whose first two
+            axes are rows and columns (e.g. a PIL image).
+        grid: Number of tiles along each axis. Defaults to 3.
+
+    Returns:
+        A list of ``grid * grid`` array views in row-major order
+        (left to right, then top to bottom).
+
+    Raises:
+        ValueError: If ``grid`` is smaller than 1 or the image has fewer
+            than ``grid`` rows or columns, so a tile would be empty.
+    """
+    im = np.array(im)
+    height, width = im.shape[0], im.shape[1]
+    if grid < 1 or height < grid or width < grid:
+        raise ValueError(
+            f"cannot split a {height}x{width} image into a {grid}x{grid} grid")
+    # Integer cell edges: when a side is not divisible by ``grid`` the
+    # leftover pixels are absorbed by the later tiles instead of spilling
+    # into an extra sliver row/column (which stepping by ``side // grid``
+    # would produce).
+    row_edges = [i * height // grid for i in range(grid + 1)]
+    col_edges = [j * width // grid for j in range(grid + 1)]
+    return [
+        im[top:bottom, left:right]
+        for top, bottom in zip(row_edges, row_edges[1:])
+        for left, right in zip(col_edges, col_edges[1:])
+    ]
+# def split_image(X):
+#     num_rows = X.shape[0] // 224
+#     num_cols = X.shape[1] // 224
+#     Xc = X[0:num_rows * 224, 0:num_cols * 224, :]
+#     patches = []
+#     for j in range(num_rows):
+#         for i in range(num_cols):
+#             patches.append(Xc[j * 224:(j + 1) * 224, i * 224:(i + 1) * 224, :])
+#     return patches
+def app(model_name):
+    """Render the "Most Relevant Part of Image" Streamlit page.
+
+    Loads the ``koclip/<model_name>`` model and processor, asks the user for
+    an image (URL or upload) and a text query, splits the image into tiles
+    with ``split_image``, and shows the tiles ordered from highest to lowest
+    score for the query.
+
+    Args:
+        model_name: Model identifier appended to ``"koclip/"`` when calling
+            ``load_model``.
+
+    Returns:
+        None. All output goes to the Streamlit page; problems loading or
+        splitting the image are reported with ``st.error``.
+    """
+    import io
+
+    model, processor = load_model(f"koclip/{model_name}")
+    st.title("Most Relevant Part of Image")
+    st.markdown("""
+        Given a piece of text, the CLIP model finds the part of an image that best explains the text.
+        To try it out, you can
+        1) Upload an image
+        2) Explain a part of the image in text
+        Which will yield the most relevant image tile from a 3x3 grid of the image
+        """)
+    query1 = st.text_input(
+        "Enter a URL to an image...",
+        value="https://img.sbs.co.kr/newimg/news/20200823/201463830_1280.jpg")
+    query2 = st.file_uploader("or upload an image...",
+                              type=["jpg", "jpeg", "png"])
+    captions = st.text_input(
+        "Enter query to find most relevant part of image ",
+        value="이건 서울의 경복궁 사진이다.",
+    )
+    if not st.button("질문 (Query)"):
+        return
+    if not any([query1, query2]):
+        st.error("Please upload an image or paste an image URL.")
+        return
+    try:
+        if query2 is not None:
+            # An uploaded file takes priority over the URL field.
+            image_data = query2
+        else:
+            # NOTE(review): the URL is user-supplied and fetched from the
+            # server; restrict allowed hosts if this runs on a private network.
+            # The timeout keeps a stalled host from hanging the page, and
+            # raise_for_status stops an HTTP error page being parsed as an image.
+            response = requests.get(query1, timeout=10)
+            response.raise_for_status()
+            image_data = io.BytesIO(response.content)
+        # Normalise to 3 channels so RGBA/palette/grayscale inputs give the
+        # same array layout as ordinary RGB photos.
+        image = Image.open(image_data).convert("RGB")
+    except (requests.RequestException, OSError) as err:
+        st.error(f"Could not load the image: {err}")
+        return
+    st.image(image)
+    try:
+        images = split_image(image)
+    except ValueError as err:
+        # Raised when the image is too small to be cut into tiles.
+        st.error(f"Could not split the image into tiles: {err}")
+        return
+    inputs = processor(text=captions,
+                       images=images,
+                       return_tensors="jax",
+                       padding=True)
+    # Presumably converts channels-first pixel values to the channels-last
+    # layout the Flax model expects -- TODO confirm against load_model.
+    inputs["pixel_values"] = jnp.transpose(inputs["pixel_values"],
+                                           axes=[0, 2, 3, 1])
+    outputs = model(**inputs)
+    # Softmax along axis 0, i.e. across tiles (assuming one row of logits per
+    # tile), so the scores of all tiles sum to 1 for the single query.
+    probs = jax.nn.softmax(outputs.logits_per_image, axis=0)
+    ranked = sorted(enumerate(probs), key=lambda pair: pair[1], reverse=True)
+    for idx, prob in ranked:
+        st.text(f"Score: {prob[0]:.3f}")
+        st.image(images[idx])

text2image.py CHANGED Viewed

@@ -40,7 +40,7 @@ def app(model_name):
         result_imgs, result_captions = [], []
         for file, dist in zip(result_files, dists):
             result_imgs.append(plt.imread(os.path.join(images_directory, file)))
-            result_captions.append("{:s} (유사도: {:.3f})".format(file, 1.0 - dist))
         st.image(result_imgs[:3], caption=result_captions[:3], width=200)
         st.image(result_imgs[3:6], caption=result_captions[3:6], width=200)

         result_imgs, result_captions = [], []
         for file, dist in zip(result_files, dists):
             result_imgs.append(plt.imread(os.path.join(images_directory, file)))
+            result_captions.append("Score: {:.3f}".format(1.0 - dist))
         st.image(result_imgs[:3], caption=result_captions[:3], width=200)
         st.image(result_imgs[3:6], caption=result_captions[3:6], width=200)