fix: discard remaining pixels
most_relevant_part.py  CHANGED  +27 -25
@@ -1,22 +1,23 @@
 import os
-import requests
-import streamlit as st
-from PIL import Image
 
 import jax
 import jax.numpy as jnp
 import numpy as np
+import requests
+import streamlit as st
+from PIL import Image
 
 from utils import load_model
 
-def split_image(im):
+
+def split_image(im, num_rows=3, num_cols=3):
     im = np.array(im)
-    M = im.shape[0] // 3
-    N = im.shape[1] // 3
+    row_size = im.shape[0] // num_rows
+    col_size = im.shape[1] // num_cols
     tiles = [
-        im[x:x + M, y:y + N]
-        for x in range(0, im.shape[0], M)
-        for y in range(0, im.shape[1], N)
+        im[x : x + row_size, y : y + col_size]
+        for x in range(0, num_rows * row_size, row_size)
+        for y in range(0, num_cols * col_size, col_size)
     ]
     return tiles
 
@@ -36,19 +37,21 @@ def app(model_name):
     model, processor = load_model(f"koclip/{model_name}")
 
     st.title("Most Relevant Part of Image")
-    st.markdown("""
+    st.markdown(
+        """
 Given a piece of text, the CLIP model finds the part of an image that best explains the text.
 To try it out, you can
 1) Upload an image
 2) Explain a part of the image in text
 Which will yield the most relevant image tile from a 3x3 grid of the image
-""")
+"""
+    )
 
     query1 = st.text_input(
         "Enter a URL to an image...",
-        value="https://img.sbs.co.kr/newimg/news/20200823/201463830_1280.jpg")
-    query2 = st.file_uploader("or upload an image...",
-                              type=["jpg", "jpeg", "png"])
+        value="https://img.sbs.co.kr/newimg/news/20200823/201463830_1280.jpg",
+    )
+    query2 = st.file_uploader("or upload an image...", type=["jpg", "jpeg", "png"])
     captions = st.text_input(
         "Enter query to find most relevant part of image ",
         value="이건 서울의 경복궁 사진이다.",
@@ -58,23 +61,22 @@ def app(model_name):
     if not any([query1, query2]):
         st.error("Please upload an image or paste an image URL.")
     else:
-        image_data = (
-            query2 if query2 is not None else requests.get(query1, stream=True).raw)
+        image_data = (
+            query2 if query2 is not None else requests.get(query1, stream=True).raw
+        )
         image = Image.open(image_data)
         st.image(image)
 
         images = split_image(image)
 
-        inputs = processor(
-            text=captions, images=images,
-            return_tensors="jax", padding=True)
-        inputs["pixel_values"] = jnp.transpose(
-            inputs["pixel_values"],
-            axes=[0, 2, 3, 1])
+        inputs = processor(
+            text=captions, images=images, return_tensors="jax", padding=True
+        )
+        inputs["pixel_values"] = jnp.transpose(
+            inputs["pixel_values"], axes=[0, 2, 3, 1]
+        )
         outputs = model(**inputs)
         probs = jax.nn.softmax(outputs.logits_per_image, axis=0)
-        for idx, prob in sorted(enumerate(probs),
-                                key=lambda x: x[1],
-                                reverse=True):
+        for idx, prob in sorted(enumerate(probs), key=lambda x: x[1], reverse=True):
             st.text(f"Score: {prob[0]:.3f}")
             st.image(images[idx])
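The substance of the fix is in `split_image`: the old comprehension ranged over the full `im.shape[0]` and `im.shape[1]`, so when the image dimensions were not exact multiples of the tile size, `range` emitted one extra start index and the grid picked up partial tiles along the right and bottom edges. The new bounds stop at `num_rows * row_size` and `num_cols * col_size`, discarding the leftover pixels and always yielding exactly `num_rows * num_cols` full tiles. A minimal standalone check of that behavior (the 100x101 test image is made up for illustration, not part of the app):

import numpy as np

def split_image(im, num_rows=3, num_cols=3):
    # Integer division floors the tile size; trailing rows/columns that
    # don't fill a whole tile are discarded by the range bounds below.
    im = np.array(im)
    row_size = im.shape[0] // num_rows
    col_size = im.shape[1] // num_cols
    tiles = [
        im[x : x + row_size, y : y + col_size]
        for x in range(0, num_rows * row_size, row_size)
        for y in range(0, num_cols * col_size, col_size)
    ]
    return tiles

# 100 and 101 are not multiples of 3: the old bounds produced a 4x4 grid
# of 16 tiles, several only a sliver wide; the fix yields 9 full tiles.
im = np.zeros((100, 101, 3), dtype=np.uint8)
tiles = split_image(im)
assert len(tiles) == 9
assert all(t.shape == (33, 33, 3) for t in tiles)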
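Two details in the scoring block are worth spelling out. The `jnp.transpose` to `axes=[0, 2, 3, 1]` reorders the processor's channel-first pixel values into channel-last layout; this matches Flax convolutions, which default to NHWC, and appears to be what the Flax CLIP model used here expects (newer `transformers` versions handle this internally, so treat it as an assumption tied to this code's era). Second, `logits_per_image` carries one row per image tile and one column per caption, so with nine tiles and a single query it has shape (9, 1), and `jax.nn.softmax(..., axis=0)` normalizes across the tiles rather than across captions: each `prob[0]` is that tile's share of the match. A toy check of the axis choice, with hypothetical logits standing in for the model output:

import jax
import jax.numpy as jnp

# Hypothetical logits_per_image for 3 tiles x 1 caption; in the app these
# come from model(**inputs).
logits = jnp.array([[2.0], [0.5], [-1.0]])

# axis=0 normalizes over tiles (rows), not captions (columns), so the
# tile scores for the single query sum to 1.
probs = jax.nn.softmax(logits, axis=0)
assert jnp.allclose(probs.sum(axis=0), 1.0)

# Same ranking idiom as the app: highest-probability tile first.
for idx, prob in sorted(enumerate(probs), key=lambda x: x[1].item(), reverse=True):
    print(f"tile {idx}: score {float(prob[0]):.3f}")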