Spaces:

umichVision
/

virtex-redcaps

Runtime error

App Files Community

zamborg committed on Nov 18, 2021

Commit

5650fb4

1 Parent(s): 0de1d2d

update model to reflect dev changes

Browse files

Files changed (2) hide show

app.py +41 -36
model.py +38 -3

app.py CHANGED Viewed

@@ -8,31 +8,33 @@ from model import *
 # # TODO:
 # - Reformat the model introduction
-# - Center the images using the 3 column method
 # - Make the iterative text generation
 def gen_show_caption(sub_prompt=None, cap_prompt = ""):
     with st.spinner("Generating Caption"):
-        if sub_prompt is None and cap_prompt is not "":
-            st.write("Without a specified subreddit we default to /r/pics")
-        subreddit, caption = virtexModel.predict(image_dict, sub_prompt=sub_prompt, prompt = cap_prompt)
         st.markdown(
             f"""
             <style>
                 red{{
                     color:#c62828
                 }}
                 mono{{
                     font-family: "Inconsolata";
                 }}
             </style>
-            ### <red> r/{subreddit} </red>  {caption}
             """,
             unsafe_allow_html=True)
-st.title("Image Captioning Demo from RedCaps")
 st.sidebar.markdown(
     """
     ### Image Captioning Model from VirTex trained on RedCaps
@@ -41,20 +43,15 @@ st.sidebar.markdown(
     You can also generate captions as if they are from specific subreddits,
     as if they start with a particular prompt, or even both.
-    Share your results on twitter with #redcaps or with a friend.
     """
 )
 with st.spinner("Loading Model"):
     virtexModel, imageLoader, sample_images, valid_subs = create_objects()
-# staggered = st.sidebar.checkbox("Iteratively Generate Captions")
-# if staggered:
-#     pass
-# else:
 select_idx = None
 st.sidebar.title("Select a sample image")
@@ -102,38 +99,46 @@ cap_prompt = st.sidebar.text_input(
 _ = st.sidebar.button("Regenerate Caption")
-advanced = st.sidebar.checkbox("Advanced Options")
-num_captions=1
-if advanced:
-    num_captions = st.sidebar.select_slider("Number of Captions to Predict", options=[1,2,3,4,5], value=1)
-    nuc_size = st.sidebar.slider("Nucelus Size:", min_value=0.0, max_value=1.0, value=0.8, step=0.05)
-    virtexModel.model.decoder.nucleus_size = nuc_size
-if False: #uploaded_image is None:# and submitted:
-    st.write("Please select a file to upload")
-else:
-    image_file = sample_image
-    # LOAD AND CACHE THE IMAGE
-    if uploaded_image is not None:
-        image = uploaded_image
-    elif select_idx is None and 'image' in st.session_state:
-        image = st.session_state['image']
-    else:
-        image = Image.open(image_file)
-    image = image.convert("RGB")
-    st.session_state['image'] = image
-    image_dict = imageLoader.transform(image)
-    show_image = imageLoader.show_resize(image)
     show = st.image(show_image)
-    show.image(show_image, "Your Image")
     for i in range(num_captions):
-        gen_show_caption(sub, imageLoader.text_transform(cap_prompt))

 # # TODO:
 # - Reformat the model introduction
 # - Make the iterative text generation
 def gen_show_caption(sub_prompt=None, cap_prompt = ""):
     with st.spinner("Generating Caption"):
+        subreddit, caption = virtexModel.predict(image_dict, sub_prompt=sub_prompt, prompt=cap_prompt)
         st.markdown(
             f"""
             <style>
                 red{{
                     color:#c62828
                 }}
+                blue{{
+                    color:#2a72d5
+                }}
                 mono{{
                     font-family: "Inconsolata";
                 }}
             </style>
+            ### <red> r/{subreddit} </red> <blue> {cap_prompt} </blue> {caption}
             """,
             unsafe_allow_html=True)
+_, center, _ = st.columns([1,8,1])
+with center:
+    st.title("Image Captioning Demo from RedCaps")
 st.sidebar.markdown(
     """
     ### Image Captioning Model from VirTex trained on RedCaps
     You can also generate captions as if they are from specific subreddits,
     as if they start with a particular prompt, or even both.
+    Share your results on twitter with #redcaps or with a friend*.
     """
 )
+# st.markdown(footer,unsafe_allow_html=True)
 with st.spinner("Loading Model"):
     virtexModel, imageLoader, sample_images, valid_subs = create_objects()
 select_idx = None
 st.sidebar.title("Select a sample image")
 _ = st.sidebar.button("Regenerate Caption")
+st.sidebar.write("Advanced Options:")
+num_captions = st.sidebar.select_slider("Number of Captions to Predict", options=[1,2,3,4,5], value=1)
+nuc_size = st.sidebar.slider("Nucelus Size:\nLarger values lead to more diverse captions", min_value=0.0, max_value=1.0, value=0.8, step=0.05)
+virtexModel.model.decoder.nucleus_size = nuc_size
+image_file = sample_image
+# LOAD AND CACHE THE IMAGE
+if uploaded_image is not None:
+    image = uploaded_image
+elif select_idx is None and 'image' in st.session_state:
+    image = st.session_state['image']
+else:
+    image = Image.open(image_file)
+image = image.convert("RGB")
+st.session_state['image'] = image
+image_dict = imageLoader.transform(image)
+show_image = imageLoader.show_resize(image)
+with center:
     show = st.image(show_image)
+    show.image(show_image)
+    if sub is None and imageLoader.text_transform(cap_prompt) is not "":
+        st.write("Without a specified subreddit we default to /r/pics")
     for i in range(num_captions):
+        gen_show_caption(sub, imageLoader.text_transform(cap_prompt))
+st.sidebar.markdown(
+    """
+*Please note that this model was explicitly not trained on images of people, and as a result is not designed to caption images with humans.
+This demo accompanies our paper RedCaps.
+Created by Karan Desai, Gaurav Kaul, Zubin Aysola, Justin Johnson
+    """
+)

model.py CHANGED Viewed

@@ -92,7 +92,6 @@ class VirTexModel():
             subreddit_tokens = torch.cat(
                 [
                     subreddit_tokens,
-                    torch.tensor([self.tokenizer.token_to_id("[SEP]")], device=self.device).long(),
                     cap_tokens
                 ])
@@ -118,11 +117,14 @@ class VirTexModel():
                 subreddit = "".join(subreddit.split())
                 rest_of_caption = rest_of_caption.strip()
             else:
-                subreddit, rest_of_caption = "", caption
             is_valid_subreddit = subreddit in self.valid_subs
         return subreddit, rest_of_caption
 def download_files():
@@ -147,3 +149,36 @@ def create_objects():
     valid_subs.insert(0, None)
     return virtexModel, imageLoader, sample_images, valid_subs

             subreddit_tokens = torch.cat(
                 [
                     subreddit_tokens,
                     cap_tokens
                 ])
                 subreddit = "".join(subreddit.split())
                 rest_of_caption = rest_of_caption.strip()
             else:
+                subreddit, rest_of_caption = "", caption.strip()
+            # split prompt for coloring:
+            if prompt is not "":
+                _, rest_of_caption = caption.split(prompt.strip())
             is_valid_subreddit = subreddit in self.valid_subs
         return subreddit, rest_of_caption
 def download_files():
     valid_subs.insert(0, None)
     return virtexModel, imageLoader, sample_images, valid_subs
+footer="""<style>
+a:link , a:visited{
+color: blue;
+background-color: transparent;
+text-decoration: underline;
+}
+a:hover,  a:active {
+color: red;
+background-color: transparent;
+text-decoration: underline;
+}
+.footer {
+position: fixed;
+left: 0;
+bottom: 0;
+width: 100%;
+background-color: white;
+color: black;
+text-align: center;
+}
+</style>
+<div class="footer">
+<p>
+*Please note that this model was explicitly not trained on images of people, and as a result is not designed to caption images with humans.
+This demo accompanies our paper RedCaps.
+Created by Karan Desai, Gaurav Kaul, Zubin Aysola, Justin Johnson
+</p>
+</div>
+"""