Spaces:
Sleeping
Sleeping
File size: 11,294 Bytes
498cace c0480d4 748e371 bcdac45 c0480d4 498cace 9d25cb7 946ff9d 7aec679 d77ffda 2e00ec0 150f131 cbbe9c2 0ff9eb8 51624ae 9d25cb7 c0480d4 498cace c0480d4 498cace eaf7d18 f1cc621 498cace c0480d4 498cace 5b2d1ad c0480d4 8391248 c9e981d 8391248 c0480d4 5fcad60 c0480d4 84a4f1f c0480d4 399f0b6 748e371 399f0b6 75367ea c0480d4 399f0b6 b3edfb8 c0480d4 5b2d1ad c10a46f 748e371 c10a46f c0480d4 399f0b6 b3edfb8 b896fc7 b3edfb8 b896fc7 b3edfb8 b896fc7 748e371 d9f4a7a 6b2623b b41523d 748e371 500f095 f1cc621 b896fc7 c0480d4 c10a46f 498cace 748e371 498cace 748e371 399f0b6 b3edfb8 c0480d4 748e371 399f0b6 c0480d4 14126c2 c0480d4 eb103d6 b3edfb8 c0480d4 399f0b6 0ff9eb8 c0480d4 6b2623b c0480d4 748e371 e1e7d11 399f0b6 0ff9eb8 c0480d4 6b2623b c0480d4 748e371 e1e7d11 399f0b6 0ff9eb8 c0480d4 6b2623b c0480d4 748e371 e1e7d11 399f0b6 0ff9eb8 c0480d4 6b2623b c0480d4 748e371 e1e7d11 399f0b6 0ff9eb8 c0480d4 6b2623b c0480d4 748e371 e1e7d11 399f0b6 0ff9eb8 c0480d4 6b2623b c0480d4 748e371 e1e7d11 399f0b6 0ff9eb8 c0480d4 6b2623b c0480d4 748e371 e1e7d11 399f0b6 0ff9eb8 c0480d4 6b2623b 748e371 e1e7d11 7c8aab6 498cace bcdac45 498cace cbbe9c2 14126c2 51fcd25 2ea293d 150f131 399f0b6 7f1d581 d77ffda 0c09562 277b364 f00dee0 0c09562 277b364 24c2710 14126c2 399f0b6 4fdf119 6b2623b 399f0b6 7039748 a14971f 399f0b6 7039748 eaf7d18 24040a0 eaf7d18 399f0b6 7039748 1f4e833 9cd379e eaf7d18 52b1761 1542b47 b3edfb8 8391248 0109403 10961ac 2be7744 e5d39de 2be7744 e5d39de 2be7744 01b1288 2be7744 10961ac eaf7d18 b3edfb8 4d1a312 faf7691 564d528 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 |
#####
###
# 00 Imports
# 01 Setup
# 02 Screen 0 - Introduction
# 03 Screen 1 - Upload or Select Image
# 04 Screen 2 - Output and Interaction
###
#####
#####
###
# 00 Imports
###
#####
import streamlit as st
import streamlit.components.v1 as components
from PIL import ImageOps, Image
import cv2
import numpy as np
import torch
from ultralytics import YOLO
from src.tiles import *
from src.image_transform import *
from time import sleep
#####
###
# 01 Setup
###
#####
# General setup: page config, per-session state defaults, the detection model,
# and the shared column grid that every screen renders into.
st.set_page_config(
    page_title="Find Waldo and His Friends Using Computer Vision",
    layout="wide"
)

# Initialise persistent session state once per browser session.
_state_defaults = {
    "screen": 0,          # 0 = intro, 1 = pick an image, 2 = results
    "image": None,        # the PIL page image chosen or uploaded by the user
    "image_counter": 0,   # index of the tile currently shown on screen 2
    "confidence": 0.50,   # minimum detection confidence for the model
}
for _name, _default in _state_defaults.items():
    if _name not in st.session_state:
        st.session_state[_name] = _default
if 'model' not in st.session_state:
    # Load the YOLO weights once and keep them cached in session state.
    st.session_state.model = YOLO("/home/user/app/models/head_model/best.pt")

# Shared layout grid; leading/trailing underscores are unused gutter columns.
_, row0_col1, _ = st.columns([2, 3, 2])
_, row1_col1, _ = st.columns([2, 3, 2])
_, row2_col1, _ = st.columns([2, 3, 2])
_, row3_col1, row3_col2, row3_col3, row3_col4, _ = st.columns([8, 3, 3, 3, 3, 8])
_, row4_col1, row4_col2, row4_col3, row4_col4, _ = st.columns([8, 3, 3, 3, 3, 8])
_, row5_col2, row5_col3, row5_col4 = st.columns([1, 4, 4, 1], gap="medium")

# Page heading, shown on every screen.
with row0_col1:
    st.markdown("<h1 style='text-align: center;'>Find Waldo and His Friends</h1>", unsafe_allow_html=True)
    st.markdown("<h3 style='text-align: center;'>Using Computer Vision</h3>", unsafe_allow_html=True)
#####
###
# 02 Screen 0 - Introduction
###
#####
# Screen 0: introduction — explains the tool, links to the books and the
# author, and offers a Proceed button that advances to image selection.
if st.session_state.screen == 0:
    with row1_col1:
        # What the tool does.
        st.markdown("<p style='text-align: left;'>This tool allows you to take a photo with your phone of a <i>Where's Waldo?</i> book page. Using the computer vision model YOLOv8-large, this tool finds Waldo and his friends Wenda, Odlaw, Wizard, and Woof.</p>", unsafe_allow_html=True)
        # Link to purchase the books.
        st.markdown("<p style='text-align: center;'><a href='https://www.amazon.com/Wheres-Waldo-Ultimate-Watcher-Collection/dp/1536215112/ref=asc_df_1536215112/?tag=hyprod-20&linkCode=df0&hvadid=496186854683&hvpos=&hvnetw=g&hvrand=13774168920524356768&hvpone=&hvptwo=&hvqmt=&hvdev=c&hvdvcmdl=&hvlocint=&hvlocphy=9009670&hvtargid=pla-1185030597547&psc=1&mcid=a517c47662b13f96a80fbf3bfe8b30fe&gclid=CjwKCAjwuJ2xBhA3EiwAMVjkVADBaOb87R0kxhnpKqL-S_gJWme0DJJAwSTe-VlUv2p19zZKak3cshoCR34QAvD_BwE'>Purchase the Books</a></p>", unsafe_allow_html=True)
        # Contact e-mail.
        st.markdown("<p style='text-align: center;'><a href=mailto:'Jared.L.Bailey@duke.edu'>Jared.L.Bailey@duke.edu</a></p>", unsafe_allow_html=True)
        # Centered Proceed button; moving to screen 1 needs an explicit rerun.
        _, proceed_col, _ = st.columns([2, 1, 2])
        with proceed_col:
            if st.button("Proceed", key="Proceed", use_container_width=True):
                st.session_state.screen = 1
                st.rerun()
        # Banner image of Waldo and his friends.
        st.image("/home/user/app/home_page_images/Waldo_Friends.jpg", use_column_width=True)
#####
###
# 03 Screen 1 - Upload or Select Image
###
#####
# Screen 1: upload a photo of a single book page, or pick one of eight
# bundled sample pages. Either choice stores the image and moves to screen 2.
if st.session_state.screen == 1:
    # Heading and instructions.
    with row1_col1:
        st.markdown("<p style='text-align: center;'>Upload an image using your cell phone (single book page), or select an image from the below list</p>", unsafe_allow_html=True)
    # Image uploader.
    with row2_col1:
        uploaded_image = st.file_uploader(
            label="",
            type=['jpg'],
            accept_multiple_files=False,
            key=None,
            help='Due to image resolution limitations of the tool, only photos of a single page are expected to produce the intended result',
            on_change=None,
            args=None,
            kwargs=None,
            disabled=False,
            label_visibility="visible"
        )
        if uploaded_image is not None:
            uploaded_image = Image.open(uploaded_image)
            # Honour the EXIF orientation tag so phone photos display upright.
            uploaded_image = ImageOps.exif_transpose(uploaded_image)
            st.session_state.image = uploaded_image
            st.session_state.screen = 2
            st.rerun()
        # Sample-image selector heading.
        st.markdown("<h4 style='text-align: center;'>Or Select One of the Following</h4>", unsafe_allow_html=True)
    # Eight sample pages, one per grid cell. Data-driven loop replaces eight
    # copy-pasted blocks; button keys stay button_1..button_8 as before.
    image_path = "/home/user/app/test_images/"
    sample_cells = [row3_col1, row3_col2, row3_col3, row3_col4,
                    row4_col1, row4_col2, row4_col3, row4_col4]
    sample_files = ["IMG_5356.JPG", "IMG_5357.JPG", "IMG_5368.JPG", "IMG_5369.JPG",
                    "IMG_5382.JPG", "IMG_5383.JPG", "IMG_5408.JPG", "IMG_5409.JPG"]
    for number, (cell, filename) in enumerate(zip(sample_cells, sample_files), start=1):
        with cell:
            sample_img = image_transform(filename, image_path)
            st.image(sample_img, use_column_width=True)
            if st.button("Select Image β", key=f"button_{number}", use_container_width=True):
                st.session_state.image = sample_img
                st.session_state.screen = 2
                st.rerun()
#####
###
# 04 Screen 2 - Output and Interaction
###
#####
# Screen 2: tile the chosen page, run the model over the tiles, and show
# (a) the page with a numbered tile grid, (b) tile navigation, and
# (c) the per-tile detections plus a character/tile summary.
if st.session_state.screen == 2:
    # Cut the page into overlapping 640px tiles for the model.
    st.session_state.tiles = tile_image(image=st.session_state.image, tile_size=640, overlap=40)
    with row5_col2:
        # Draw the numbered tile grid over the full page so the user can map
        # each prediction back to a page location.
        st.session_state.boxed_image = np.array(st.session_state.image)
        height, width, channels = st.session_state.boxed_image.shape
        # Pad by one tile so edge rectangles stay inside the canvas.
        st.session_state.blank_img = np.zeros((height + 640, width + 640, 3), np.uint8)
        st.session_state.blank_img[0:height, 0:width] = st.session_state.boxed_image
        counter = 1
        # Stride 600 = 640 tile size - 40 overlap, matching tile_image above.
        for i in range(0, width, 600):
            for j in range(0, height, 600):
                # Draw the rectangle for this tile.
                cv2.rectangle(st.session_state.blank_img, (i, j), (i + 640, j + 640), (0, 255, 0), 10)
                # Label it with the tile number (was str(str(counter))).
                cv2.putText(st.session_state.blank_img, str(counter), (i + 200, j + 320), cv2.FONT_HERSHEY_SIMPLEX, 10, (0, 255, 0), 10)
                counter += 1
        st.image(st.session_state.blank_img, use_column_width=True)
    with row5_col3:
        # Tile navigation.
        row5a_col0, row5a_col1, row5a_col2 = st.columns([1, 1, 1], gap="large")
        with row5a_col0:
            if st.button("Back", use_container_width=True):
                if st.session_state.image_counter > 0:
                    st.session_state.image_counter -= 1
        with row5a_col1:
            if st.button("Select New Photo", use_container_width=True):
                st.session_state.screen = 1
                st.session_state.results = None
                st.session_state.image_counter = 0
                st.rerun()
        with row5a_col2:
            if st.button("Next", use_container_width=True):
                # Guard: results may be unset/None before the first predict,
                # so len() must not be called on it (fixes a latent TypeError).
                if st.session_state.get("results") is not None and \
                        st.session_state.image_counter < len(st.session_state.results) - 1:
                    st.session_state.image_counter += 1
        # Run predictions once per selected image (was `== None` plus a
        # separate membership test; `.get(...) is None` covers both).
        # NOTE(review): nesting under row5_col3 reconstructed from context —
        # the source's indentation was lost; confirm against the original.
        if st.session_state.get("results") is None:
            st.write("\n\n")
            st.markdown("<p style='text-align: center;'>The model is working. Please be patient...</p>", unsafe_allow_html=True)
            st.markdown("<p style='text-align: center;'>This process can take up to 30 seconds.</p>", unsafe_allow_html=True)
            st.session_state.results = st.session_state.model.predict(st.session_state.tiles, conf=st.session_state.confidence)
            st.rerun()
    with row5_col4:
        # Summarise which numbered tiles contain each character.
        class_names = {0: "Waldo", 1: "Wenda", 2: "Odlaw", 3: "Wizard", 4: "Woof"}
        character_dict = {name: [] for name in class_names.values()}
        for idx, pred in enumerate(st.session_state.results):
            try:
                for boxes in pred.boxes:
                    # Use the configured threshold (was a hard-coded 0.5 that
                    # duplicated st.session_state.confidence).
                    if boxes.conf >= st.session_state.confidence:
                        character_dict[class_names[int(boxes.cls.item())]].append(idx + 1)
            except Exception:
                # Best-effort: skip tiles whose result has no usable boxes.
                # Narrowed from a bare except so Ctrl-C/SystemExit still work.
                pass
        st.session_state.output = " \n\n\n\n\n Tile Numbers (There may be more than one example of a character in each image): \n"
        for key, value in character_dict.items():
            if value:
                st.session_state.output += str(key) + ": " + str(value) + " \n"
        st.write(st.session_state.output)
        # Plot the currently selected tile with its predicted boxes.
        st.markdown(f"<h4 style='text-align: center;'>Image Tile {str(st.session_state.image_counter + 1)}</h4>", unsafe_allow_html=True)
        im_bgr = st.session_state.results[st.session_state.image_counter].plot()  # BGR-order numpy array
        im_rgb = Image.fromarray(im_bgr[..., ::-1])  # reverse channels to RGB for display
        st.image(im_rgb)