# WheresWaldo / app.py
# JaredBailey's picture
# Update app.py
# e5d39de verified
#####
###
# 00 Imports
# 01 Setup
# 02 Screen 0 - Introduction
# 03 Screen 1 - Upload or Select Image
# 04 Screen 2 - Output and Interaction
###
#####
#####
###
# 00 Imports
###
#####
import streamlit as st
import streamlit.components.v1 as components
from PIL import ImageOps, Image
import cv2
import numpy as np
import torch
from ultralytics import YOLO
from src.tiles import *
from src.image_transform import *
from time import sleep
#####
###
# 01 Setup
###
#####
# Shared setup executed on every script run: page configuration, session
# defaults, the detection model, and the column grid the screens draw into.
st.set_page_config(
    layout="wide",
    page_title="Find Waldo and His Friends Using Computer Vision",
)

# Seed each session key only when the session has not seen it yet, so values
# written by later screens survive reruns.
_session_defaults = {
    "screen": 0,
    "image": None,
    "image_counter": 0,
    "confidence": 0.50,
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# The model is kept out of the defaults table on purpose: it must only be
# constructed when the key is missing, not evaluated on every rerun.
if "model" not in st.session_state:
    st.session_state.model = YOLO("/home/user/app/models/head_model/best.pt")  # load

# Column grid. The rows are created here, in this order, and filled in by
# whichever screen is active.
_centered_weights = [2, 3, 2]
_thumbnail_weights = [8, 3, 3, 3, 3, 8]
_, row0_col1, _ = st.columns(_centered_weights)
_, row1_col1, _ = st.columns(_centered_weights)
_, row2_col1, _ = st.columns(_centered_weights)
_, row3_col1, row3_col2, row3_col3, row3_col4, _ = st.columns(_thumbnail_weights)
_, row4_col1, row4_col2, row4_col3, row4_col4, _ = st.columns(_thumbnail_weights)
_, row5_col2, row5_col3, row5_col4 = st.columns([1, 4, 4, 1], gap="medium")

# heading
with row0_col1:
    for _heading_html in (
        "<h1 style='text-align: center;'>Find Waldo and His Friends</h1>",
        "<h3 style='text-align: center;'>Using Computer Vision</h3>",
    ):
        st.markdown(_heading_html, unsafe_allow_html=True)
#####
###
# 02 Screen 0 - Introduction
###
#####
# Screen 0: landing page that describes the tool and links out, shown while
# st.session_state.screen == 0. "Proceed" switches to screen 1 and reruns.
if st.session_state.screen == 0:
    with row1_col1:
        # overview
        st.markdown("<p style='text-align: left;'>This tool allows you to take a photo with your phone of a <i>Where's Waldo?</i> book page. Using the computer vision model YOLOv8-large, this tool finds Waldo and his friends Wenda, Odlaw, Wizard, and Woof.</p>", unsafe_allow_html=True)

        # purchase option
        st.markdown("<p style='text-align: center;'><a href='https://www.amazon.com/Wheres-Waldo-Ultimate-Watcher-Collection/dp/1536215112/ref=asc_df_1536215112/?tag=hyprod-20&linkCode=df0&hvadid=496186854683&hvpos=&hvnetw=g&hvrand=13774168920524356768&hvpone=&hvptwo=&hvqmt=&hvdev=c&hvdvcmdl=&hvlocint=&hvlocphy=9009670&hvtargid=pla-1185030597547&psc=1&mcid=a517c47662b13f96a80fbf3bfe8b30fe&gclid=CjwKCAjwuJ2xBhA3EiwAMVjkVADBaOb87R0kxhnpKqL-S_gJWme0DJJAwSTe-VlUv2p19zZKak3cshoCR34QAvD_BwE'>Purchase the Books</a></p>", unsafe_allow_html=True)

        # email
        # The whole URI is quoted as the attribute value. The previous form,
        # href=mailto:'...', left the quote characters inside the address.
        st.markdown("<p style='text-align: center;'><a href='mailto:Jared.L.Bailey@duke.edu'>Jared.L.Bailey@duke.edu</a></p>", unsafe_allow_html=True)

        # proceed button
        # NOTE(review): nesting reconstructed from a whitespace-stripped
        # source; the sub-columns and image are assumed to live inside
        # row1_col1 - confirm against the deployed layout.
        _, row1a_col1, _ = st.columns([2, 1, 2])
        with row1a_col1:
            if st.button("Proceed", key="Proceed", use_container_width=True):
                st.session_state.screen = 1
                st.rerun()

        # image of Waldo and his friends
        st.image("/home/user/app/home_page_images/Waldo_Friends.jpg", use_column_width=True)
#####
###
# 03 Screen 1 - Upload or Select Image
###
#####
# Screen 1: the user either uploads a photo or picks one of the bundled
# sample images. Either choice stores the image in st.session_state.image,
# switches to screen 2 and reruns the script.
if st.session_state.screen == 1:
    # heading and instructions
    with row1_col1:
        st.markdown("<p style='text-align: center;'>Upload an image using your cell phone (single book page), or select an image from the below list</p>", unsafe_allow_html=True)

    # image uploader
    with row2_col1:
        # A non-empty label is supplied because Streamlit advises against
        # label="". Arguments that merely restated the defaults were dropped.
        uploaded_image = st.file_uploader(
            label="Upload a photo of a single book page",
            type=['jpg'],
            accept_multiple_files=False,
            help='Due to image resolution limitations of the tool, only photos of a single page are expected to produce the intended result',
            label_visibility="visible",
        )
        if uploaded_image is not None:
            uploaded_image = Image.open(uploaded_image)
            # Apply the EXIF orientation so phone photos are upright.
            uploaded_image = ImageOps.exif_transpose(uploaded_image)
            # Screen 2 copies the image into a 3-channel canvas; greyscale or
            # CMYK JPEGs would fail there, so normalise to RGB up front.
            if uploaded_image.mode != "RGB":
                uploaded_image = uploaded_image.convert("RGB")
            st.session_state.image = uploaded_image
            st.session_state.screen = 2
            st.rerun()

        # image selector
        st.markdown("<h4 style='text-align: center;'>Or Select One of the Following</h4>", unsafe_allow_html=True)

    image_path = "/home/user/app/test_images/"

    # One (column, file) pair per thumbnail. The position in this tuple
    # determines the widget key (button_1 .. button_8).
    sample_images = (
        (row3_col1, "IMG_5356.JPG"),
        (row3_col2, "IMG_5357.JPG"),
        (row3_col3, "IMG_5368.JPG"),
        (row3_col4, "IMG_5369.JPG"),
        (row4_col1, "IMG_5382.JPG"),
        (row4_col2, "IMG_5383.JPG"),
        (row4_col3, "IMG_5408.JPG"),
        (row4_col4, "IMG_5409.JPG"),
    )
    for sample_number, (sample_column, sample_file) in enumerate(sample_images, start=1):
        with sample_column:
            sample_image = image_transform(sample_file, image_path)
            st.image(sample_image, use_column_width=True)
            if st.button("Select Image ↑", key=f"button_{sample_number}", use_container_width=True):
                st.session_state.image = sample_image
                st.session_state.screen = 2
                st.rerun()
#####
###
# 04 Screen 2 - Output and Interaction
###
#####
# Screen 2: tile the chosen image, run the model on the tiles once, then let
# the user page through the annotated tiles.
if st.session_state.screen == 2:
    # tile images
    st.session_state.tiles = tile_image(image=st.session_state.image, tile_size=640, overlap=40)

    # None both before the first prediction and after "Select New Photo".
    results = st.session_state.get("results")

    with row5_col2:
        st.session_state.boxed_image = np.array(st.session_state.image)
        height, width = st.session_state.boxed_image.shape[:2]
        # Pad the canvas by one tile so rectangles drawn at the right/bottom
        # edge are not clipped.
        st.session_state.blank_img = np.zeros((height + 640, width + 640, 3), np.uint8)
        st.session_state.blank_img[0:height, 0:width] = st.session_state.boxed_image

        # Number the grid cells column by column (x outer, y inner), stepping
        # 600 px = 640 px tile size minus the 40 px overlap.
        # NOTE(review): assumes tile_image emits tiles in this same order -
        # confirm against src.tiles.
        counter = 1
        for i in range(0, width, 600):
            for j in range(0, height, 600):
                # draw the rectangle
                cv2.rectangle(st.session_state.blank_img, (i, j), (i + 640, j + 640), (0, 255, 0), 10)
                # add the number of the rectangle
                cv2.putText(st.session_state.blank_img, str(counter), (i + 200, j + 320), cv2.FONT_HERSHEY_SIMPLEX, 10, (0, 255, 0), 10)
                counter += 1
        st.image(st.session_state.blank_img, use_column_width=True)

    with row5_col3:
        # navigation
        row5a_col0, row5a_col1, row5a_col2 = st.columns([1, 1, 1], gap="large")
        with row5a_col0:
            if st.button("Back", use_container_width=True):
                if st.session_state.image_counter > 0:
                    st.session_state.image_counter -= 1
        with row5a_col1:
            if st.button("Select New Photo", use_container_width=True):
                st.session_state.screen = 1
                st.session_state.results = None
                st.session_state.image_counter = 0
                st.rerun()
        with row5a_col2:
            if st.button("Next", use_container_width=True):
                # A click that lands while the model is still predicting
                # reruns the script before any results exist; ignore it
                # rather than calling len() on a missing value.
                if results is not None and st.session_state.image_counter < len(results) - 1:
                    st.session_state.image_counter += 1

        # predictions
        if results is None:
            st.write("\n\n")
            st.markdown("<p style='text-align: center;'>The model is working. Please be patient...</p>", unsafe_allow_html=True)
            st.markdown("<p style='text-align: center;'>This process can take up to 30 seconds.</p>", unsafe_allow_html=True)
            st.session_state.results = st.session_state.model.predict(st.session_state.tiles, conf=st.session_state.confidence)  # predict
            # Rerun so the rest of the screen renders from the stored results.
            st.rerun()

    # NOTE(review): nesting reconstructed from a whitespace-stripped source.
    # Everything below is kept inside row5_col4, which has weight 1 in the
    # [1, 4, 4, 1] row; confirm the tile preview was not meant for row5_col3.
    with row5_col4:
        # show character locations
        class_names = {0: "Waldo", 1: "Wenda", 2: "Odlaw", 3: "Wizard", 4: "Woof"}
        character_dict = {name: [] for name in class_names.values()}
        for idx, pred in enumerate(results):
            pred_boxes = getattr(pred, "boxes", None)
            if pred_boxes is None:
                # Nothing to tally for this tile.
                continue
            for box in pred_boxes:
                name = class_names.get(int(box.cls.item()))
                if name is None:
                    # Class id outside the five known characters.
                    continue
                # Same threshold that was passed to predict().
                if box.conf.item() >= st.session_state.confidence:
                    # Tile numbers are 1-based; a tile appears once per hit.
                    character_dict[name].append(idx + 1)

        output_parts = [" \n\n\n\n\n Tile Numbers (There may be more than one example of a character in each image): \n"]
        output_parts.extend(
            str(key) + ": " + str(value) + " \n"
            for key, value in character_dict.items()
            if value
        )
        st.session_state.output = "".join(output_parts)
        st.write(st.session_state.output)

        # plot predicted image
        st.markdown(f"<h4 style='text-align: center;'>Image Tile {str(st.session_state.image_counter + 1)}</h4>", unsafe_allow_html=True)
        im_bgr = results[st.session_state.image_counter].plot()  # BGR-order numpy array
        im_rgb = Image.fromarray(im_bgr[..., ::-1])  # RGB-order PIL image
        st.image(im_rgb)