# Streamlit tutorial app: shows pairs of images and asks whether both depict
# the same subject. Images are fetched from Wikimedia Commons and cached on
# disk next to this file.

import hashlib
import logging
import os
import tempfile
from io import BytesIO
from typing import Optional
from urllib.parse import unquote

import requests
import streamlit as st
from PIL import Image

logger = logging.getLogger(__name__)

CACHE_DIR = os.path.join(os.path.dirname(__file__), "image_cache")
os.makedirs(CACHE_DIR, exist_ok=True)

_COMMONS_API_URL = "https://commons.wikimedia.org/w/api.php"
_COMMONS_FILE_MARKER = "commons.wikimedia.org/wiki/File:"
# Sent with every outgoing request (API lookup and image download).
_HTTP_HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; StreamlitApp/1.0; +https://streamlit.io)"
}
_HTTP_TIMEOUT = 10  # seconds


def _cache_path(url: str) -> str:
    """Return the local file path for a cached image URL."""
    # MD5 is only used to derive a stable file name, not for security.
    key = hashlib.md5(url.encode()).hexdigest()
    return os.path.join(CACHE_DIR, f"{key}.png")


@st.cache_data(show_spinner=False)
def _lookup_commons_image_url(filename: str) -> str:
    """Ask the MediaWiki API for the direct URL of a Commons file.

    Raises on any failure (network error, HTTP error, unexpected payload).
    Keeping the error handling out of this function means only successful
    lookups are memoised by ``st.cache_data``; a failed lookup is retried on
    the next call instead of being cached.
    """
    response = requests.get(
        _COMMONS_API_URL,
        # Let requests do the query-string encoding so that characters such
        # as "&", "+" or non-ASCII letters in the file name are safe.
        params={
            "action": "query",
            "titles": f"File:{filename}",
            "prop": "imageinfo",
            "iiprop": "url",
            "format": "json",
        },
        headers=_HTTP_HEADERS,
        timeout=_HTTP_TIMEOUT,
    )
    response.raise_for_status()
    pages = response.json()["query"]["pages"]
    page = next(iter(pages.values()))
    return page["imageinfo"][0]["url"]


def resolve_image_url(url: str) -> str:
    """Resolve a Wikimedia Commons ``File:`` page URL to a direct image URL.

    Any other URL is returned unchanged. If the lookup fails for any reason
    the original URL is returned as a best-effort fallback.
    """
    if _COMMONS_FILE_MARKER not in url:
        return url

    raw_name = url.split("/wiki/File:")[-1]
    # Drop a trailing "?query" / "#fragment" from the page URL, then undo
    # percent-encoding (e.g. "%C3%A9") because requests re-encodes params.
    raw_name = raw_name.partition("?")[0].partition("#")[0]
    filename = unquote(raw_name)

    try:
        return _lookup_commons_image_url(filename)
    except Exception:
        # Deliberately broad: this is a best-effort boundary and the caller
        # expects a URL back no matter what went wrong.
        logger.warning("Could not resolve Commons page %s", url, exc_info=True)
        return url


def _read_cached_image(path: str) -> Optional[Image.Image]:
    """Return the fully decoded image stored at *path*, or None if unusable."""
    if not os.path.exists(path):
        return None
    try:
        img = Image.open(path)
    except Exception:
        return None
    try:
        # Image.open is lazy; force decoding now so a truncated or corrupt
        # cache file is detected here rather than later while rendering.
        img.load()
    except Exception:
        # Broad on purpose: Pillow plugins raise assorted exception types
        # for broken files. The caller re-downloads in this case.
        img.close()
        return None
    return img


def _write_cached_image(img: Image.Image, path: str) -> None:
    """Best-effort, atomic write of *img* as PNG to *path*."""
    tmp_path = None
    try:
        # Write to a unique temp file first and rename it into place so a
        # crash or a concurrent session never leaves a half-written entry.
        fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path), suffix=".tmp")
        with os.fdopen(fd, "wb") as handle:
            img.save(handle, format="PNG")
        os.replace(tmp_path, path)
    except Exception:
        # A cache failure (read-only directory, mode PNG cannot encode, ...)
        # must not discard an image that was downloaded successfully.
        logger.warning("Could not cache image at %s", path, exc_info=True)
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass


def load_image(url: str) -> Optional[Image.Image]:
    """Load an image from the disk cache, downloading it on a cache miss.

    A downloaded image is saved to the cache folder so later runs can reuse
    it. Returns a PIL image, or None if the image could not be downloaded or
    decoded.
    """
    path = _cache_path(url)

    cached = _read_cached_image(path)
    if cached is not None:
        return cached

    try:
        direct_url = resolve_image_url(url)
        response = requests.get(
            direct_url, headers=_HTTP_HEADERS, timeout=_HTTP_TIMEOUT
        )
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        img.load()  # decode now so a bad payload fails here, not in the UI
    except Exception:
        # Best-effort boundary: the UI shows an error placeholder for None.
        logger.warning("Could not load image %s", url, exc_info=True)
        return None

    _write_cached_image(img, path)
    return img


# List of 8 scenarios based on the spreadsheet rules
# Images sourced from Wikimedia Commons (public domain / CC licensed)
# Supports both commons.wikimedia.org/wiki/File: page URLs and direct upload URLs
scenarios = [
    {
        # Scenario 1: Different pictures of the same person (Messi)
        "label": "Different pictures of the same person",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Lionel-Messi-Argentina-2022-FIFA-World-Cup_(cropped).jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Lionel_Messi_in_2018.jpg",
        "answer": "Yes",
        "feedback": "Different photos of the same subject should be marked Yes."
    },
    {
        # Scenario 2: Identical or resized picture (same image, two sizes)
        "label": "Identical or resized picture",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Kevin_Garnett_2008-01-13.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Kevin_Garnett_2008-01-13.jpg",
        "answer": "Yes",
        "feedback": "Identical images (even at different sizes) require a Yes response."
    },
    {
        # Scenario 3: Different pictures of the same landmark (Eiffel Tower)
        "label": "Different pictures of the same landmark",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Tour_Eiffel_Wikimedia_Commons.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Tour_eiffel_at_sunrise_from_the_trocadero.jpg",
        "answer": "Yes",
        "feedback": "Different photos of the same landmark are the same subject."
    },
    {
        # Scenario 4: Subject vs. representation (Eiffel Tower vs. keychain)
        "label": "Subject vs. representation (landmark vs. keychain)",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Tour_Eiffel_Wikimedia_Commons.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Eiffel_Tower_Keychain.jpg",
        "answer": "No",
        "feedback": "A landmark and a keychain are not the same subject."
    },
    {
        # Scenario 5: Person vs. associated item (player vs. jersey)
        "label": "Person vs. associated item (player vs. jersey)",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Lionel_Messi_in_2018.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Adidas_Messi_shirt_rear.JPG",
        "answer": "No",
        "feedback": "A person and an associated item are not the same subject."
    },
    {
        # Scenario 6: Person vs. their signature
        "label": "Person vs. their signature",
        "image1_url": "https://commons.wikimedia.org/wiki/File:President_Barack_Obama.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Health_insurance_reform_bill_signature_20100323_(1).jpg",
        "answer": "No",
        "feedback": "A person and a signature are not the same subject."
    },
    {
        # Scenario 7: Person vs. their tombstone
        # NOTE(review): the second file name reads like a memorial plaque
        # rather than a tombstone -- confirm the image matches the label.
        "label": "Person vs. their tombstone",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Oscar_Wilde_by_Napoleon_Sarony._Three-quarter-length_photograph,_seated.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Oscar_Wilde_%C3%A9_mort_dans_cette_maison.jpg",
        "answer": "No",
        "feedback": "A person and their tombstone are not the same subject."
    },
    {
        # Scenario 8: Same person at different ages (Jimmy Carter, two portraits)
        "label": "Same person at different ages",
        "image1_url": "https://commons.wikimedia.org/wiki/File:JimmyCarterPortrait2.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Jimmy_Carter_and_Rosalynn_Carter_on_Plains_Peanut_Festival_(cropped).jpg",
        "answer": "Yes",
        "feedback": "The same person at different ages is still the same subject."
    },
]


def check_answer(scenario_idx, user_answer):
    """Return the feedback message for *user_answer* on the given scenario.

    *scenario_idx* indexes into ``scenarios``; *user_answer* is compared with
    the scenario's ``"answer"`` value ("Yes" or "No").
    """
    scenario = scenarios[scenario_idx]
    correct = scenario["answer"]
    feedback = scenario["feedback"]
    if user_answer == correct:
        return f"✅ Correct! {feedback}"
    return f"❌ Incorrect. The correct answer is **{correct}**. {feedback}"


def _render_image(column, image, label: str, source_url: str) -> None:
    """Draw one image (or an error placeholder) plus its source link."""
    with column:
        if image is not None:
            st.image(image, caption=label, use_container_width=True)
        else:
            st.error(f"{label} could not be loaded.")
        st.markdown(f"[🔗 View source]({source_url})")


# --- Session state init ---
if "scenario_idx" not in st.session_state:
    st.session_state.scenario_idx = 0
if "feedback" not in st.session_state:
    st.session_state.feedback = None
if "answered" not in st.session_state:
    st.session_state.answered = False

# Preload all images once at startup
if "images" not in st.session_state:
    with st.spinner("Loading all images, please wait…"):
        st.session_state.images = [
            (
                load_image(s["image1_url"]),
                load_image(s["image2_url"]),
            )
            for s in scenarios
        ]

st.title("Image Subject Comparison Tutorial")
st.markdown(
    "This is a small tutorial for Depictor. It is meant to provide examples for how to make "
    "the decisions for the questions asked."
)
st.markdown("---")
st.markdown(
    "This tutorial demonstrates rules for determining if two images depict the same subject. "
    "Answer each scenario to advance to the next one."
)

idx = st.session_state.scenario_idx
total = len(scenarios)

if idx >= total:
    st.success("🎉 You've completed all scenarios! Well done.")
    if st.button("🔄 Restart"):
        st.session_state.scenario_idx = 0
        st.session_state.feedback = None
        st.session_state.answered = False
        st.rerun()
else:
    scenario = scenarios[idx]
    st.markdown(f"### Scenario {idx + 1} of {total}: *{scenario['label']}*")
    st.progress(idx / total)

    img1, img2 = st.session_state.images[idx]
    col1, col2 = st.columns(2)
    _render_image(col1, img1, "Image 1", scenario["image1_url"])
    _render_image(col2, img2, "Image 2", scenario["image2_url"])

    st.markdown("### Are these the same subject?")

    if not st.session_state.answered:
        # One button per possible answer, side by side.
        answer_buttons = (("✅ Yes", "Yes"), ("❌ No", "No"))
        for column, (button_label, answer) in zip(st.columns(2), answer_buttons):
            with column:
                if st.button(button_label, use_container_width=True):
                    st.session_state.feedback = check_answer(idx, answer)
                    st.session_state.answered = True
                    st.rerun()
    else:
        st.info(st.session_state.feedback)
        if st.button("➡️ Next Scenario", use_container_width=True):
            st.session_state.scenario_idx += 1
            st.session_state.feedback = None
            st.session_state.answered = False
            st.rerun()