File size: 9,567 Bytes
07219c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82e6343
 
 
 
 
07219c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82e6343
07219c6
 
 
 
 
82e6343
07219c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import hashlib
import os
import streamlit as st
import requests
from PIL import Image
from io import BytesIO

# Folder named "image_cache" located next to this file; it holds the on-disk
# image cache. It is created at import time (no error if it already exists).
CACHE_DIR = os.path.join(os.path.dirname(__file__), "image_cache")
os.makedirs(CACHE_DIR, exist_ok=True)


def _cache_path(url: str) -> str:
    """Map an image URL to its file path inside the cache folder.

    The file name is the hexadecimal MD5 digest of the URL followed by a
    ``.png`` suffix, so the same URL always maps to the same file.
    """
    digest = hashlib.md5(url.encode()).hexdigest()
    file_name = f"{digest}.png"
    return os.path.join(CACHE_DIR, file_name)


@st.cache_data(show_spinner=False)
def resolve_image_url(url: str) -> str:
    """Resolve a Wikimedia Commons ``File:`` page URL to the direct image URL.

    Any URL that is not a Commons ``File:`` page is returned unchanged.

    Args:
        url: Either a ``commons.wikimedia.org/wiki/File:...`` page URL or any
            other URL.

    Returns:
        The direct image URL reported by the MediaWiki ``imageinfo`` query,
        or ``url`` itself when it is not a ``File:`` page or the lookup fails.

    Note:
        The function is wrapped by ``st.cache_data``, so the returned value
        (including the fallback returned after a failed lookup) is reused
        for later calls with the same ``url``.
    """
    from urllib.parse import unquote

    if "commons.wikimedia.org/wiki/File:" not in url:
        return url

    # Keep only the title part of the page URL: drop a trailing query string
    # or fragment, then percent-decode it. The decoded title is handed to
    # requests via ``params`` so that characters such as "&", "+" or "%" in a
    # file name are encoded exactly once instead of corrupting the query.
    raw_title = url.split("/wiki/File:")[-1]
    raw_title = raw_title.split("?", 1)[0].split("#", 1)[0]
    filename = unquote(raw_title)

    headers = {"User-Agent": "Mozilla/5.0 (compatible; StreamlitApp/1.0; +https://streamlit.io)"}
    query = {
        "action": "query",
        "titles": f"File:{filename}",
        "prop": "imageinfo",
        "iiprop": "url",
        "format": "json",
    }
    try:
        r = requests.get(
            "https://commons.wikimedia.org/w/api.php",
            params=query,
            headers=headers,
            timeout=10,
        )
        # Treat HTTP error statuses as a failed lookup instead of trying to
        # parse an error page as JSON.
        r.raise_for_status()
        pages = r.json()["query"]["pages"]
        page = next(iter(pages.values()))
        return page["imageinfo"][0]["url"]
    except (
        requests.RequestException,  # network / HTTP / JSON decoding errors
        ValueError,                 # body is not valid JSON
        LookupError,                # expected keys or list items are missing
        StopIteration,              # "pages" is empty
        TypeError,                  # response has an unexpected shape
        AttributeError,
    ):
        # Best effort: fall back to the original URL when resolution fails.
        return url


def load_image(url: str) -> "Image.Image | None":
    """Return the image for ``url``, using the on-disk cache when possible.

    The cache file for ``url`` is tried first. If it is missing or cannot be
    decoded, the image is downloaded (after resolving Commons ``File:`` page
    URLs to the direct image URL), written to the cache folder as PNG, and
    returned.

    Args:
        url: Image URL or Wikimedia Commons ``File:`` page URL.

    Returns:
        A PIL image, or ``None`` when the image could not be downloaded or
        decoded. A failure to write the cache file does not prevent the
        downloaded image from being returned.
    """
    path = _cache_path(url)
    if os.path.exists(path):
        try:
            cached = Image.open(path)
            # Image.open() only reads the header; load() decodes the pixel
            # data now, so a truncated or corrupted cache file is detected
            # here rather than later when the image is displayed.
            cached.load()
            return cached
        except Exception:
            pass  # corrupted cache file — re-download below

    headers = {
        "User-Agent": (
            "Mozilla/5.0 (compatible; StreamlitApp/1.0; "
            "+https://streamlit.io)"
        )
    }
    try:
        direct_url = resolve_image_url(url)
        response = requests.get(direct_url, headers=headers, timeout=10)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        img.load()  # validate the download before caching or returning it
    except Exception:
        # Deliberately broad: any network or decoding problem means
        # "no image", which the caller reports to the user.
        return None

    try:
        img.save(path, format="PNG")  # persist to disk
    except Exception:
        # Caching is an optimisation only: keep the downloaded image and
        # remove whatever partial file the failed write may have left.
        try:
            os.remove(path)
        except OSError:
            pass
    return img


# List of 8 scenarios based on the spreadsheet rules
# Images sourced from Wikimedia Commons (public domain / CC licensed)
# Supports both commons.wikimedia.org/wiki/File: page URLs and direct upload URLs
#
# Each entry is a dict with these keys:
#   "label"      - short description shown in the scenario heading
#   "image1_url" - URL of the first image
#   "image2_url" - URL of the second image
#   "answer"     - expected response, "Yes" or "No"
#   "feedback"   - explanation shown after the user answers
scenarios = [
    {
        # Scenario 1: Different pictures of the same person (Messi)
        "label": "Different pictures of the same person",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Lionel-Messi-Argentina-2022-FIFA-World-Cup_(cropped).jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Lionel_Messi_in_2018.jpg",
        "answer": "Yes",
        "feedback": "Different photos of the same subject should be marked Yes."
    },
    {
        # Scenario 2: Identical or resized picture
        # (both URLs below currently point to the same file)
        "label": "Identical or resized picture",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Kevin_Garnett_2008-01-13.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Kevin_Garnett_2008-01-13.jpg",
        "answer": "Yes",
        "feedback": "Identical images (even at different sizes) require a Yes response."
    },
    {
        # Scenario 3: Different pictures of the same landmark (Eiffel Tower)
        "label": "Different pictures of the same landmark",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Tour_Eiffel_Wikimedia_Commons.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Tour_eiffel_at_sunrise_from_the_trocadero.jpg",
        "answer": "Yes",
        "feedback": "Different photos of the same landmark are the same subject."
    },
    {
        # Scenario 4: Subject vs. representation (Eiffel Tower vs. keychain)
        "label": "Subject vs. representation (landmark vs. keychain)",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Tour_Eiffel_Wikimedia_Commons.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Eiffel_Tower_Keychain.jpg",
        "answer": "No",
        "feedback": "A landmark and a keychain are not the same subject."
    },
    {
        # Scenario 5: Person vs. associated item (player vs. jersey)
        "label": "Person vs. associated item (player vs. jersey)",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Lionel_Messi_in_2018.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Adidas_Messi_shirt_rear.JPG",
        "answer": "No",
        "feedback": "A person and an associated item are not the same subject."
    },
    {
        # Scenario 6: Person vs. their signature
        "label": "Person vs. their signature",
        "image1_url": "https://commons.wikimedia.org/wiki/File:President_Barack_Obama.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Health_insurance_reform_bill_signature_20100323_(1).jpg",
        "answer": "No",
        "feedback": "A person and a signature are not the same subject."
    },
    {
        # Scenario 7: Person vs. their tombstone
        # NOTE(review): the second file name reads "Oscar Wilde est mort dans
        # cette maison" ("died in this house"), which looks like a memorial
        # plaque rather than a tombstone — confirm the image matches the label.
        "label": "Person vs. their tombstone",
        "image1_url": "https://commons.wikimedia.org/wiki/File:Oscar_Wilde_by_Napoleon_Sarony._Three-quarter-length_photograph,_seated.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Oscar_Wilde_%C3%A9_mort_dans_cette_maison.jpg",
        "answer": "No",
        "feedback": "A person and their tombstone are not the same subject."
    },
    {
        # Scenario 8: Same person at different ages (Jimmy Carter, per the
        # image URLs below)
        "label": "Same person at different ages",
        "image1_url": "https://commons.wikimedia.org/wiki/File:JimmyCarterPortrait2.jpg",
        "image2_url": "https://commons.wikimedia.org/wiki/File:Jimmy_Carter_and_Rosalynn_Carter_on_Plains_Peanut_Festival_(cropped).jpg",
        "answer": "Yes",
        "feedback": "The same person at different ages is still the same subject."
    }
]


def check_answer(scenario_idx, user_answer):
    """Build the feedback message for the user's answer to one scenario.

    Args:
        scenario_idx: Index into the module-level ``scenarios`` list.
        user_answer: The answer chosen by the user; it is compared for
            equality with the scenario's ``"answer"`` value.

    Returns:
        A Markdown string: a "Correct" message when the answer matches,
        otherwise an "Incorrect" message naming the expected answer. Both
        end with the scenario's feedback text.
    """
    entry = scenarios[scenario_idx]
    expected, explanation = entry["answer"], entry["feedback"]
    if user_answer != expected:
        return f"❌ Incorrect. The correct answer is **{expected}**. {explanation}"
    return f"✅ Correct! {explanation}"


# --- Session state init ---
# Give every per-session key its starting value, but only when the key is not
# present yet, so values survive Streamlit's script reruns.
for _state_key, _initial_value in (
    ("scenario_idx", 0),
    ("feedback", None),
    ("answered", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Preload all images once at startup: one (image 1, image 2) pair per
# scenario, in scenario order. An entry is None when loading failed.
if "images" not in st.session_state:
    with st.spinner("Loading all images, please wait…"):
        st.session_state.images = [
            tuple(load_image(s[url_key]) for url_key in ("image1_url", "image2_url"))
            for s in scenarios
        ]

# --- Page header and introduction ---
st.title("Image Subject Comparison Tutorial")
st.markdown(
    "This is a small tutorial for Depictor. It is meant to provide examples for how to make "
    "the decisions for the questions asked."
)
st.markdown("---")
st.markdown(
    "This tutorial demonstrates rules for determining if two images depict the same subject. "
    "Answer each scenario to advance to the next one."
)

total = len(scenarios)
idx = st.session_state.scenario_idx

if idx < total:
    scenario = scenarios[idx]

    st.markdown(f"### Scenario {idx + 1} of {total}: *{scenario['label']}*")
    st.progress(idx / total)

    # Show the two preloaded images side by side, each with a link to its
    # source page; a placeholder error is shown when an image failed to load.
    img1, img2 = st.session_state.images[idx]
    panels = zip(
        st.columns(2),
        (img1, img2),
        ("Image 1", "Image 2"),
        ("image1_url", "image2_url"),
    )
    for column, picture, caption, url_key in panels:
        with column:
            if picture:
                st.image(picture, caption=caption, use_container_width=True)
            else:
                st.error(f"{caption} could not be loaded.")
            st.markdown(f"[🔗 View source]({scenario[url_key]})")

    st.markdown("### Are these the same subject?")

    if st.session_state.answered:
        # Already answered: show the feedback and offer to move on.
        st.info(st.session_state.feedback)
        if st.button("➡️ Next Scenario", use_container_width=True):
            st.session_state.scenario_idx += 1
            st.session_state.feedback = None
            st.session_state.answered = False
            st.rerun()
    else:
        # Not answered yet: one button per possible answer. Clicking stores
        # the feedback text and reruns the script to display it.
        choices = (("✅ Yes", "Yes"), ("❌ No", "No"))
        for column, (button_label, answer) in zip(st.columns(2), choices):
            with column:
                if st.button(button_label, use_container_width=True):
                    st.session_state.feedback = check_answer(idx, answer)
                    st.session_state.answered = True
                    st.rerun()
else:
    # Every scenario has been answered: offer a restart from the beginning.
    st.success("🎉 You've completed all scenarios! Well done.")
    if st.button("🔄 Restart"):
        st.session_state.scenario_idx = 0
        st.session_state.feedback = None
        st.session_state.answered = False
        st.rerun()