File size: 13,632 Bytes
1a4057a
6ddf716
1a4057a
 
 
 
 
 
6ddf716
33a7a1e
 
 
 
 
1a4057a
33a7a1e
 
1a4057a
33a7a1e
1a4057a
 
6ddf716
 
 
1a4057a
33a7a1e
1a4057a
 
 
 
 
 
 
 
 
 
6ddf716
1a4057a
33a7a1e
6ddf716
33a7a1e
 
 
 
6ddf716
33a7a1e
 
6ddf716
1a4057a
33a7a1e
1a4057a
33a7a1e
 
1391e47
 
33a7a1e
 
 
 
 
 
1a4057a
 
 
 
33a7a1e
 
 
 
1a4057a
33a7a1e
1a4057a
 
 
 
 
 
1391e47
 
 
33a7a1e
1391e47
33a7a1e
5d836cc
33a7a1e
1a4057a
33a7a1e
 
1a4057a
33a7a1e
 
 
 
 
 
 
5d836cc
33a7a1e
 
 
 
 
 
 
 
 
 
5d836cc
 
33a7a1e
 
5d836cc
33a7a1e
5d836cc
 
33a7a1e
1a4057a
33a7a1e
 
 
 
 
 
1391e47
 
 
 
6ddf716
 
1391e47
33a7a1e
1391e47
33a7a1e
1391e47
33a7a1e
1391e47
 
33a7a1e
 
6ddf716
 
 
33a7a1e
 
6ddf716
 
33a7a1e
 
 
 
 
6ddf716
 
33a7a1e
 
1391e47
 
33a7a1e
 
 
1391e47
 
33a7a1e
 
 
1391e47
 
 
 
 
33a7a1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d90e164
1391e47
1a4057a
33a7a1e
1391e47
6ddf716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a4057a
 
33a7a1e
1a4057a
 
 
 
 
33a7a1e
1391e47
33a7a1e
1391e47
 
33a7a1e
1391e47
 
33a7a1e
1391e47
 
1a4057a
33a7a1e
 
 
1a4057a
33a7a1e
1a4057a
33a7a1e
 
6ddf716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a4057a
6ddf716
48ae325
6ddf716
1a4057a
33a7a1e
 
 
 
 
 
 
 
 
 
1391e47
33a7a1e
48ae325
33a7a1e
1391e47
33a7a1e
 
 
 
 
 
 
 
 
 
 
 
48ae325
 
 
33a7a1e
1391e47
33a7a1e
1a4057a
33a7a1e
 
6ddf716
 
33a7a1e
 
 
1391e47
33a7a1e
1391e47
33a7a1e
 
1391e47
 
6ddf716
1391e47
 
 
 
 
 
 
6ddf716
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
import streamlit as st
import streamlit.components.v1 as components
import pandas as pd
import json
import os
from pathlib import Path
from PIL import Image
import datetime

try:
    from huggingface_hub import HfApi, hf_hub_download
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False

# --- PAGE CONFIG ---
# Wide layout and the browser-tab title used by every page of the app.
st.set_page_config(
    page_title="Object-centric Composition Evaluation",
    layout="wide",
)

# --- CUSTOM CSS ---
# Stylesheet injected once per run; it defines the card/title classes that the
# page functions reference in their raw-HTML markdown snippets.
_CUSTOM_CSS = """
    <style>
    [data-testid="stAppViewContainer"] {
        overflow-y: scroll;
    }
    .main {
        background-color: #f8f9fa;
    }
    .stButton>button {
        width: 100%;
        border-radius: 5px;
        height: 3em;
        background-color: #000000;
        color: white;
    }
    .metric-card {
        background-color: white;
        padding: 10px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.05);
        margin-bottom: 10px;
        border: 1px solid #e9ecef;
    }
    .stage-card {
        background-color: #ffffff;
        padding: 10px;
        border-radius: 8px;
        border-left: 5px solid #000000;
        margin-bottom: 10px;
    }
    .stage-title {
        font-weight: bold;
        font-size: 1.1em;
        color: #1f1f1f;
        margin-bottom: 10px;
    }
    .ref-title {
        font-weight: bold;
        font-size: 0.9em;
        text-transform: uppercase;
        color: #666;
        margin-bottom: 5px;
    }
    </style>
    """
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# --- CONSTANTS ---
# Directory scanned by load_pairs() for "pair_*" case sub-directories.
EVAL_DATA_DIR = Path("src/evaluation_data_comp")
# Local CSV file that save_feedback() appends each submitted row to.
FEEDBACK_FILE = Path("src/feedback_3stage.csv")
# Hugging Face dataset repository that save_feedback() downloads from and uploads to.
HF_REPO_ID = "Beegbrain/armor-composition-feedback"

# --- HELPERS ---
def load_pairs():
    """Return the sorted ``pair_*`` sub-directories of ``EVAL_DATA_DIR``.

    An empty list is returned when the data directory does not exist.
    """
    if EVAL_DATA_DIR.exists():
        case_dirs = [
            entry
            for entry in EVAL_DATA_DIR.iterdir()
            if entry.is_dir() and entry.name.startswith("pair_")
        ]
        case_dirs.sort()
        return case_dirs
    return []

def save_feedback(pair_id, data_dict):
    """Append one feedback row to the local CSV and sync it to the Hub.

    Args:
        pair_id: Identifier of the evaluated case; stored in the ``pair_id``
            column.
        data_dict: Mapping of column name to a one-element list, in the form
            accepted by ``pd.DataFrame``. The mapping is not modified.

    The row is always written to ``FEEDBACK_FILE`` first. When
    ``huggingface_hub`` is importable and the ``HF_TOKEN`` environment
    variable is set, the remote CSV is downloaded, the new row is appended to
    it, and the result is written locally and uploaded. Any sync failure is
    reported with ``st.error`` and never raised to the caller.
    """
    # Build the row from a copy so the caller's dict is left untouched.
    row = {
        **data_dict,
        "timestamp": [datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
        "pair_id": [pair_id],
    }
    new_df = pd.DataFrame(row)

    token = os.environ.get("HF_TOKEN")

    # 1. Local append
    if FEEDBACK_FILE.exists():
        df = pd.concat([pd.read_csv(FEEDBACK_FILE), new_df], ignore_index=True)
    else:
        df = new_df

    FEEDBACK_FILE.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(FEEDBACK_FILE, index=False)

    # 2. Hugging Face sync (best effort)
    if not (HF_HUB_AVAILABLE and token):
        return

    try:
        # Imported here so the module still loads without huggingface_hub.
        from huggingface_hub.utils import EntryNotFoundError

        try:
            remote_path = hf_hub_download(
                repo_id=HF_REPO_ID,
                filename="feedback_3stage.csv",
                repo_type="dataset",
                token=token,
            )
            remote_df = pd.read_csv(remote_path)
        except (EntryNotFoundError, pd.errors.EmptyDataError):
            # No remote CSV yet (or an empty one): nothing to merge, so the
            # local file becomes the first uploaded version.
            pass
        else:
            df = pd.concat([remote_df, new_df], ignore_index=True)
            df.to_csv(FEEDBACK_FILE, index=False)

        # Any other download/parse failure propagates to the handler below
        # and skips the upload: pushing the local file after a failed fetch
        # could overwrite rows that only exist in the remote copy.
        HfApi().upload_file(
            path_or_fileobj=str(FEEDBACK_FILE),
            path_in_repo="feedback_3stage.csv",
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=token,
        )
    except Exception as e:
        st.error(f"HF Sync Error: {e}")

def score_format(x):
    """Return the display label for rating ``x``.

    The two ends of the scale get a qualitative suffix; every other value is
    rendered as its plain string form.
    """
    return "1 (Poor)" if x == 1 else "5 (Excellent)" if x == 5 else str(x)

# --- NAVIGATION ---
# Seed the session-state entries the pages rely on, leaving any value that is
# already present untouched.
for _state_key, _initial_value in (
    ("page", "Overview"),
    ("user_consent", False),
    ("scroll_to_top", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# --- PAGE: OVERVIEW ---
def show_overview():
    """Render the landing page of the study.

    The left column shows the methodology text, the scoring guide and the
    consent checkbox that gates the "Start Evaluation" button. The right
    column shows three example images taken from the first available case,
    or a placeholder when no case has been generated yet.

    Side effects: updates ``st.session_state.user_consent`` on every run and,
    when the start button is pressed, sets ``st.session_state.page`` to
    ``"Evaluation"`` and triggers a rerun.
    """
    st.title("🛡️ Progressive Compositionality Study")

    col1, col2 = st.columns([1.5, 1.2])

    with col1:
        st.subheader("🧬 Evaluation Methodology")
        st.write("This study evaluates the Compositional capabilities of a novel **Object-Centric** model.")
        st.write("**Compositionability** is the ability to manipulate individual objects in a scene without affecting others, and to recombine them across scenes.")

        st.write("The evaluation process will consist of examining the composition of 50 pairs of images, and rating the quality of the model's outputs at each stage on a 1-5 scale.")
        st.write("You will assess a **3-stage progression** of scene manipulation for each case:")
        st.markdown("""
        *   **Stage 1: Decomposition & Compositionality**  
            How well does the model separate the original scene into distinct object slots, and are all extracted parts accurately present in the reconstruction?
        *   **Stage 2: Isolability**  
            Does removing specific objects leave the rest of the scene perfectly intact?
        *   **Stage 3: Recombinability**  
            Can objects from a different image be seamlessly inserted into the gap?
        """)
        st.write("The overall process should take around 30-45 minutes, depending on how much you choose to comment on specific cases.")

        st.subheader("📊 Scoring Guide")
        st.write("Use the radio buttons to rate each dimension from **1 (Poor)** to **5 (Excellent)**.")

        st.info("""
        - **5 (Excellent):** Indistinguishable from the target or baseline.
        - **3 (Fair):** Recognizable with some minor artifacts/blurring.
        - **1 (Poor):** Major artifacts, broken geometry, or identity lost.
        """)

        st.divider()
        # Seed the checkbox with the stored answer so consent already given is
        # not silently reset when the user comes back from the evaluation page.
        st.session_state.user_consent = st.checkbox(
            "I consent to share my ratings for research purposes.",
            value=st.session_state.user_consent,
        )
        if st.button("🚀 Start Evaluation", disabled=not st.session_state.user_consent):
            st.session_state.page = "Evaluation"
            st.rerun()

    with col2:
        st.markdown("<div class='metric-card'>", unsafe_allow_html=True)
        st.markdown("<div class='ref-title'>Study Workflow Example</div>", unsafe_allow_html=True)

        pairs = load_pairs()
        if not pairs:
            # Try fallback to evaluation_data
            backup_dir = Path("evaluation_data")
            if backup_dir.exists():
                pairs = sorted(
                    d for d in backup_dir.iterdir()
                    if d.is_dir() and d.name.startswith("pair_")
                )

        if pairs:
            ex_p = pairs[0]
            try:
                # Show the progression as a vertical set of images with reduced width
                st.image(Image.open(ex_p / "orig1.png"), caption="1. Original Scene", width=300)
                st.image(Image.open(ex_p / "full_recon1.png"), caption="2. Model Reconstruction", width=300)
                st.image(Image.open(ex_p / "mixed_composition.png"), caption="3. Mixed Composition (Final Result)", width=300)
                st.success("The images above show the 3 key stages you will evaluate.")
            except Exception as e:
                st.error(f"Error loading example images: {e}")
        else:
            # The placeholder asset is optional; skip it rather than crash the
            # page when it is not shipped with the app.
            placeholder = Path("assets/dog2.jpeg")
            if placeholder.exists():
                st.image(str(placeholder), width=300)
            st.warning("No generated cases found yet. Examples will appear here once you run the generator.")

        st.markdown("</div>", unsafe_allow_html=True)

# --- PAGE: EVALUATION ---
def show_evaluation():
    """Render the three-stage rating form for the currently selected case.

    Reads the case directories from ``load_pairs()``, lets the user pick a
    case through a sidebar slider, displays the images of each stage with a
    1-5 radio rating, and on submit passes the ratings plus the free-text
    comment to ``save_feedback()`` before advancing to the next case.

    Every rating widget and the comment box are keyed by the case index, so
    each case starts from its own defaults instead of inheriting the answers
    given for the previous case.

    Session state used: ``scroll_to_top`` (one-shot flag that scrolls the page
    to the top after advancing), ``pair_idx`` (index of the current case) and
    ``page`` (set to ``"Overview"`` by the exit button).
    """
    # Handle auto-scroll flag
    if st.session_state.scroll_to_top:
        components.html(
            """
            <script>
                var appContainer = window.parent.document.querySelector('[data-testid="stAppViewContainer"]');
                var mainContainer = window.parent.document.querySelector('.main');
                if (appContainer) { appContainer.scrollTo({ top: 0, behavior: 'instant' }); }
                if (mainContainer) { mainContainer.scrollTo({ top: 0, behavior: 'instant' }); }
            </script>
            """,
            height=0
        )
        st.session_state.scroll_to_top = False

    pairs = load_pairs()
    if not pairs:
        st.error(f"No data in `{EVAL_DATA_DIR}`. Run `generate_compositions.py` first.")
        return

    if 'pair_idx' not in st.session_state:
        st.session_state.pair_idx = 0
    # Clamp a stale index (e.g. fewer cases on disk than in an earlier run) so
    # the slider never receives a value outside its options.
    st.session_state.pair_idx = max(0, min(st.session_state.pair_idx, len(pairs) - 1))

    st.sidebar.title("Study Progress")
    st.session_state.pair_idx = st.sidebar.select_slider(
        "Current Case",
        options=list(range(len(pairs))),
        value=st.session_state.pair_idx,
        format_func=lambda x: f"Case {x+1}"
    )

    if st.sidebar.button("🏠 Exit to Overview"):
        st.session_state.page = "Overview"
        st.rerun()

    idx = st.session_state.pair_idx
    p_path = pairs[idx]
    with open(p_path / "metadata.json", "r", encoding="utf-8") as f:
        meta = json.load(f)

    st.header(f"Evaluation Case {idx + 1}")

    # Shared options for every rating widget; index=2 preselects the middle score.
    scale = [1, 2, 3, 4, 5]

    # --- STAGE 1 ---
    with st.container():
        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 1: Decomposition & Compositionality</div>", unsafe_allow_html=True)

        # Part A - Original Image
        st.markdown("#### Part A: Decomposition Quality")
        # Outer columns are empty spacers that centre the image.
        _, colA2, _ = st.columns([1, 2, 1])
        with colA2:
            st.image(Image.open(p_path / "orig1.png"), caption="Original Scene", use_container_width=True)

        decomposition_rating = st.radio(
            "How well does the model separate the scene into distinct objects/parts? (1: Poor, 5: Excellent)",
            scale, index=2, horizontal=True, key=f"decomp_{idx}", format_func=score_format
        )

        st.divider()

        # Full-width Extracted Components
        st.markdown("#### Extracted Components")
        st.image(Image.open(p_path / "all_slots1_vis.png"), caption="Extracted Components (All Slots)", use_container_width=True)

        st.divider()

        # Part B - Final Reconstruction
        st.markdown("#### Part B: Compositionality")
        _, colB2, _ = st.columns([1, 2, 1])
        with colB2:
            st.image(Image.open(p_path / "full_recon1.png"), caption="Final Reconstruction", use_container_width=True)

        reconstruction_rating = st.radio(
            "Are all the extracted objects and parts present in the final reconstructed image? (1: Poor, 5: Excellent)",
            scale, index=2, horizontal=True, key=f"recon_{idx}", format_func=score_format
        )
        st.markdown("</div>", unsafe_allow_html=True)

    # --- STAGE 2 ---
    with st.container():
        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 2: Independent Manipulation</div>", unsafe_allow_html=True)
        st.caption(f"Removed Slots: {meta['removed_from_1']}")
        c1, c2, c3 = st.columns(3)
        c1.image(Image.open(p_path / "full_recon1.png"), caption="Full Baseline", use_container_width=True)
        c2.image(Image.open(p_path / "selected1_vis.png"), caption="Remaining Slots", use_container_width=True)
        c3.image(Image.open(p_path / "partial_recon1.png"), caption="Partial (Slots Removed)", use_container_width=True)

        isolability_rating = st.radio(
            "**Isolability:** Does the reconstructed image coherently represent the content of the isolated slot? (1: Nothing related to the slot, 5: Perfect)",
            scale, index=2, horizontal=True, key=f"iso_{idx}", format_func=score_format
        )
        st.markdown("</div>", unsafe_allow_html=True)

    # --- STAGE 3 ---
    with st.container():
        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 3: Cross-Image Composition</div>", unsafe_allow_html=True)
        st.caption(f"Added Slots: {meta['added_from_2']}")
        c1, c2, c3 = st.columns(3)
        c1.image(Image.open(p_path / "orig2.png"), caption="Source Image 2", use_container_width=True)
        c2.image(Image.open(p_path / "selected2_vis.png"), caption="New Slots from Img 2", use_container_width=True)
        c3.image(Image.open(p_path / "mixed_composition.png"), caption="Final Mixed Scene", use_container_width=True)

        # Per-case keys, matching Stages 1 and 2, so these ratings reset for
        # every new case instead of carrying the previous answers over.
        q_cols = st.columns(3)
        recomb_rating = q_cols[0].radio(
            "**Recombinability** (are the concept correctly combined) (1: Poor, 5: Good)",
            scale, index=2, horizontal=True, key=f"recomb_{idx}", format_func=score_format
        )
        ident_rating = q_cols[1].radio(
            "**Identity** (can we recognize the concepts ?) (1: Unrecognizable, 5: Sharp)",
            scale, index=2, horizontal=True, key=f"ident_{idx}", format_func=score_format
        )
        spatial_rating = q_cols[2].radio(
            "**Spatial** (are the concepts placed like in their original images ?) (1: Wrong Scale, 5: Coherent)",
            scale, index=2, horizontal=True, key=f"spatial_{idx}", format_func=score_format
        )
        st.markdown("</div>", unsafe_allow_html=True)

    # Keyed per case so a comment written for one case is not pre-filled
    # (and re-submitted) for the next one.
    comments = st.text_area(
        "Observations",
        placeholder="Any specific artifacts or successes...",
        key=f"comments_{idx}",
    )

    if st.button("💾 Submit Feedback & Next"):
        data = {
            "decomposition": [decomposition_rating],
            "reconstruction": [reconstruction_rating],
            "isolability": [isolability_rating],
            "recombinability": [recomb_rating],
            "identity_preservation": [ident_rating],
            "spatial_coherence": [spatial_rating],
            "comments": [comments]
        }
        save_feedback(p_path.name, data)
        st.success("Rating submitted!")
        if idx < len(pairs) - 1:
            st.session_state.pair_idx += 1
            st.session_state.scroll_to_top = True  # Trigger auto-scroll on next render
            st.rerun()
        else:
            st.balloons()

# Route to the page stored in session state; any value other than "Overview"
# renders the evaluation page.
_render_page = show_overview if st.session_state.page == "Overview" else show_evaluation
_render_page()