"""Streamlit app for a human evaluation study of object-centric composition.

The app has two pages, selected through ``st.session_state.page``:

* **Overview** -- explains the study, collects consent and shows an example.
* **Evaluation** -- walks through the ``pair_*`` directories found in
  ``EVAL_DATA_DIR``, collects 1-5 ratings for three stages and stores them in
  a local CSV file (optionally mirrored to a Hugging Face dataset repo).
"""
import datetime
import json
import os
from pathlib import Path

import pandas as pd
import streamlit as st
import streamlit.components.v1 as components
from PIL import Image

try:
    from huggingface_hub import HfApi, hf_hub_download
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False

# --- PAGE CONFIG ---
st.set_page_config(layout="wide", page_title="Object-centric Composition Evaluation")

# --- CUSTOM CSS ---
# NOTE(review): the content of this string looks like it was stripped from the
# reviewed copy of the file (presumably a <style> block) -- confirm and restore.
st.markdown(""" """, unsafe_allow_html=True)

# --- CONSTANTS ---
EVAL_DATA_DIR = Path("src/evaluation_data_comp")
FEEDBACK_FILE = Path("src/feedback_3stage.csv")
HF_REPO_ID = "Beegbrain/armor-composition-feedback"
RATING_SCALE = [1, 2, 3, 4, 5]


# --- HELPERS ---
def load_pairs(data_dir=None):
    """Return the sorted ``pair_*`` sub-directories of an evaluation folder.

    Args:
        data_dir: Directory to scan. Defaults to ``EVAL_DATA_DIR`` when
            omitted, which is the behaviour existing callers rely on.

    Returns:
        A sorted list of ``Path`` objects, or an empty list when the directory
        does not exist or contains no matching sub-directory.
    """
    root = EVAL_DATA_DIR if data_dir is None else Path(data_dir)
    if not root.exists():
        return []
    return sorted(d for d in root.iterdir() if d.is_dir() and d.name.startswith("pair_"))


def save_feedback(pair_id, data_dict):
    """Append one feedback row to the local CSV and mirror it to Hugging Face.

    Args:
        pair_id: Identifier of the rated case (the pair directory name).
        data_dict: Mapping of column name to a one-element list holding the
            value for that column. It is not modified.

    The Hugging Face sync only runs when ``huggingface_hub`` is importable and
    the ``HF_TOKEN`` environment variable is set. Sync failures are reported
    with ``st.error`` and never raised.
    """
    # Build a new mapping instead of mutating the caller's dictionary.
    record = {
        **data_dict,
        "timestamp": [datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
        "pair_id": [pair_id],
    }
    new_df = pd.DataFrame(record)
    token = os.environ.get("HF_TOKEN")

    # 1. Local append
    df = new_df
    if FEEDBACK_FILE.exists():
        try:
            df = pd.concat([pd.read_csv(FEEDBACK_FILE), new_df], ignore_index=True)
        except pd.errors.EmptyDataError:
            # An empty file has no header to merge with; start from this row.
            df = new_df
    FEEDBACK_FILE.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(FEEDBACK_FILE, index=False)

    # 2. Hugging Face Sync
    if not (HF_HUB_AVAILABLE and token):
        return
    try:
        api = HfApi()
        # Try to pull latest version first to merge
        try:
            remote_path = hf_hub_download(
                repo_id=HF_REPO_ID,
                filename="feedback_3stage.csv",
                repo_type="dataset",
                token=token,
            )
            remote_df = pd.read_csv(remote_path)
            df = pd.concat([remote_df, new_df], ignore_index=True)
            df.to_csv(FEEDBACK_FILE, index=False)
        except Exception:
            # Deliberately best-effort: the remote file may not exist yet, in
            # which case the local file written above is uploaded as-is.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        api.upload_file(
            path_or_fileobj=str(FEEDBACK_FILE),
            path_in_repo="feedback_3stage.csv",
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=token,
        )
    except Exception as e:
        st.error(f"HF Sync Error: {e}")


def score_format(x):
    """Return the display label of a rating, naming the two scale ends."""
    if x == 1:
        return "1 (Poor)"
    if x == 5:
        return "5 (Excellent)"
    return str(x)


def _show_image(target, path, **image_kwargs):
    """Render the image at ``path`` on ``target`` and close the file afterwards.

    ``target`` is anything exposing ``.image`` (the ``st`` module or a column).
    """
    with Image.open(path) as img:
        target.image(img, **image_kwargs)


def _rating_radio(target, label, key):
    """Render a horizontal 1-5 radio on ``target``, pre-selecting 3.

    ``key`` must be unique per case so that a rating given for one case is not
    carried over to the next one.
    """
    return target.radio(
        label,
        RATING_SCALE,
        index=2,
        horizontal=True,
        key=key,
        format_func=score_format,
    )


# --- NAVIGATION ---
if 'page' not in st.session_state:
    st.session_state.page = "Overview"
if 'user_consent' not in st.session_state:
    st.session_state.user_consent = False
if 'scroll_to_top' not in st.session_state:
    st.session_state.scroll_to_top = False


# --- PAGE: OVERVIEW ---
def show_overview():
    """Render the landing page: study description, consent and an example."""
    st.title("🛡️ Progressive Compositionality Study")
    col1, col2 = st.columns([1.5, 1.2])

    with col1:
        st.subheader("🧬 Evaluation Methodology")
        st.write("This study evaluates the Compositional capabilities of a novel **Object-Centric** model.")
        st.write("**Compositionability** is the ability to manipulate individual objects in a scene without affecting others, and to recombine them across scenes.")
        st.write("The evaluation process will consist of examining the composition of 50 pairs of images, and rating the quality of the model's outputs at each stage on a 1-5 scale.")
        st.write("You will assess a **3-stage progression** of scene manipulation for each case:")
        st.markdown("""
        * **Stage 1: Decomposition & Compositionality** How well does the model separate the original scene into distinct object slots, and are all extracted parts accurately present in the reconstruction?
        * **Stage 2: Isolability** Does removing specific objects leave the rest of the scene perfectly intact?
        * **Stage 3: Recombinability** Can objects from a different image be seamlessly inserted into the gap?
        """)
        st.write("The overall process should take around 30-45 minutes, depending on how much you choose to comment on specific cases.")

        st.subheader("📊 Scoring Guide")
        st.write("Use the radio buttons to rate each dimension from **1 (Poor)** to **5 (Excellent)**.")
        st.info("""
        - **5 (Excellent):** Indistinguishable from the target or baseline.
        - **3 (Fair):** Recognizable with some minor artifacts/blurring.
        - **1 (Poor):** Major artifacts, broken geometry, or identity lost.
        """)

        st.divider()
        st.session_state.user_consent = st.checkbox("I consent to share my ratings for research purposes.")
        if st.button("🚀 Start Evaluation", disabled=not st.session_state.user_consent):
            st.session_state.page = "Evaluation"
            st.rerun()

    with col2:
        # NOTE(review): these literals seem to have lost their HTML wrapper
        # tags in the reviewed copy; only the visible text is reproduced.
        st.markdown("\n", unsafe_allow_html=True)
        st.markdown("\nStudy Workflow Example\n", unsafe_allow_html=True)

        # Fall back to the backup folder when the main one has no cases.
        pairs = load_pairs() or load_pairs(Path("evaluation_data"))

        if pairs:
            ex_p = pairs[0]
            try:
                # Show the progression as a vertical set of images with reduced width
                _show_image(st, ex_p / "orig1.png", caption="1. Original Scene", width=300)
                _show_image(st, ex_p / "full_recon1.png", caption="2. Model Reconstruction", width=300)
                _show_image(st, ex_p / "mixed_composition.png", caption="3. Mixed Composition (Final Result)", width=300)
                st.success("The images above show the 3 key stages you will evaluate.")
            except Exception as e:
                st.error(f"Error loading example images: {e}")
        else:
            st.image("assets/dog2.jpeg", width=300)
            st.warning("No generated cases found yet. Examples will appear here once you run the generator.")

        st.markdown("\n", unsafe_allow_html=True)


# --- PAGE: EVALUATION ---
def show_evaluation():
    """Render the rating page for the current case and handle submission."""
    # Handle auto-scroll flag
    if st.session_state.scroll_to_top:
        # NOTE(review): the embedded markup/script appears to have been
        # stripped from the reviewed copy -- confirm and restore.
        components.html(""" """, height=0)
        st.session_state.scroll_to_top = False

    pairs = load_pairs()
    if not pairs:
        st.error(f"No data in `{EVAL_DATA_DIR}`. Run `generate_compositions.py` first.")
        return

    if 'pair_idx' not in st.session_state:
        st.session_state.pair_idx = 0
    # Clamp a stale index so it is always one of the slider options.
    st.session_state.pair_idx = min(st.session_state.pair_idx, len(pairs) - 1)

    st.sidebar.title("Study Progress")
    st.session_state.pair_idx = st.sidebar.select_slider(
        "Current Case",
        options=list(range(len(pairs))),
        value=st.session_state.pair_idx,
        format_func=lambda x: f"Case {x+1}",
    )

    if st.sidebar.button("🏠 Exit to Overview"):
        st.session_state.page = "Overview"
        st.rerun()

    idx = st.session_state.pair_idx
    p_path = pairs[idx]
    with open(p_path / "metadata.json", "r", encoding="utf-8") as f:
        meta = json.load(f)

    st.header(f"Evaluation Case {idx + 1}")

    # --- STAGE 1 ---
    with st.container():
        st.markdown("\nStage 1: Decomposition & Compositionality\n", unsafe_allow_html=True)

        # Part A - Original Image
        st.markdown("#### Part A: Decomposition Quality")
        _, mid_a, _ = st.columns([1, 2, 1])
        with mid_a:
            _show_image(st, p_path / "orig1.png", caption="Original Scene", use_container_width=True)
        decomposition_rating = _rating_radio(
            st,
            "How well does the model separate the scene into distinct objects/parts? (1: Poor, 5: Excellent)",
            f"decomp_{idx}",
        )

        st.divider()

        # Full-width Extracted Components
        st.markdown("#### Extracted Components")
        _show_image(st, p_path / "all_slots1_vis.png", caption="Extracted Components (All Slots)", use_container_width=True)

        st.divider()

        # Part B - Final Reconstruction
        st.markdown("#### Part B: Compositionality")
        _, mid_b, _ = st.columns([1, 2, 1])
        with mid_b:
            _show_image(st, p_path / "full_recon1.png", caption="Final Reconstruction", use_container_width=True)
        reconstruction_rating = _rating_radio(
            st,
            "Are all the extracted objects and parts present in the final reconstructed image? (1: Poor, 5: Excellent)",
            f"recon_{idx}",
        )
        st.markdown("\n", unsafe_allow_html=True)

    # --- STAGE 2 ---
    with st.container():
        st.markdown("\nStage 2: Independent Manipulation\n", unsafe_allow_html=True)
        st.caption(f"Removed Slots: {meta['removed_from_1']}")
        c1, c2, c3 = st.columns(3)
        _show_image(c1, p_path / "full_recon1.png", caption="Full Baseline", use_container_width=True)
        _show_image(c2, p_path / "selected1_vis.png", caption="Remaining Slots", use_container_width=True)
        _show_image(c3, p_path / "partial_recon1.png", caption="Partial (Slots Removed)", use_container_width=True)

        isolability_rating = _rating_radio(
            st,
            "**Isolability:** Does the reconstructed image coherently represent the content of the isolated slot? (1: Nothing related to the slot, 5: Perfect)",
            f"iso_{idx}",
        )
        st.markdown("\n", unsafe_allow_html=True)

    # --- STAGE 3 ---
    with st.container():
        st.markdown("\nStage 3: Cross-Image Composition\n", unsafe_allow_html=True)
        st.caption(f"Added Slots: {meta['added_from_2']}")
        c1, c2, c3 = st.columns(3)
        _show_image(c1, p_path / "orig2.png", caption="Source Image 2", use_container_width=True)
        _show_image(c2, p_path / "selected2_vis.png", caption="New Slots from Img 2", use_container_width=True)
        _show_image(c3, p_path / "mixed_composition.png", caption="Final Mixed Scene", use_container_width=True)

        # Per-case keys (as in stages 1 and 2) stop these answers from being
        # carried over to the next case.
        q_cols = st.columns(3)
        recomb_rating = _rating_radio(
            q_cols[0],
            "**Recombinability** (are the concept correctly combined) (1: Poor, 5: Good)",
            f"recomb_{idx}",
        )
        ident_rating = _rating_radio(
            q_cols[1],
            "**Identity** (can we recognize the concepts ?) (1: Unrecognizable, 5: Sharp)",
            f"ident_{idx}",
        )
        spatial_rating = _rating_radio(
            q_cols[2],
            "**Spatial** (are the concepts placed like in their original images ?) (1: Wrong Scale, 5: Coherent)",
            f"spatial_{idx}",
        )
        st.markdown("\n", unsafe_allow_html=True)

    comments = st.text_area(
        "Observations",
        placeholder="Any specific artifacts or successes...",
        key=f"comments_{idx}",
    )

    if st.button("💾 Submit Feedback & Next"):
        data = {
            "decomposition": [decomposition_rating],
            "reconstruction": [reconstruction_rating],
            "isolability": [isolability_rating],
            "recombinability": [recomb_rating],
            "identity_preservation": [ident_rating],
            "spatial_coherence": [spatial_rating],
            "comments": [comments],
        }
        save_feedback(p_path.name, data)
        st.success("Rating submitted!")
        if idx < len(pairs) - 1:
            st.session_state.pair_idx += 1
            st.session_state.scroll_to_top = True  # Trigger auto-scroll on next render
            st.rerun()
        else:
            st.balloons()


if st.session_state.page == "Overview":
    show_overview()
else:
    show_evaluation()