Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| import pandas as pd | |
| import json | |
| import os | |
| from pathlib import Path | |
| from PIL import Image | |
| import datetime | |
| try: | |
| from huggingface_hub import HfApi, hf_hub_download | |
| HF_HUB_AVAILABLE = True | |
| except ImportError: | |
| HF_HUB_AVAILABLE = False | |
# --- PAGE CONFIG ---
# Wide layout plus the browser-tab title for the whole app.
st.set_page_config(layout="wide", page_title="Object-centric Composition Evaluation")
# --- CUSTOM CSS ---
# Inject a <style> block through st.markdown; unsafe_allow_html=True is set so
# the raw HTML is passed through. The rules style Streamlit buttons and define
# the .metric-card / .stage-card / .stage-title / .ref-title classes that the
# page functions reference in their own raw-HTML markdown snippets.
st.markdown("""
<style>
[data-testid="stAppViewContainer"] {
overflow-y: scroll;
}
.main {
background-color: #f8f9fa;
}
.stButton>button {
width: 100%;
border-radius: 5px;
height: 3em;
background-color: #000000;
color: white;
}
.metric-card {
background-color: white;
padding: 10px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0,0,0,0.05);
margin-bottom: 10px;
border: 1px solid #e9ecef;
}
.stage-card {
background-color: #ffffff;
padding: 10px;
border-radius: 8px;
border-left: 5px solid #000000;
margin-bottom: 10px;
}
.stage-title {
font-weight: bold;
font-size: 1.1em;
color: #1f1f1f;
margin-bottom: 10px;
}
.ref-title {
font-weight: bold;
font-size: 0.9em;
text-transform: uppercase;
color: #666;
margin-bottom: 5px;
}
</style>
""", unsafe_allow_html=True)
# --- CONSTANTS ---
# Directory scanned by load_pairs() for "pair_*" case folders.
EVAL_DATA_DIR = Path("src/evaluation_data_comp")
# Local CSV that save_feedback() appends to and uploads.
FEEDBACK_FILE = Path("src/feedback_3stage.csv")
# Hugging Face repo id that save_feedback() syncs with (used with repo_type="dataset").
HF_REPO_ID = "Beegbrain/armor-composition-feedback"
| # --- HELPERS --- | |
def load_pairs():
    """Return the sorted ``pair_*`` sub-directories of ``EVAL_DATA_DIR``.

    Returns:
        A sorted list of ``Path`` objects, one per directory whose name starts
        with ``"pair_"``; an empty list when ``EVAL_DATA_DIR`` does not exist.
    """
    if EVAL_DATA_DIR.exists():
        case_dirs = (
            entry
            for entry in EVAL_DATA_DIR.iterdir()
            if entry.is_dir() and entry.name.startswith("pair_")
        )
        return sorted(case_dirs)
    return []
def save_feedback(pair_id, data_dict):
    """Append one feedback row to the local CSV and mirror it to the HF dataset.

    Args:
        pair_id: Value stored in the ``pair_id`` column of the new row.
        data_dict: Mapping of column name -> single-element list (the shape
            ``pd.DataFrame`` needs to build a one-row frame). It is copied,
            not modified.

    Side effects:
        * Writes ``FEEDBACK_FILE`` (existing rows + the new row).
        * When ``huggingface_hub`` is importable and ``HF_TOKEN`` is set, tries
          to download the remote ``feedback_3stage.csv``, rewrites
          ``FEEDBACK_FILE`` as remote rows + the new row, and uploads it.
          Upload failures are reported with ``st.error`` instead of raising.
    """
    # Work on a copy so the caller's dict does not silently gain two keys.
    row = dict(data_dict)
    row["timestamp"] = [datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
    row["pair_id"] = [pair_id]
    new_df = pd.DataFrame(row)
    token = os.environ.get("HF_TOKEN")

    # 1. Local append
    if FEEDBACK_FILE.exists():
        df = pd.read_csv(FEEDBACK_FILE)
        df = pd.concat([df, new_df], ignore_index=True)
    else:
        df = new_df
    FEEDBACK_FILE.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(FEEDBACK_FILE, index=False)

    # 2. Hugging Face Sync
    if not (HF_HUB_AVAILABLE and token):
        return
    try:
        api = HfApi()
        # Try to pull latest version first to merge
        try:
            remote_path = hf_hub_download(
                repo_id=HF_REPO_ID,
                filename="feedback_3stage.csv",
                repo_type="dataset",
                token=token,
            )
            remote_df = pd.read_csv(remote_path)
            df = pd.concat([remote_df, new_df], ignore_index=True)
            df.to_csv(FEEDBACK_FILE, index=False)
        except Exception:
            # Deliberate best-effort: if the remote file cannot be fetched or
            # merged, upload the local CSV as written above. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # and other BaseException-based control flow propagate.
            pass
        api.upload_file(
            path_or_fileobj=str(FEEDBACK_FILE),
            path_in_repo="feedback_3stage.csv",
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=token,
        )
    except Exception as e:
        st.error(f"HF Sync Error: {e}")
def score_format(x):
    """Return the display label for a rating value.

    ``1`` and ``5`` get a descriptive suffix; any other value is shown as
    ``str(x)``.
    """
    if x == 1:
        label = "1 (Poor)"
    elif x == 5:
        label = "5 (Excellent)"
    else:
        label = str(x)
    return label
# --- NAVIGATION ---
# Seed the session-state entries used for navigation, consent and the
# scroll-to-top flag; values already present in the session are left alone.
_SESSION_DEFAULTS = {
    'page': "Overview",
    'user_consent': False,
    'scroll_to_top': False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
| # --- PAGE: OVERVIEW --- | |
def show_overview():
    """Render the landing page: study description, consent gate and an example.

    Left column: methodology text, scoring guide, the consent checkbox and the
    start button (disabled until consent is given). Starting switches
    ``st.session_state.page`` to ``"Evaluation"`` and reruns the script.

    Right column: the first available case (from ``load_pairs()``, falling back
    to ``evaluation_data/``) shown as three images, or a placeholder image and
    a warning when no case exists.
    """
    st.title("🛡️ Progressive Compositionality Study")
    col1, col2 = st.columns([1.5, 1.2])
    with col1:
        st.subheader("🧬 Evaluation Methodology")
        st.write("This study evaluates the Compositional capabilities of a novel **Object-Centric** model.")
        # Fixed typo in the displayed term ("Compositionability").
        st.write("**Compositionality** is the ability to manipulate individual objects in a scene without affecting others, and to recombine them across scenes.")
        st.write("The evaluation process will consist of examining the composition of 50 pairs of images, and rating the quality of the model's outputs at each stage on a 1-5 scale.")
        st.write("You will assess a **3-stage progression** of scene manipulation for each case:")
        st.markdown("""
* **Stage 1: Decomposition & Compositionality**
How well does the model separate the original scene into distinct object slots, and are all extracted parts accurately present in the reconstruction?
* **Stage 2: Isolability**
Does removing specific objects leave the rest of the scene perfectly intact?
* **Stage 3: Recombinability**
Can objects from a different image be seamlessly inserted into the gap?
""")
        st.write("The overall process should take around 30-45 minutes, depending on how much you choose to comment on specific cases.")
        st.subheader("📊 Scoring Guide")
        st.write("Use the radio buttons to rate each dimension from **1 (Poor)** to **5 (Excellent)**.")
        st.info("""
- **5 (Excellent):** Indistinguishable from the target or baseline.
- **3 (Fair):** Recognizable with some minor artifacts/blurring.
- **1 (Poor):** Major artifacts, broken geometry, or identity lost.
""")
        st.divider()
        # Seed the checkbox with the stored consent so it is not reset to
        # unchecked when the user comes back here from the evaluation page.
        st.session_state.user_consent = st.checkbox(
            "I consent to share my ratings for research purposes.",
            value=st.session_state.user_consent,
        )
        if st.button("🚀 Start Evaluation", disabled=not st.session_state.user_consent):
            st.session_state.page = "Evaluation"
            st.rerun()
    with col2:
        st.markdown("<div class='metric-card'>", unsafe_allow_html=True)
        st.markdown("<div class='ref-title'>Study Workflow Example</div>", unsafe_allow_html=True)
        pairs = load_pairs()
        if not pairs:
            # Try fallback to evaluation_data
            backup_dir = Path("evaluation_data")
            if backup_dir.exists():
                pairs = sorted([d for d in backup_dir.iterdir() if d.is_dir() and d.name.startswith("pair_")])
        if pairs:
            ex_p = pairs[0]
            try:
                # Show the progression as a vertical set of images with reduced width
                st.image(Image.open(ex_p / "orig1.png"), caption="1. Original Scene", width=300)
                st.image(Image.open(ex_p / "full_recon1.png"), caption="2. Model Reconstruction", width=300)
                st.image(Image.open(ex_p / "mixed_composition.png"), caption="3. Mixed Composition (Final Result)", width=300)
                st.success("The images above show the 3 key stages you will evaluate.")
            except Exception as e:
                st.error(f"Error loading example images: {e}")
        else:
            # Only show the placeholder when the asset is actually present, so
            # a missing file does not take down the whole overview page.
            placeholder = Path("assets/dog2.jpeg")
            if placeholder.exists():
                st.image(str(placeholder), width=300)
            st.warning("No generated cases found yet. Examples will appear here once you run the generator.")
        st.markdown("</div>", unsafe_allow_html=True)
| # --- PAGE: EVALUATION --- | |
def show_evaluation():
    """Render the three-stage rating form for the currently selected case.

    Reads the case list from ``load_pairs()``, lets the user pick a case with a
    sidebar slider, shows the images and ``metadata.json`` fields of that case,
    and collects six 1-5 ratings plus a free-text comment. On submit the row is
    passed to ``save_feedback`` and, unless this was the last case, the index
    advances, the scroll-to-top flag is set and the script reruns.

    Every input widget carries a key that includes the case index, so each
    case starts from its own defaults rather than inheriting the previous
    case's answers.
    """
    # Handle auto-scroll flag
    if st.session_state.scroll_to_top:
        components.html(
            """
<script>
var appContainer = window.parent.document.querySelector('[data-testid="stAppViewContainer"]');
var mainContainer = window.parent.document.querySelector('.main');
if (appContainer) { appContainer.scrollTo({ top: 0, behavior: 'instant' }); }
if (mainContainer) { mainContainer.scrollTo({ top: 0, behavior: 'instant' }); }
</script>
""",
            height=0
        )
        st.session_state.scroll_to_top = False

    pairs = load_pairs()
    if not pairs:
        st.error(f"No data in `{EVAL_DATA_DIR}`. Run `generate_compositions.py` first.")
        return

    if 'pair_idx' not in st.session_state:
        st.session_state.pair_idx = 0

    st.sidebar.title("Study Progress")
    st.session_state.pair_idx = st.sidebar.select_slider(
        "Current Case",
        options=list(range(len(pairs))),
        value=st.session_state.pair_idx,
        format_func=lambda x: f"Case {x+1}"
    )
    if st.sidebar.button("🏠 Exit to Overview"):
        st.session_state.page = "Overview"
        st.rerun()

    idx = st.session_state.pair_idx
    p_path = pairs[idx]
    with open(p_path / "metadata.json", "r", encoding="utf-8") as f:
        meta = json.load(f)

    st.header(f"Evaluation Case {idx + 1}")

    # --- STAGE 1 ---
    with st.container():
        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 1: Decomposition & Compositionality</div>", unsafe_allow_html=True)
        # Part A - Original Image
        st.markdown("#### Part A: Decomposition Quality")
        # Only the middle column is used; the outer two act as margins.
        _, colA2, _ = st.columns([1, 2, 1])
        with colA2:
            st.image(Image.open(p_path / "orig1.png"), caption="Original Scene", use_container_width=True)
        # NOTE(review): the source's indentation was lost; the rating widgets
        # are placed at full width below the centred image - confirm.
        decomposition_rating = st.radio(
            "How well does the model separate the scene into distinct objects/parts? (1: Poor, 5: Excellent)",
            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"decomp_{idx}", format_func=score_format
        )
        st.divider()
        # Full-width Extracted Components
        st.markdown("#### Extracted Components")
        st.image(Image.open(p_path / "all_slots1_vis.png"), caption="Extracted Components (All Slots)", use_container_width=True)
        st.divider()
        # Part B - Final Reconstruction
        st.markdown("#### Part B: Compositionality")
        _, colB2, _ = st.columns([1, 2, 1])
        with colB2:
            st.image(Image.open(p_path / "full_recon1.png"), caption="Final Reconstruction", use_container_width=True)
        reconstruction_rating = st.radio(
            "Are all the extracted objects and parts present in the final reconstructed image? (1: Poor, 5: Excellent)",
            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"recon_{idx}", format_func=score_format
        )
        st.markdown("</div>", unsafe_allow_html=True)

    # --- STAGE 2 ---
    with st.container():
        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 2: Independent Manipulation</div>", unsafe_allow_html=True)
        st.caption(f"Removed Slots: {meta['removed_from_1']}")
        c1, c2, c3 = st.columns(3)
        c1.image(Image.open(p_path / "full_recon1.png"), caption="Full Baseline", use_container_width=True)
        c2.image(Image.open(p_path / "selected1_vis.png"), caption="Remaining Slots", use_container_width=True)
        c3.image(Image.open(p_path / "partial_recon1.png"), caption="Partial (Slots Removed)", use_container_width=True)
        isolability_rating = st.radio(
            "**Isolability:** Does the reconstructed image coherently represent the content of the isolated slot? (1: Nothing related to the slot, 5: Perfect)",
            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"iso_{idx}", format_func=score_format
        )
        st.markdown("</div>", unsafe_allow_html=True)

    # --- STAGE 3 ---
    with st.container():
        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 3: Cross-Image Composition</div>", unsafe_allow_html=True)
        st.caption(f"Added Slots: {meta['added_from_2']}")
        c1, c2, c3 = st.columns(3)
        c1.image(Image.open(p_path / "orig2.png"), caption="Source Image 2", use_container_width=True)
        c2.image(Image.open(p_path / "selected2_vis.png"), caption="New Slots from Img 2", use_container_width=True)
        c3.image(Image.open(p_path / "mixed_composition.png"), caption="Final Mixed Scene", use_container_width=True)
        q_cols = st.columns(3)
        # Per-case keys (matching Stage 1/2) so these answers do not carry
        # over from the previous case.
        recomb_rating = q_cols[0].radio("**Recombinability** (are the concept correctly combined) (1: Poor, 5: Good)", [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"recomb_{idx}", format_func=score_format)
        ident_rating = q_cols[1].radio("**Identity** (can we recognize the concepts ?) (1: Unrecognizable, 5: Sharp)", [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"ident_{idx}", format_func=score_format)
        spatial_rating = q_cols[2].radio("**Spatial** (are the concepts placed like in their original images ?) (1: Wrong Scale, 5: Coherent)", [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"spatial_{idx}", format_func=score_format)
        st.markdown("</div>", unsafe_allow_html=True)

    # Keyed per case for the same reason: a comment typed for one case must
    # not show up pre-filled on the next one.
    comments = st.text_area("Observations", placeholder="Any specific artifacts or successes...", key=f"comments_{idx}")

    if st.button("💾 Submit Feedback & Next"):
        data = {
            "decomposition": [decomposition_rating],
            "reconstruction": [reconstruction_rating],
            "isolability": [isolability_rating],
            "recombinability": [recomb_rating],
            "identity_preservation": [ident_rating],
            "spatial_coherence": [spatial_rating],
            "comments": [comments]
        }
        save_feedback(p_path.name, data)
        st.success("Rating submitted!")
        if st.session_state.pair_idx < len(pairs) - 1:
            st.session_state.pair_idx += 1
            st.session_state.scroll_to_top = True  # Trigger auto-scroll on next render
            st.rerun()
        else:
            st.balloons()
# Route to the renderer for the current page; anything other than "Overview"
# shows the evaluation page.
_render_page = show_overview if st.session_state.page == "Overview" else show_evaluation
_render_page()