Spaces:

Beegbrain
/

Evaluation_compos

Sleeping

File size: 13,632 Bytes

1a4057a
6ddf716
1a4057a
 
 
 
 
 
6ddf716
33a7a1e
 
 
 
 
1a4057a
33a7a1e
 
1a4057a
33a7a1e
1a4057a
 
6ddf716
 
 
1a4057a
33a7a1e
1a4057a
 
 
 
 
 
 
 
 
 
6ddf716
1a4057a
33a7a1e
6ddf716
33a7a1e
 
 
 
6ddf716
33a7a1e
 
6ddf716
1a4057a
33a7a1e
1a4057a
33a7a1e
 
1391e47
 
33a7a1e
 
 
 
 
 
1a4057a
 
 
 
33a7a1e
 
 
 
1a4057a
33a7a1e
1a4057a
 
 
 
 
 
1391e47
 
 
33a7a1e
1391e47
33a7a1e
5d836cc
33a7a1e
1a4057a
33a7a1e
 
1a4057a
33a7a1e
 
 
 
 
 
 
5d836cc
33a7a1e
 
 
 
 
 
 
 
 
 
5d836cc
 
33a7a1e
 
5d836cc
33a7a1e
5d836cc
 
33a7a1e
1a4057a
33a7a1e
 
 
 
 
 
1391e47
 
 
 
6ddf716
 
1391e47
33a7a1e
1391e47
33a7a1e
1391e47
33a7a1e
1391e47
 
33a7a1e
 
6ddf716
 
 
33a7a1e
 
6ddf716
 
33a7a1e
 
 
 
 
6ddf716
 
33a7a1e
 
1391e47
 
33a7a1e
 
 
1391e47
 
33a7a1e
 
 
1391e47
 
 
 
 
33a7a1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d90e164
1391e47
1a4057a
33a7a1e
1391e47
6ddf716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a4057a
 
33a7a1e
1a4057a
 
 
 
 
33a7a1e
1391e47
33a7a1e
1391e47
 
33a7a1e
1391e47
 
33a7a1e
1391e47
 
1a4057a
33a7a1e
 
 
1a4057a
33a7a1e
1a4057a
33a7a1e
 
6ddf716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a4057a
6ddf716
48ae325
6ddf716
1a4057a
33a7a1e
 
 
 
 
 
 
 
 
 
1391e47
33a7a1e
48ae325
33a7a1e
1391e47
33a7a1e
 
 
 
 
 
 
 
 
 
 
 
48ae325
 
 
33a7a1e
1391e47
33a7a1e
1a4057a
33a7a1e
 
6ddf716
 
33a7a1e
 
 
1391e47
33a7a1e
1391e47
33a7a1e
 
1391e47
 
6ddf716
1391e47
 
 
 
 
 
 
6ddf716

import streamlit as st
import streamlit.components.v1 as components
import pandas as pd
import json
import os
from pathlib import Path
from PIL import Image
import datetime

try:
    from huggingface_hub import HfApi, hf_hub_download
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False

# --- PAGE CONFIG ---
# Issued before any other Streamlit call in this script: selects the wide
# layout and sets the browser tab title.
st.set_page_config(layout="wide", page_title="Object-centric Composition Evaluation")

# --- CUSTOM CSS ---
# Injects a <style> block through st.markdown; unsafe_allow_html=True is what
# lets the raw HTML through. The .metric-card / .ref-title classes are used by
# the HTML snippets emitted in show_overview(), and .stage-card / .stage-title
# by the ones emitted in show_evaluation().
st.markdown("""
    <style>
    [data-testid="stAppViewContainer"] {
        overflow-y: scroll;
    }
    .main {
        background-color: #f8f9fa;
    }
    .stButton>button {
        width: 100%;
        border-radius: 5px;
        height: 3em;
        background-color: #000000;
        color: white;
    }
    .metric-card {
        background-color: white;
        padding: 10px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.05);
        margin-bottom: 10px;
        border: 1px solid #e9ecef;
    }
    .stage-card {
        background-color: #ffffff;
        padding: 10px;
        border-radius: 8px;
        border-left: 5px solid #000000;
        margin-bottom: 10px;
    }
    .stage-title {
        font-weight: bold;
        font-size: 1.1em;
        color: #1f1f1f;
        margin-bottom: 10px;
    }
    .ref-title {
        font-weight: bold;
        font-size: 0.9em;
        text-transform: uppercase;
        color: #666;
        margin-bottom: 5px;
    }
    </style>
    """, unsafe_allow_html=True)

# --- CONSTANTS ---
# Directory scanned by load_pairs() for "pair_*" case sub-directories.
EVAL_DATA_DIR = Path("src/evaluation_data_comp")
# Local CSV that save_feedback() appends each submitted rating row to.
FEEDBACK_FILE = Path("src/feedback_3stage.csv")
# Hub repository save_feedback() syncs the CSV with (accessed with
# repo_type="dataset").
HF_REPO_ID = "Beegbrain/armor-composition-feedback"

# --- HELPERS ---
def load_pairs():
    """Return the evaluation case directories in sorted order.

    A case directory is any sub-directory of ``EVAL_DATA_DIR`` whose name
    starts with ``pair_``. An empty list is returned when ``EVAL_DATA_DIR``
    does not exist.
    """
    if EVAL_DATA_DIR.exists():
        case_dirs = []
        for entry in EVAL_DATA_DIR.iterdir():
            if entry.is_dir() and entry.name.startswith("pair_"):
                case_dirs.append(entry)
        case_dirs.sort()
        return case_dirs
    return []

def save_feedback(pair_id, data_dict):
    """Append one feedback row to the local CSV and mirror it to the Hub.

    Args:
        pair_id: Value stored in the ``pair_id`` column (the caller in this
            file passes the case directory name).
        data_dict: Mapping of column name to a one-element list, in the form
            accepted by ``pd.DataFrame``. It is not modified.

    The row is stamped with the current local time and appended to
    ``FEEDBACK_FILE``. When ``huggingface_hub`` is importable and the
    ``HF_TOKEN`` environment variable is set, the remote
    ``feedback_3stage.csv`` in ``HF_REPO_ID`` is pulled, the new row is
    appended to it, and the merged file is written locally and uploaded.

    Any failure during the sync is reported with ``st.error`` rather than
    raised. The upload is skipped when the remote file could not be read for
    a reason other than "it does not exist yet" or "it is empty", so a
    transient download failure cannot overwrite remote rows with a
    local-only file.
    """
    # Work on a copy so the caller's dict is left untouched.
    row = dict(data_dict)
    row["timestamp"] = [datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
    row["pair_id"] = [pair_id]
    new_df = pd.DataFrame(row)

    token = os.environ.get("HF_TOKEN")

    # 1. Local append
    if FEEDBACK_FILE.exists():
        df = pd.concat([pd.read_csv(FEEDBACK_FILE), new_df], ignore_index=True)
    else:
        df = new_df

    FEEDBACK_FILE.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(FEEDBACK_FILE, index=False)

    # 2. Hugging Face Sync
    if not (HF_HUB_AVAILABLE and token):
        return

    try:
        # Local import: huggingface_hub is an optional dependency of this app.
        from huggingface_hub.utils import EntryNotFoundError

        api = HfApi()
        try:
            # Pull the latest remote version first so the new row is merged
            # into it instead of replacing it.
            remote_path = hf_hub_download(
                repo_id=HF_REPO_ID,
                filename="feedback_3stage.csv",
                repo_type="dataset",
                token=token,
            )
            remote_df = pd.read_csv(remote_path)
        except (EntryNotFoundError, pd.errors.EmptyDataError):
            # No remote data yet (first submission): upload the local file.
            pass
        else:
            df = pd.concat([remote_df, new_df], ignore_index=True)
            df.to_csv(FEEDBACK_FILE, index=False)

        api.upload_file(
            path_or_fileobj=str(FEEDBACK_FILE),
            path_in_repo="feedback_3stage.csv",
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=token,
        )
    except Exception as e:
        # Top-level boundary for the best-effort sync: surface, do not crash.
        st.error(f"HF Sync Error: {e}")

def score_format(x):
    """Return the display label for a rating value.

    The scale end-points get a descriptive suffix (``1 (Poor)``,
    ``5 (Excellent)``); every other value is rendered with ``str``.
    """
    for anchor, label in ((1, "1 (Poor)"), (5, "5 (Excellent)")):
        if x == anchor:
            return label
    return str(x)

# --- NAVIGATION ---
# Seed the per-session defaults; keys already present in the session state
# are left untouched.
for _state_key, _initial_value in (
    ("page", "Overview"),
    ("user_consent", False),
    ("scroll_to_top", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# --- PAGE: OVERVIEW ---
def show_overview():
    """Render the landing page: methodology, scoring guide and consent gate.

    The left column holds the study description, the scoring guide, the
    consent checkbox and the start button; the button is disabled until
    consent is given and switches ``st.session_state.page`` to
    ``"Evaluation"``. The right column previews three images from the first
    available case, falling back to a static asset when no case exists.
    """
    st.title("🛡️ Progressive Compositionality Study")

    col1, col2 = st.columns([1.5, 1.2])

    with col1:
        st.subheader("🧬 Evaluation Methodology")
        st.write("This study evaluates the Compositional capabilities of a novel **Object-Centric** model.")
        st.write("**Compositionability** is the ability to manipulate individual objects in a scene without affecting others, and to recombine them across scenes.")

        st.write("The evaluation process will consist of examining the composition of 50 pairs of images, and rating the quality of the model's outputs at each stage on a 1-5 scale.")
        st.write("You will assess a **3-stage progression** of scene manipulation for each case:")
        st.markdown("""
        *   **Stage 1: Decomposition & Compositionality**  
            How well does the model separate the original scene into distinct object slots, and are all extracted parts accurately present in the reconstruction?
        *   **Stage 2: Isolability**  
            Does removing specific objects leave the rest of the scene perfectly intact?
        *   **Stage 3: Recombinability**  
            Can objects from a different image be seamlessly inserted into the gap?
        """)
        st.write("The overall process should take around 30-45 minutes, depending on how much you choose to comment on specific cases.")

        st.subheader("📊 Scoring Guide")
        st.write("Use the radio buttons to rate each dimension from **1 (Poor)** to **5 (Excellent)**.")

        st.info("""
        - **5 (Excellent):** Indistinguishable from the target or baseline.
        - **3 (Fair):** Recognizable with some minor artifacts/blurring.
        - **1 (Poor):** Major artifacts, broken geometry, or identity lost.
        """)

        st.divider()
        # Seed the checkbox from the stored value so that coming back here via
        # "Exit to Overview" does not silently reset a consent already given.
        st.session_state.user_consent = st.checkbox(
            "I consent to share my ratings for research purposes.",
            value=bool(st.session_state.get("user_consent", False)),
        )
        if st.button("🚀 Start Evaluation", disabled=not st.session_state.user_consent):
            st.session_state.page = "Evaluation"
            st.rerun()

    with col2:
        st.markdown("<div class='metric-card'>", unsafe_allow_html=True)
        st.markdown("<div class='ref-title'>Study Workflow Example</div>", unsafe_allow_html=True)

        pairs = load_pairs()
        if not pairs:
            # Try fallback to evaluation_data
            backup_dir = Path("evaluation_data")
            if backup_dir.exists():
                pairs = sorted([d for d in backup_dir.iterdir() if d.is_dir() and d.name.startswith("pair_")])

        if pairs:
            ex_p = pairs[0]
            try:
                # Show the progression as a vertical set of images with reduced width
                st.image(Image.open(ex_p / "orig1.png"), caption="1. Original Scene", width=300)
                st.image(Image.open(ex_p / "full_recon1.png"), caption="2. Model Reconstruction", width=300)
                st.image(Image.open(ex_p / "mixed_composition.png"), caption="3. Mixed Composition (Final Result)", width=300)
                st.success("The images above show the 3 key stages you will evaluate.")
            except Exception as e:
                st.error(f"Error loading example images: {e}")
        else:
            # The placeholder is optional: skip it when the asset is missing
            # so the warning below is still shown instead of an error.
            placeholder = Path("assets/dog2.jpeg")
            if placeholder.exists():
                st.image(str(placeholder), width=300)
            st.warning("No generated cases found yet. Examples will appear here once you run the generator.")

        st.markdown("</div>", unsafe_allow_html=True)

# --- PAGE: EVALUATION ---
def show_evaluation():
    """Render the three-stage rating form for the currently selected case.

    The case is chosen with the sidebar slider, whose value is kept in
    ``st.session_state.pair_idx``. Every rating widget and the comment box is
    keyed by the case index, so each case starts from its defaults and values
    entered for one case are not carried over into the next. Submitting
    stores the ratings through ``save_feedback`` and advances to the next
    case, or shows balloons after the last one.

    An error message is shown, and nothing else rendered, when there are no
    cases or the selected case's ``metadata.json`` cannot be read.
    """
    # Handle auto-scroll flag
    if st.session_state.scroll_to_top:
        components.html(
            """
            <script>
                var appContainer = window.parent.document.querySelector('[data-testid="stAppViewContainer"]');
                var mainContainer = window.parent.document.querySelector('.main');
                if (appContainer) { appContainer.scrollTo({ top: 0, behavior: 'instant' }); }
                if (mainContainer) { mainContainer.scrollTo({ top: 0, behavior: 'instant' }); }
            </script>
            """,
            height=0
        )
        st.session_state.scroll_to_top = False

    pairs = load_pairs()
    if not pairs:
        st.error(f"No data in `{EVAL_DATA_DIR}`. Run `generate_compositions.py` first.")
        return

    if 'pair_idx' not in st.session_state:
        st.session_state.pair_idx = 0
    # Keep the stored index inside the slider's options even if the number of
    # cases on disk shrank since it was set.
    st.session_state.pair_idx = min(max(st.session_state.pair_idx, 0), len(pairs) - 1)

    st.sidebar.title("Study Progress")
    st.session_state.pair_idx = st.sidebar.select_slider(
        "Current Case",
        options=list(range(len(pairs))),
        value=st.session_state.pair_idx,
        format_func=lambda x: f"Case {x+1}"
    )

    if st.sidebar.button("🏠 Exit to Overview"):
        st.session_state.page = "Overview"
        st.rerun()

    case_idx = st.session_state.pair_idx
    p_path = pairs[case_idx]
    try:
        with open(p_path / "metadata.json", "r", encoding="utf-8") as f:
            meta = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and decoding errors.
        st.error(f"Could not read metadata for `{p_path.name}`: {e}")
        return

    st.header(f"Evaluation Case {case_idx + 1}")

    # --- STAGE 1 ---
    with st.container():
        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 1: Decomposition & Compositionality</div>", unsafe_allow_html=True)

        # Part A - Original Image
        st.markdown("#### Part A: Decomposition Quality")
        colA1, colA2, colA3 = st.columns([1, 2, 1])
        with colA2:
            st.image(Image.open(p_path / "orig1.png"), caption="Original Scene", use_container_width=True)

        decomposition_rating = st.radio(
            "How well does the model separate the scene into distinct objects/parts? (1: Poor, 5: Excellent)",
            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"decomp_{case_idx}", format_func=score_format
        )

        st.divider()

        # Full-width Extracted Components
        st.markdown("#### Extracted Components")
        st.image(Image.open(p_path / "all_slots1_vis.png"), caption="Extracted Components (All Slots)", use_container_width=True)

        st.divider()

        # Part B - Final Reconstruction
        st.markdown("#### Part B: Compositionality")
        colB1, colB2, colB3 = st.columns([1, 2, 1])
        with colB2:
            st.image(Image.open(p_path / "full_recon1.png"), caption="Final Reconstruction", use_container_width=True)

        reconstruction_rating = st.radio(
            "Are all the extracted objects and parts present in the final reconstructed image? (1: Poor, 5: Excellent)",
            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"recon_{case_idx}", format_func=score_format
        )
        st.markdown("</div>", unsafe_allow_html=True)

    # --- STAGE 2 ---
    with st.container():
        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 2: Independent Manipulation</div>", unsafe_allow_html=True)
        st.caption(f"Removed Slots: {meta.get('removed_from_1', 'n/a')}")
        c1, c2, c3 = st.columns(3)
        c1.image(Image.open(p_path / "full_recon1.png"), caption="Full Baseline", use_container_width=True)
        c2.image(Image.open(p_path / "selected1_vis.png"), caption="Remaining Slots", use_container_width=True)
        c3.image(Image.open(p_path / "partial_recon1.png"), caption="Partial (Slots Removed)", use_container_width=True)

        isolability_rating = st.radio(
            "**Isolability:** Does the reconstructed image coherently represent the content of the isolated slot? (1: Nothing related to the slot, 5: Perfect)",
            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"iso_{case_idx}", format_func=score_format
        )
        st.markdown("</div>", unsafe_allow_html=True)

    # --- STAGE 3 ---
    with st.container():
        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 3: Cross-Image Composition</div>", unsafe_allow_html=True)
        st.caption(f"Added Slots: {meta.get('added_from_2', 'n/a')}")
        c1, c2, c3 = st.columns(3)
        c1.image(Image.open(p_path / "orig2.png"), caption="Source Image 2", use_container_width=True)
        c2.image(Image.open(p_path / "selected2_vis.png"), caption="New Slots from Img 2", use_container_width=True)
        c3.image(Image.open(p_path / "mixed_composition.png"), caption="Final Mixed Scene", use_container_width=True)

        # Keyed per case like Stages 1-2: without a key these widgets keep
        # the previous case's answer when the next case is displayed.
        q_cols = st.columns(3)
        recomb_rating = q_cols[0].radio(
            "**Recombinability** (are the concept correctly combined) (1: Poor, 5: Good)",
            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"recomb_{case_idx}", format_func=score_format
        )
        ident_rating = q_cols[1].radio(
            "**Identity** (can we recognize the concepts ?) (1: Unrecognizable, 5: Sharp)",
            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"ident_{case_idx}", format_func=score_format
        )
        spatial_rating = q_cols[2].radio(
            "**Spatial** (are the concepts placed like in their original images ?) (1: Wrong Scale, 5: Coherent)",
            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"spatial_{case_idx}", format_func=score_format
        )
        st.markdown("</div>", unsafe_allow_html=True)

    # Keyed per case so a comment written for one case is not pre-filled
    # (and re-submitted) for the following one.
    comments = st.text_area(
        "Observations",
        placeholder="Any specific artifacts or successes...",
        key=f"comments_{case_idx}",
    )

    if st.button("💾 Submit Feedback & Next"):
        data = {
            "decomposition": [decomposition_rating],
            "reconstruction": [reconstruction_rating],
            "isolability": [isolability_rating],
            "recombinability": [recomb_rating],
            "identity_preservation": [ident_rating],
            "spatial_coherence": [spatial_rating],
            "comments": [comments]
        }
        save_feedback(p_path.name, data)
        st.success("Rating submitted!")
        if st.session_state.pair_idx < len(pairs) - 1:
            st.session_state.pair_idx += 1
            st.session_state.scroll_to_top = True  # Trigger auto-scroll on next render
            st.rerun()
        else:
            st.balloons()

# Route to the page stored in the session state; any value other than
# "Overview" renders the evaluation page.
_render_page = show_overview if st.session_state.page == "Overview" else show_evaluation
_render_page()