Spaces:

Beegbrain
/

Evaluation_compos

Sleeping

App Files Files Community

Evaluation_compos / src /app_eval.py

Beegbrain

Update src/app_eval.py

48ae325 verified about 1 month ago

Raw

History Blame Contribute Delete

13.6 kB

	import streamlit as st
	import streamlit.components.v1 as components
	import pandas as pd
	import json
	import os
	from pathlib import Path
	from PIL import Image
	import datetime

	# huggingface_hub is optional: when it cannot be imported the app still runs
	# and HF_HUB_AVAILABLE tells save_feedback to skip the remote sync.
	try:
	    from huggingface_hub import HfApi, hf_hub_download
	except ImportError:
	    HF_HUB_AVAILABLE = False
	else:
	    HF_HUB_AVAILABLE = True

	# --- PAGE CONFIG ---
	# Use the wide layout and set the browser tab title for the app.
	st.set_page_config(layout="wide", page_title="Object-centric Composition Evaluation")

	# --- CUSTOM CSS ---
	# Inject a global <style> block (raw HTML, hence unsafe_allow_html=True). It:
	#   - makes the app view container always scroll vertically,
	#   - sets the background of `.main`,
	#   - styles every Streamlit button as a full-width black button,
	#   - defines the `metric-card`, `stage-card`, `stage-title` and `ref-title`
	#     classes referenced by the raw-HTML st.markdown calls in the page functions.
	st.markdown("""
	<style>
	[data-testid="stAppViewContainer"] {
	overflow-y: scroll;
	}
	.main {
	background-color: #f8f9fa;
	}
	.stButton>button {
	width: 100%;
	border-radius: 5px;
	height: 3em;
	background-color: #000000;
	color: white;
	}
	.metric-card {
	background-color: white;
	padding: 10px;
	border-radius: 10px;
	box-shadow: 0 4px 6px rgba(0,0,0,0.05);
	margin-bottom: 10px;
	border: 1px solid #e9ecef;
	}
	.stage-card {
	background-color: #ffffff;
	padding: 10px;
	border-radius: 8px;
	border-left: 5px solid #000000;
	margin-bottom: 10px;
	}
	.stage-title {
	font-weight: bold;
	font-size: 1.1em;
	color: #1f1f1f;
	margin-bottom: 10px;
	}
	.ref-title {
	font-weight: bold;
	font-size: 0.9em;
	text-transform: uppercase;
	color: #666;
	margin-bottom: 5px;
	}
	</style>
	""", unsafe_allow_html=True)

	# --- CONSTANTS ---
	# Directory scanned by load_pairs() for `pair_*` case sub-directories.
	EVAL_DATA_DIR = Path("src/evaluation_data_comp")
	# Local CSV that save_feedback() appends every submitted rating to.
	FEEDBACK_FILE = Path("src/feedback_3stage.csv")
	# Hugging Face repo (used with repo_type="dataset") that save_feedback() syncs the CSV with.
	HF_REPO_ID = "Beegbrain/armor-composition-feedback"

	# --- HELPERS ---
	def load_pairs():
	    """Return the sorted ``pair_*`` sub-directories of ``EVAL_DATA_DIR``.

	    Returns:
	        A sorted list of ``Path`` objects, one per directory whose name
	        starts with ``"pair_"``; an empty list when ``EVAL_DATA_DIR`` does
	        not exist.
	    """
	    if EVAL_DATA_DIR.exists():
	        return sorted(
	            entry
	            for entry in EVAL_DATA_DIR.iterdir()
	            if entry.is_dir() and entry.name.startswith("pair_")
	        )
	    return []

	def save_feedback(pair_id, data_dict):
	    """Append one feedback record to the local CSV and mirror it to the HF dataset.

	    The record is ``data_dict`` plus a ``timestamp`` column (local time,
	    ``YYYY-MM-DD HH:MM:SS``) and a ``pair_id`` column. It is appended to
	    ``FEEDBACK_FILE``. When ``huggingface_hub`` is importable and the
	    ``HF_TOKEN`` environment variable is set, the remote
	    ``feedback_3stage.csv`` is downloaded, the new record is appended to
	    it, the merged table overwrites ``FEEDBACK_FILE``, and that file is
	    uploaded to ``HF_REPO_ID``.

	    Args:
	        pair_id: Value stored in the ``pair_id`` column.
	        data_dict: Mapping of column name to a one-element list. It is
	            copied, not modified.

	    Upload failures are shown with ``st.error`` instead of being raised.
	    """
	    # Build the row on a copy so the caller's dict is left untouched.
	    record = {
	        **data_dict,
	        "timestamp": [datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
	        "pair_id": [pair_id],
	    }
	    new_df = pd.DataFrame(record)

	    # 1. Local append
	    df = new_df
	    if FEEDBACK_FILE.exists():
	        try:
	            df = pd.concat([pd.read_csv(FEEDBACK_FILE), new_df], ignore_index=True)
	        except pd.errors.EmptyDataError:
	            # A zero-byte CSV has no header to parse; start over from this record.
	            df = new_df

	    FEEDBACK_FILE.parent.mkdir(parents=True, exist_ok=True)
	    df.to_csv(FEEDBACK_FILE, index=False)

	    # 2. Hugging Face Sync
	    token = os.environ.get("HF_TOKEN")
	    if not (HF_HUB_AVAILABLE and token):
	        return

	    try:
	        # Try to pull latest version first to merge
	        try:
	            remote_path = hf_hub_download(
	                repo_id=HF_REPO_ID,
	                filename="feedback_3stage.csv",
	                repo_type="dataset",
	                token=token,
	            )
	            merged = pd.concat([pd.read_csv(remote_path), new_df], ignore_index=True)
	            merged.to_csv(FEEDBACK_FILE, index=False)
	        except Exception:
	            # Best effort: if the remote copy cannot be fetched or merged
	            # (for instance it has not been created yet), upload the local
	            # file as it stands. Unlike a bare `except:`, this does not
	            # swallow KeyboardInterrupt / SystemExit.
	            pass

	        HfApi().upload_file(
	            path_or_fileobj=str(FEEDBACK_FILE),
	            path_in_repo="feedback_3stage.csv",
	            repo_id=HF_REPO_ID,
	            repo_type="dataset",
	            token=token,
	        )
	    except Exception as e:
	        st.error(f"HF Sync Error: {e}")

	def score_format(x):
	    """Return the display label for a rating value.

	    The scale endpoints get a qualifier (``"1 (Poor)"``, ``"5 (Excellent)"``);
	    every other value is rendered with ``str``.
	    """
	    endpoint_labels = ((1, "1 (Poor)"), (5, "5 (Excellent)"))
	    for score, label in endpoint_labels:
	        if x == score:
	            return label
	    return str(x)

	# --- NAVIGATION ---
	# Seed each session-state entry once; values already present (set on an
	# earlier rerun of the script) are left as they are.
	for _state_key, _initial_value in (
	    ("page", "Overview"),
	    ("user_consent", False),
	    ("scroll_to_top", False),
	):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value

	# --- PAGE: OVERVIEW ---
	def show_overview():
	    """Render the landing page of the study.

	    Left column: methodology text, scoring guide, the consent checkbox and
	    the start button (disabled until consent is ticked; clicking it sets
	    ``st.session_state.page`` to ``"Evaluation"`` and reruns the script).

	    Right column: an example taken from the first case directory returned
	    by ``load_pairs()``, falling back to ``pair_*`` directories under
	    ``evaluation_data``; if neither yields a case, a placeholder image and
	    a warning are shown instead.
	    """
	    st.title("🛡️ Progressive Compositionality Study")

	    col1, col2 = st.columns([1.5, 1.2])

	    with col1:
	        st.subheader("🧬 Evaluation Methodology")
	        st.write("This study evaluates the Compositional capabilities of a novel Object-Centric model.")
	        # Spelling fixed ("Compositionability" -> "Compositionality") to match the title and Stage 1 name.
	        st.write("Compositionality is the ability to manipulate individual objects in a scene without affecting others, and to recombine them across scenes.")

	        st.write("The evaluation process will consist of examining the composition of 50 pairs of images, and rating the quality of the model's outputs at each stage on a 1-5 scale.")
	        st.write("You will assess a 3-stage progression of scene manipulation for each case:")
	        st.markdown("""
	* Stage 1: Decomposition & Compositionality
	How well does the model separate the original scene into distinct object slots, and are all extracted parts accurately present in the reconstruction?
	* Stage 2: Isolability
	Does removing specific objects leave the rest of the scene perfectly intact?
	* Stage 3: Recombinability
	Can objects from a different image be seamlessly inserted into the gap?
	""")
	        st.write("The overall process should take around 30-45 minutes, depending on how much you choose to comment on specific cases.")

	        st.subheader("📊 Scoring Guide")
	        st.write("Use the radio buttons to rate each dimension from 1 (Poor) to 5 (Excellent).")

	        st.info("""
	- 5 (Excellent): Indistinguishable from the target or baseline.
	- 3 (Fair): Recognizable with some minor artifacts/blurring.
	- 1 (Poor): Major artifacts, broken geometry, or identity lost.
	""")

	        st.divider()
	        st.session_state.user_consent = st.checkbox("I consent to share my ratings for research purposes.")
	        if st.button("🚀 Start Evaluation", disabled=not st.session_state.user_consent):
	            st.session_state.page = "Evaluation"
	            st.rerun()

	    with col2:
	        st.markdown("<div class='metric-card'>", unsafe_allow_html=True)
	        st.markdown("<div class='ref-title'>Study Workflow Example</div>", unsafe_allow_html=True)

	        pairs = load_pairs()
	        if not pairs:
	            # Try fallback to evaluation_data
	            backup_dir = Path("evaluation_data")
	            if backup_dir.exists():
	                pairs = sorted([d for d in backup_dir.iterdir() if d.is_dir() and d.name.startswith("pair_")])

	        if pairs:
	            example_dir = pairs[0]
	            # (file name, caption) for the vertical progression, in display order.
	            example_images = (
	                ("orig1.png", "1. Original Scene"),
	                ("full_recon1.png", "2. Model Reconstruction"),
	                ("mixed_composition.png", "3. Mixed Composition (Final Result)"),
	            )
	            try:
	                # Show the progression as a vertical set of images with reduced width
	                for filename, caption in example_images:
	                    st.image(Image.open(example_dir / filename), caption=caption, width=300)
	                st.success("The images above show the 3 key stages you will evaluate.")
	            except Exception as e:
	                st.error(f"Error loading example images: {e}")
	        else:
	            st.image("assets/dog2.jpeg", width=300)
	            st.warning("No generated cases found yet. Examples will appear here once you run the generator.")

	        st.markdown("</div>", unsafe_allow_html=True)

	# --- PAGE: EVALUATION ---
	def show_evaluation():
	    """Render the rating form for the currently selected case.

	    Reads the case directories from ``load_pairs()``, lets the user pick a
	    case with the sidebar slider (``st.session_state.pair_idx``), shows the
	    images of the three stages with one 1-5 radio per rated dimension, and
	    on submit passes the ratings to ``save_feedback`` before advancing to
	    the next case (or showing balloons after the last one).

	    Every rating widget and the comments box is keyed by the case index,
	    so each case starts from its own default state instead of inheriting
	    the values entered for the previous case.
	    """
	    # Handle auto-scroll flag
	    if st.session_state.scroll_to_top:
	        components.html(
	            """
	<script>
	var appContainer = window.parent.document.querySelector('[data-testid="stAppViewContainer"]');
	var mainContainer = window.parent.document.querySelector('.main');
	if (appContainer) { appContainer.scrollTo({ top: 0, behavior: 'instant' }); }
	if (mainContainer) { mainContainer.scrollTo({ top: 0, behavior: 'instant' }); }
	</script>
	""",
	            height=0
	        )
	        st.session_state.scroll_to_top = False

	    pairs = load_pairs()
	    if not pairs:
	        st.error(f"No data in `{EVAL_DATA_DIR}`. Run `generate_compositions.py` first.")
	        return

	    if 'pair_idx' not in st.session_state:
	        st.session_state.pair_idx = 0
	    # A stored index can exceed the current number of cases (e.g. the data
	    # directory shrank); clamp it so it is always one of the slider options.
	    st.session_state.pair_idx = min(st.session_state.pair_idx, len(pairs) - 1)

	    st.sidebar.title("Study Progress")
	    st.session_state.pair_idx = st.sidebar.select_slider(
	        "Current Case",
	        options=list(range(len(pairs))),
	        value=st.session_state.pair_idx,
	        format_func=lambda x: f"Case {x+1}"
	    )

	    if st.sidebar.button("🏠 Exit to Overview"):
	        st.session_state.page = "Overview"
	        st.rerun()

	    idx = st.session_state.pair_idx
	    p_path = pairs[idx]
	    with open(p_path / "metadata.json", "r", encoding="utf-8") as f:
	        meta = json.load(f)

	    st.header(f"Evaluation Case {idx + 1}")

	    # --- STAGE 1 ---
	    with st.container():
	        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 1: Decomposition & Compositionality</div>", unsafe_allow_html=True)

	        # Part A - Original Image
	        st.markdown("#### Part A: Decomposition Quality")
	        _, colA2, _ = st.columns([1, 2, 1])
	        with colA2:
	            st.image(Image.open(p_path / "orig1.png"), caption="Original Scene", use_container_width=True)

	        decomposition_rating = st.radio(
	            "How well does the model separate the scene into distinct objects/parts? (1: Poor, 5: Excellent)",
	            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"decomp_{idx}", format_func=score_format
	        )

	        st.divider()

	        # Full-width Extracted Components
	        st.markdown("#### Extracted Components")
	        st.image(Image.open(p_path / "all_slots1_vis.png"), caption="Extracted Components (All Slots)", use_container_width=True)

	        st.divider()

	        # Part B - Final Reconstruction
	        st.markdown("#### Part B: Compositionality")
	        _, colB2, _ = st.columns([1, 2, 1])
	        with colB2:
	            st.image(Image.open(p_path / "full_recon1.png"), caption="Final Reconstruction", use_container_width=True)

	        reconstruction_rating = st.radio(
	            "Are all the extracted objects and parts present in the final reconstructed image? (1: Poor, 5: Excellent)",
	            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"recon_{idx}", format_func=score_format
	        )
	        st.markdown("</div>", unsafe_allow_html=True)

	    # --- STAGE 2 ---
	    with st.container():
	        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 2: Independent Manipulation</div>", unsafe_allow_html=True)
	        st.caption(f"Removed Slots: {meta['removed_from_1']}")
	        c1, c2, c3 = st.columns(3)
	        c1.image(Image.open(p_path / "full_recon1.png"), caption="Full Baseline", use_container_width=True)
	        c2.image(Image.open(p_path / "selected1_vis.png"), caption="Remaining Slots", use_container_width=True)
	        c3.image(Image.open(p_path / "partial_recon1.png"), caption="Partial (Slots Removed)", use_container_width=True)

	        isolability_rating = st.radio(
	            "Isolability: Does the reconstructed image coherently represent the content of the isolated slot? (1: Nothing related to the slot, 5: Perfect)",
	            [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"iso_{idx}", format_func=score_format
	        )
	        st.markdown("</div>", unsafe_allow_html=True)

	    # --- STAGE 3 ---
	    with st.container():
	        st.markdown("<div class='stage-card'><div class='stage-title'>Stage 3: Cross-Image Composition</div>", unsafe_allow_html=True)
	        st.caption(f"Added Slots: {meta['added_from_2']}")
	        c1, c2, c3 = st.columns(3)
	        c1.image(Image.open(p_path / "orig2.png"), caption="Source Image 2", use_container_width=True)
	        c2.image(Image.open(p_path / "selected2_vis.png"), caption="New Slots from Img 2", use_container_width=True)
	        c3.image(Image.open(p_path / "mixed_composition.png"), caption="Final Mixed Scene", use_container_width=True)

	        # Keyed per case, like the Stage 1/2 radios, so a new case starts at the default 3.
	        q_cols = st.columns(3)
	        recomb_rating = q_cols[0].radio("Recombinability (are the concept correctly combined) (1: Poor, 5: Good)", [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"recomb_{idx}", format_func=score_format)
	        ident_rating = q_cols[1].radio("Identity (can we recognize the concepts ?) (1: Unrecognizable, 5: Sharp)", [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"ident_{idx}", format_func=score_format)
	        spatial_rating = q_cols[2].radio("Spatial (are the concepts placed like in their original images ?) (1: Wrong Scale, 5: Coherent)", [1, 2, 3, 4, 5], index=2, horizontal=True, key=f"spatial_{idx}", format_func=score_format)
	        st.markdown("</div>", unsafe_allow_html=True)

	    # Keyed per case so the previous case's observations are not carried over.
	    comments = st.text_area("Observations", placeholder="Any specific artifacts or successes...", key=f"comments_{idx}")

	    if st.button("💾 Submit Feedback & Next"):
	        data = {
	            "decomposition": [decomposition_rating],
	            "reconstruction": [reconstruction_rating],
	            "isolability": [isolability_rating],
	            "recombinability": [recomb_rating],
	            "identity_preservation": [ident_rating],
	            "spatial_coherence": [spatial_rating],
	            "comments": [comments]
	        }
	        save_feedback(p_path.name, data)
	        st.success("Rating submitted!")
	        if idx < len(pairs) - 1:
	            st.session_state.pair_idx += 1
	            st.session_state.scroll_to_top = True # Trigger auto-scroll on next render
	            st.rerun()
	        else:
	            st.balloons()

	# Route to the page stored in session state; anything other than
	# "Overview" renders the evaluation page.
	_render_page = show_overview if st.session_state.page == "Overview" else show_evaluation
	_render_page()