Spaces:

dwzhu
/

PaperBanana

Running

PaperBanana / visualize /show_pipeline_evolution.py

dwzhu

Initial deployment: Gradio app + PaperBananaBench data

587f33e 28 days ago

13.8 kB

	# Copyright 2026 Google LLC
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""
	Streamlit Visualizer for Pipeline Evolution
	Shows the progression of diagrams through Planner → Stylist → Critic stages
	"""

	import streamlit as st
	import json
	import base64
	from io import BytesIO
	from PIL import Image
	import os
	import sys

	# Ensure local imports work: add the current working directory to the
	# module search path.
	sys.path.append(os.getcwd())

	# Configure the Streamlit page: wide layout, page title and banana icon.
	st.set_page_config(layout="wide", page_title="PaperVizAgent Pipeline Evolution", page_icon="🍌")



	@st.cache_data
	def load_data(path):
	    """Load result records from a JSON-array file or a JSON Lines file.

	    When the stripped file text starts with ``[`` it is first parsed as a
	    single JSON document. If that parse fails, every non-blank line is
	    parsed on its own and lines that are not valid JSON are skipped.

	    Args:
	        path: Location of the results file.

	    Returns:
	        The list of decoded records. An empty list is returned when the
	        path does not exist or when reading fails; in the latter case the
	        error is also reported through ``st.error``.
	    """
	    if not os.path.exists(path):
	        return []

	    records = []
	    try:
	        with open(path, "r", encoding="utf-8") as handle:
	            text = handle.read().strip()

	        # A whole-file JSON array takes precedence over line-by-line parsing.
	        if text.startswith("["):
	            try:
	                records = json.loads(text)
	            except json.JSONDecodeError:
	                pass
	            else:
	                if isinstance(records, list):
	                    return records

	        # Fall back to JSONL: one JSON value per non-blank line.
	        for raw_line in text.split("\n"):
	            candidate = raw_line.strip()
	            if candidate:
	                try:
	                    records.append(json.loads(candidate))
	                except json.JSONDecodeError:
	                    # Malformed lines are ignored rather than aborting the load.
	                    continue
	    except Exception as exc:
	        st.error(f"Error reading file: {exc}")
	        return []

	    return records

	def base64_to_image(b64_str):
	    """Decode a base64 string into an image object.

	    Args:
	        b64_str: Base64 text. When it contains a comma (as a data URI
	            does), only the segment right after the first comma is decoded.

	    Returns:
	        The object returned by ``Image.open`` for the decoded bytes, or
	        ``None`` when the input is empty or any decoding step raises.
	    """
	    if not b64_str:
	        return None

	    try:
	        payload = b64_str.split(",")[1] if "," in b64_str else b64_str
	        raw_bytes = base64.b64decode(payload)
	        return Image.open(BytesIO(raw_bytes))
	    except Exception:
	        # Best effort: callers treat None as "could not decode".
	        return None

	def detect_task_type(item):
	    """Classify a record as a ``"plot"`` or ``"diagram"`` task.

	    A record counts as a plot task when it contains either of the
	    plot-specific description keys; everything else is a diagram task.

	    Args:
	        item: A result record (mapping of field name to value).

	    Returns:
	        ``"plot"`` or ``"diagram"``.
	    """
	    plot_markers = ("target_plot_desc0", "target_plot_stylist_desc0")
	    is_plot = any(marker in item for marker in plot_markers)
	    return "plot" if is_plot else "diagram"

	def _collect_stages(item, prefix):
	    """Return the ordered stage descriptors whose description key exists in *item*."""
	    # Human (Ground Truth) is always shown first.
	    stages = [{
	        "title": "🎯 Human (Ground Truth)",
	        "desc_key": None,
	        "img_key": "annotation_info",
	        "color": "orange",
	        "is_human": True,
	    }]

	    # Planner / Vanilla and Stylist share the same key layout.
	    for title, desc_key, color in (
	        ("📝 Planner / Vanilla", f"{prefix}_desc0", "blue"),
	        ("✨ Stylist", f"{prefix}_stylist_desc0", "violet"),
	    ):
	        if desc_key in item:
	            stages.append({
	                "title": title,
	                "desc_key": desc_key,
	                "img_key": f"{desc_key}_base64_jpg",
	                "color": color,
	                "is_human": False,
	            })

	    # Critic rounds (0, 1, 2); the title carries one magnifier per round reached.
	    for round_idx in range(3):
	        critic_desc_key = f"{prefix}_critic_desc{round_idx}"
	        if critic_desc_key in item:
	            emoji = "🔍" * (round_idx + 1)
	            stages.append({
	                "title": f"{emoji} Critic Round {round_idx}",
	                "desc_key": critic_desc_key,
	                "img_key": f"{critic_desc_key}_base64_jpg",
	                "suggestions_key": f"{prefix}_critic_suggestions{round_idx}",
	                "color": "green",
	                "is_human": False,
	                "round_idx": round_idx,
	            })

	    return stages


	def _render_human_stage(item):
	    """Render the ground-truth image read from disk plus its caption expander."""
	    human_path = item.get("path_to_gt_image")
	    if human_path and os.path.exists(human_path):
	        try:
	            st.image(Image.open(human_path), use_container_width=True)
	        except Exception as exc:
	            st.error(f"Failed to load Human image: {exc}")
	    else:
	        st.info("No Human image available")

	    # The human reference has no generated description; show its caption instead.
	    caption = item.get("brief_desc", "No caption available")
	    with st.expander("View Caption", expanded=False):
	        st.write(caption)


	def _render_pipeline_stage(item, stage, task_type):
	    """Render one generated stage: image, description and optional critic suggestions."""
	    img_b64 = item.get(stage["img_key"])
	    if img_b64:
	        img = base64_to_image(img_b64)
	        # base64_to_image signals failure with None.
	        if img is not None:
	            st.image(img, use_container_width=True)
	        else:
	            st.error("Failed to decode image")
	    else:
	        st.info("No image available")

	    desc = item.get(stage["desc_key"], "No description available")
	    with st.expander("View Description", expanded=False):
	        if task_type == "plot" and desc:
	            # Plot descriptions are usually Python code.
	            st.code(desc, language="python")
	        else:
	            st.write(desc)

	    # Only critic stages carry a suggestions key.
	    if "suggestions_key" in stage:
	        suggestions = item.get(stage["suggestions_key"], "")
	        # Guard the text comparison so non-string suggestions cannot crash on .strip().
	        is_noop = isinstance(suggestions, str) and suggestions.strip() == "No changes needed."
	        if suggestions and not is_noop:
	            with st.expander("💬 Critic Suggestions", expanded=False):
	                st.write(suggestions)


	def display_stage_comparison(item):
	    """Render the ground truth and every available pipeline stage in a two-column grid.

	    Stages are discovered from the keys present in *item*: the human
	    reference always comes first, followed by planner, stylist and up to
	    three critic rounds. The grid grows by one row per two stages.

	    Args:
	        item: A result record (mapping of field name to value).
	    """
	    st.markdown("### 📊 Pipeline Evolution Comparison")

	    task_type = detect_task_type(item)
	    prefix = "target_plot" if task_type == "plot" else "target_diagram"
	    stages = _collect_stages(item, prefix)

	    cols_per_row = 2
	    for row_start in range(0, len(stages), cols_per_row):
	        cols = st.columns(cols_per_row)
	        row_stages = stages[row_start:row_start + cols_per_row]
	        # zip stops at the shorter sequence, so a trailing odd stage leaves
	        # the last column of its row empty.
	        for col, stage in zip(cols, row_stages):
	            with col:
	                st.markdown(stage["title"])
	                if stage["is_human"]:
	                    _render_human_stage(item)
	                else:
	                    _render_pipeline_stage(item, stage, task_type)

	def display_critique(item):
	    """Show the ``critique0`` text in a collapsed expander when it is present and non-empty.

	    Args:
	        item: A result record (mapping of field name to value).
	    """
	    if "critique0" not in item:
	        return

	    critique_text = item["critique0"]
	    if not critique_text:
	        return

	    st.markdown("### 💬 Critic's Feedback")
	    with st.expander("View Critique", expanded=False):
	        st.write(critique_text)

	def display_evaluation_results(item):
	    """Show one column per evaluation dimension with its outcome and reasoning.

	    Nothing is rendered unless at least one ``<dimension>_outcome`` key is
	    present in *item*. Missing outcomes or reasonings are shown as ``"N/A"``.

	    Args:
	        item: A result record (mapping of field name to value).
	    """
	    dimensions = ["Faithfulness", "Conciseness", "Readability", "Aesthetics", "Overall"]
	    slugs = [name.lower() for name in dimensions]

	    if not any(f"{slug}_outcome" in item for slug in slugs):
	        return

	    st.markdown("### 📈 Evaluation Results")
	    columns = st.columns(len(dimensions))

	    for column, name, slug in zip(columns, dimensions, slugs):
	        outcome = item.get(f"{slug}_outcome", "N/A")
	        reasoning = item.get(f"{slug}_reasoning", "N/A")

	        with column:
	            st.markdown(name)

	            # Each known outcome gets its own status box; anything else is plain text.
	            if outcome == "Model":
	                st.success(outcome)
	            elif outcome == "Human":
	                st.info(outcome)
	            elif outcome == "Tie":
	                st.warning(outcome)
	            else:
	                st.text(outcome)

	            with st.expander("View Reasoning", expanded=False):
	                st.write(reasoning)

	def _id_matches(item, needle):
	    """Return True when the lower-cased *needle* occurs in the item's ID rendered as text."""
	    item_id = item.get("id")
	    # Do not assume the ID is a string: render it as text before matching so
	    # a numeric or null ID cannot raise on .lower().
	    id_text = "" if item_id is None else str(item_id)
	    return needle in id_text.lower()


	def _render_global_statistics(data):
	    """Show how many samples have planner, stylist and first critic output (needs non-empty *data*)."""
	    total = len(data)

	    # Simple heuristic: the first sample decides which key family is counted
	    # (this assumes the file is consistent).
	    prefix = "target_plot" if detect_task_type(data[0]) == "plot" else "target_diagram"
	    required_keys = (
	        f"{prefix}_desc0",
	        f"{prefix}_stylist_desc0",
	        f"{prefix}_critic_desc0",
	    )
	    has_all_stages = sum(
	        1 for item in data if all(item.get(key) for key in required_keys)
	    )

	    col1, col2, col3 = st.columns(3)
	    col1.metric("Total Samples", total)
	    col2.metric("Complete Pipeline", has_all_stages)
	    col3.metric("Completion Rate", f"{has_all_stages/total*100:.1f}%")


	def _render_page_controls(total_pages):
	    """Draw previous / jump-to / next controls and keep the stored page index in range."""
	    if "page" not in st.session_state:
	        st.session_state.page = 0

	    # A narrower search can leave the stored page beyond the last page. Clamp
	    # it so the number input never receives a value above max_value and the
	    # displayed slice is never empty.
	    st.session_state.page = min(max(st.session_state.page, 0), total_pages - 1)

	    col_left, col_center, col_right = st.columns([1, 2, 1])

	    with col_left:
	        if st.button("⬅️ Previous Page", disabled=(st.session_state.page == 0)):
	            st.session_state.page -= 1
	            st.rerun()

	    with col_center:
	        page_input = st.number_input(
	            "Page",
	            min_value=1,
	            max_value=total_pages,
	            value=st.session_state.page + 1,
	            label_visibility="collapsed"
	        )
	        # The widget is 1-based while the stored page index is 0-based.
	        if page_input != st.session_state.page + 1:
	            st.session_state.page = page_input - 1
	            st.rerun()
	        st.caption(f"Page {st.session_state.page + 1} of {total_pages}")

	    with col_right:
	        if st.button("Next Page ➡️", disabled=(st.session_state.page >= total_pages - 1)):
	            st.session_state.page += 1
	            st.rerun()


	def _render_sample(idx, item):
	    """Render one sample card: header, source content, stage grid, critique and evaluation."""
	    with st.container(border=True):
	        # Header
	        st.subheader(f"#{idx + 1}: {item.get('visual_intent', 'N/A')}")
	        st.caption(f"ID: `{item.get('id', 'Unknown')}`")

	        # Method/Data section
	        task_type = detect_task_type(item)
	        label = "📚 Raw Data" if task_type == "plot" else "📚 Method Section"

	        with st.expander(label, expanded=False):
	            if task_type == "plot":
	                st.code(json.dumps(item.get('content', {}), indent=2), language="json")
	            else:
	                st.markdown(item.get('content', 'N/A'))

	        # Pipeline comparison
	        display_stage_comparison(item)

	        # Critique
	        display_critique(item)

	        # Evaluation results
	        display_evaluation_results(item)


	def main():
	    """Run the viewer: pick a results file, filter by ID, then page through the samples.

	    The sidebar takes the results path, a refresh button that clears the
	    ``load_data`` cache, and an ID search box. The main area shows global
	    statistics, pagination controls and one card per sample on the
	    current page.
	    """
	    st.sidebar.title("🍌 Pipeline Evolution Viewer")
	    file_path = st.sidebar.text_input("Results JSONL Path", placeholder="Enter path to results file...")

	    if st.sidebar.button("🔄 Refresh Data"):
	        load_data.clear()
	        st.rerun()

	    if not file_path:
	        st.info("👆 Please enter a file path to begin")
	        st.stop()

	    if not os.path.exists(file_path):
	        st.error(f"File not found: {file_path}")
	        st.stop()

	    data = load_data(file_path)

	    # --- Search Functionality ---
	    search_query = st.sidebar.text_input("🔍 Search ID", value="", help="Filter by ID (case-insensitive)")
	    if search_query:
	        needle = search_query.lower()
	        data = [item for item in data if _id_matches(item, needle)]
	        st.sidebar.caption(f"Found {len(data)} matching cases")

	    total_items = len(data)

	    if total_items == 0:
	        if search_query:
	            st.warning(f"No samples found matching '{search_query}'.")
	        else:
	            st.warning("Data is empty or format is incorrect.")
	        return

	    st.title("🍌 PaperVizAgent Pipeline Evolution Viewer")
	    st.markdown("Visualizing the progression through Planner → Stylist → Critic stages")

	    st.divider()

	    # --- Global Statistics ---
	    with st.expander("📊 Global Statistics", expanded=False):
	        _render_global_statistics(data)

	    st.divider()

	    # --- Pagination ---
	    PAGE_SIZE = 10
	    # Ceiling division; at least one page so the controls stay valid.
	    total_pages = max((total_items + PAGE_SIZE - 1) // PAGE_SIZE, 1)
	    _render_page_controls(total_pages)

	    start_idx = st.session_state.page * PAGE_SIZE
	    end_idx = min(start_idx + PAGE_SIZE, total_items)
	    batch = data[start_idx:end_idx]

	    st.markdown(f"Displaying {start_idx + 1} - {end_idx} of {total_items}")

	    # --- Display Samples ---
	    for idx, item in enumerate(batch, start=start_idx):
	        _render_sample(idx, item)
	        st.divider()

	# Script entry point: build the page only when this file is executed as the
	# main module, not when it is imported.
	if __name__ == "__main__":
	    main()