Spaces:
Running
Running
| # Copyright 2026 Google LLC | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| """ | |
| Streamlit Visualizer for Pipeline Evolution | |
| Shows the progression of diagrams through Planner → Stylist → Critic stages | |
| """ | |
| import streamlit as st | |
| import json | |
| import base64 | |
| from io import BytesIO | |
| from PIL import Image | |
| import os | |
| import sys | |
# Ensure local imports work: add the current working directory to the module
# search path so sibling modules can be imported.
sys.path.append(os.getcwd())
# Page-wide Streamlit settings: wide layout plus the browser tab title and icon.
st.set_page_config(layout="wide", page_title="PaperVizAgent Pipeline Evolution", page_icon="π")
def load_data(path):
    """Read a list of records from a JSON array file or a JSONL file.

    When the stripped content starts with ``[`` it is first parsed as one JSON
    array. If that fails (or the content does not start with ``[``), every
    non-blank line is parsed as an independent JSON document and lines that
    are not valid JSON are skipped.

    The file is decoded as ``utf-8-sig`` so that a leading UTF-8 byte-order
    mark is removed. With plain ``utf-8`` the BOM stays in the text, which
    hides the ``[`` of a JSON array and makes the first JSONL line unparsable.

    Args:
        path: Filesystem path of the results file.

    Returns:
        The list of decoded records. An empty list is returned when ``path``
        does not exist or when reading fails; in the latter case the error is
        also reported through ``st.error``.
    """
    if not os.path.exists(path):
        return []

    records = []
    try:
        with open(path, "r", encoding="utf-8-sig") as f:
            content = f.read().strip()

        # Try to load as a JSON array first.
        if content.startswith("["):
            try:
                parsed = json.loads(content)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, list):
                return parsed

        # Otherwise fall back to JSONL: one JSON document per non-blank line.
        for raw_line in content.split("\n"):
            line = raw_line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError:
                # Best effort: a malformed line must not discard the rest.
                continue
    except Exception as e:
        st.error(f"Error reading file: {e}")
        return []
    return records
def base64_to_image(b64_str):
    """Decode a base64 string into a PIL image.

    If the string contains a comma (as in a ``data:...;base64,<payload>``
    URI), only the segment right after the first comma is decoded.

    Args:
        b64_str: Base64 text, optionally with a comma-separated prefix.

    Returns:
        The opened image, or ``None`` when the input is empty or anything
        goes wrong while decoding or opening it.
    """
    if b64_str:
        try:
            payload = b64_str.split(",")[1] if "," in b64_str else b64_str
            raw_bytes = base64.b64decode(payload)
            return Image.open(BytesIO(raw_bytes))
        except Exception:
            return None
    return None
def detect_task_type(item):
    """Classify a record as a ``"plot"`` or ``"diagram"`` task.

    A record counts as a plot task when it carries either of the
    plot-specific description keys; everything else is a diagram task.
    """
    plot_markers = ("target_plot_desc0", "target_plot_stylist_desc0")
    return "plot" if any(marker in item for marker in plot_markers) else "diagram"
def _collect_stages(item, prefix):
    """Return the display specs of every stage present in ``item``."""
    # Human (Ground Truth) is always shown first.
    stages = [{
        "title": "π― Human (Ground Truth)",
        "desc_key": None,
        "img_key": "annotation_info",
        "color": "orange",
        "is_human": True,
    }]

    # Planner / Vanilla
    planner_key = f"{prefix}_desc0"
    if planner_key in item:
        stages.append({
            "title": "π Planner / Vanilla",
            "desc_key": planner_key,
            "img_key": f"{planner_key}_base64_jpg",
            "color": "blue",
            "is_human": False,
        })

    # Stylist
    stylist_key = f"{prefix}_stylist_desc0"
    if stylist_key in item:
        stages.append({
            "title": "β¨ Stylist",
            "desc_key": stylist_key,
            "img_key": f"{stylist_key}_base64_jpg",
            "color": "violet",
            "is_human": False,
        })

    # Critic rounds (0, 1, 2)
    for round_idx, emoji in enumerate(["π", "ππ", "πππ"]):
        critic_desc_key = f"{prefix}_critic_desc{round_idx}"
        if critic_desc_key in item:
            stages.append({
                "title": f"{emoji} Critic Round {round_idx}",
                "desc_key": critic_desc_key,
                "img_key": f"{critic_desc_key}_base64_jpg",
                "suggestions_key": f"{prefix}_critic_suggestions{round_idx}",
                "color": "green",
                "is_human": False,
                "round_idx": round_idx,
            })
    return stages


def _render_human_stage(item):
    """Render the ground-truth image (read from disk) and its caption."""
    human_path = item.get("path_to_gt_image")
    if human_path and os.path.exists(human_path):
        try:
            img = Image.open(human_path)
            st.image(img, use_container_width=True)
        except Exception as e:
            st.error(f"Failed to load Human image: {e}")
    else:
        st.info("No Human image available")

    # The human reference has a caption instead of a generated description.
    caption = item.get("brief_desc", "No caption available")
    with st.expander("View Caption", expanded=False):
        st.write(caption)


def _render_pipeline_stage(item, stage, task_type):
    """Render one generated stage: image, description, critic suggestions."""
    img_b64 = item.get(stage["img_key"])
    if img_b64:
        img = base64_to_image(img_b64)
        if img:
            st.image(img, use_container_width=True)
        else:
            st.error("Failed to decode image")
    else:
        st.info("No image available")

    desc = item.get(stage["desc_key"], "No description available")
    with st.expander("View Description", expanded=False):
        if task_type == "plot" and desc:
            # Plot descriptions are usually Python code.
            st.code(desc, language="python")
        else:
            st.write(desc)

    # Only critic stages carry a suggestions key.
    if "suggestions_key" in stage:
        suggestions = item.get(stage["suggestions_key"], "")
        # Suggestions come from JSON and may not be a string; only strings
        # can be compared against the "nothing to change" marker.
        is_noop = isinstance(suggestions, str) and suggestions.strip() == "No changes needed."
        if suggestions and not is_noop:
            with st.expander("π¬ Critic Suggestions", expanded=False):
                st.write(suggestions)


def display_stage_comparison(item):
    """Display the ground truth and every available pipeline stage in a grid.

    The stages shown are the human ground truth followed by whichever of
    planner, stylist and critic rounds 0-2 exist in ``item``. They are laid
    out two per row, with as many rows as needed.

    Args:
        item: One result record (a dict) as returned by ``load_data``.
    """
    st.markdown("### π Pipeline Evolution Comparison")
    task_type = detect_task_type(item)
    prefix = "target_plot" if task_type == "plot" else "target_diagram"

    stages = _collect_stages(item, prefix)
    cols_per_row = 2

    # Columns are created per row only; creating extra unused columns up
    # front would render empty rows above the grid.
    for row_start in range(0, len(stages), cols_per_row):
        cols = st.columns(cols_per_row)
        row_stages = stages[row_start:row_start + cols_per_row]
        for col, stage in zip(cols, row_stages):
            with col:
                st.markdown(f"**{stage['title']}**")
                if stage["is_human"]:
                    _render_human_stage(item)
                else:
                    _render_pipeline_stage(item, stage, task_type)
def display_critique(item):
    """Show the ``critique0`` text in a collapsed expander, if it is non-empty."""
    critique = item["critique0"] if "critique0" in item else None
    if not critique:
        return
    st.markdown("### π¬ Critic's Feedback")
    with st.expander("View Critique", expanded=False):
        st.write(critique)
def display_evaluation_results(item):
    """Render one column per evaluation dimension with outcome and reasoning.

    Nothing is rendered unless at least one ``<dimension>_outcome`` key is
    present in ``item``. Missing outcomes or reasonings are shown as "N/A".
    """
    dimensions = ["Faithfulness", "Conciseness", "Readability", "Aesthetics", "Overall"]
    slugs = [name.lower() for name in dimensions]
    if not any(f"{slug}_outcome" in item for slug in slugs):
        return

    st.markdown("### π Evaluation Results")
    columns = st.columns(len(dimensions))
    for column, name, slug in zip(columns, dimensions, slugs):
        outcome = item.get(f"{slug}_outcome", "N/A")
        reasoning = item.get(f"{slug}_reasoning", "N/A")
        with column:
            st.markdown(f"**{name}**")
            # Known outcomes get a coloured box; anything else is plain text.
            styled = (("Model", st.success), ("Human", st.info), ("Tie", st.warning))
            for label, render in styled:
                if outcome == label:
                    render(outcome)
                    break
            else:
                st.text(outcome)
            with st.expander("View Reasoning", expanded=False):
                st.write(reasoning)
def _item_id_text(item):
    """Return the record's ``id`` as text ("" when missing or null)."""
    value = item.get("id", "")
    return "" if value is None else str(value)


def main():
    """Run the viewer: load a results file, filter, paginate and render it.

    Sidebar inputs select the results file and an optional ID search. The
    main area shows global statistics, page navigation, and one bordered
    container per sample with its source content, stage comparison,
    critique and evaluation results.
    """
    st.sidebar.title("π Pipeline Evolution Viewer")
    file_path = st.sidebar.text_input("Results JSONL Path", placeholder="Enter path to results file...")
    if st.sidebar.button("π Refresh Data"):
        # ``clear`` only exists when load_data is wrapped by a Streamlit cache
        # decorator; calling it on a plain function raises AttributeError.
        clear_cache = getattr(load_data, "clear", None)
        if callable(clear_cache):
            clear_cache()
        st.rerun()
    if not file_path:
        st.info("π Please enter a file path to begin")
        st.stop()
    if not os.path.exists(file_path):
        st.error(f"File not found: {file_path}")
        st.stop()
    data = load_data(file_path)

    # --- Search Functionality ---
    search_query = st.sidebar.text_input("π Search ID", value="", help="Filter by ID (case-insensitive)")
    if search_query:
        needle = search_query.lower()
        # IDs come from JSON and may be numbers or null, so normalise to text
        # before the case-insensitive comparison.
        data = [item for item in data if needle in _item_id_text(item).lower()]
        st.sidebar.caption(f"Found {len(data)} matching cases")
    total_items = len(data)
    if total_items == 0:
        if search_query:
            st.warning(f"No samples found matching '{search_query}'.")
        else:
            st.warning("Data is empty or format is incorrect.")
        return

    st.title("π PaperVizAgent Pipeline Evolution Viewer")
    st.markdown(f"Visualizing the progression through **Planner β Stylist β Critic** stages")
    st.divider()

    # --- Global Statistics ---
    with st.expander("π Global Statistics", expanded=False):
        # Heuristic: the first record decides the task type for the whole
        # file (this assumes the file is consistent).
        stats_prefix = "target_plot" if detect_task_type(data[0]) == "plot" else "target_diagram"
        required_keys = (
            f"{stats_prefix}_desc0",
            f"{stats_prefix}_stylist_desc0",
            f"{stats_prefix}_critic_desc0",
        )
        has_all_stages = sum(
            1 for item in data if all(item.get(key) for key in required_keys)
        )
        col1, col2, col3 = st.columns(3)
        col1.metric("Total Samples", total_items)
        col2.metric("Complete Pipeline", has_all_stages)
        # total_items > 0 is guaranteed by the early return above.
        col3.metric("Completion Rate", f"{has_all_stages/total_items*100:.1f}%")
    st.divider()

    # --- Pagination ---
    PAGE_SIZE = 10
    if "page" not in st.session_state:
        st.session_state.page = 0
    total_pages = max((total_items + PAGE_SIZE - 1) // PAGE_SIZE, 1)
    # The stored page can point past the end once a search or a new file
    # shrinks the data; clamp it so number_input gets a value within
    # [min_value, max_value] and the slice below is never empty.
    st.session_state.page = min(max(st.session_state.page, 0), total_pages - 1)

    # Navigation buttons
    col_left, col_center, col_right = st.columns([1, 2, 1])
    with col_left:
        if st.button("β¬ οΈ Previous Page", disabled=(st.session_state.page == 0)):
            st.session_state.page -= 1
            st.rerun()
    with col_center:
        page_input = st.number_input(
            "Page",
            min_value=1,
            max_value=total_pages,
            value=st.session_state.page + 1,
            label_visibility="collapsed",
        )
        if page_input != st.session_state.page + 1:
            st.session_state.page = page_input - 1
            st.rerun()
        st.caption(f"Page {st.session_state.page + 1} of {total_pages}")
    with col_right:
        if st.button("Next Page β‘οΈ", disabled=(st.session_state.page >= total_pages - 1)):
            st.session_state.page += 1
            st.rerun()

    start_idx = st.session_state.page * PAGE_SIZE
    end_idx = min(start_idx + PAGE_SIZE, total_items)
    batch = data[start_idx:end_idx]
    st.markdown(f"**Displaying {start_idx + 1} - {end_idx} of {total_items}**")

    # --- Display Samples ---
    for idx, item in enumerate(batch, start=start_idx):
        with st.container(border=True):
            # Header
            st.subheader(f"#{idx + 1}: {item.get('visual_intent', 'N/A')}")
            st.caption(f"ID: `{item.get('id', 'Unknown')}`")

            # Method/Data section
            task_type = detect_task_type(item)
            label = "π Raw Data" if task_type == "plot" else "π Method Section"
            with st.expander(label, expanded=False):
                if task_type == "plot":
                    st.code(json.dumps(item.get('content', {}), indent=2), language="json")
                else:
                    method_content = item.get('content', 'N/A')
                    st.markdown(method_content)

            # Pipeline comparison
            display_stage_comparison(item)
            # Critique
            display_critique(item)
            # Evaluation results
            display_evaluation_results(item)
        st.divider()
# Run the viewer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()