""" Grasping Gooning — analysis agent UI Run: streamlit run app.py (from /Users/binx/Desktop/Goon/) """ from __future__ import annotations import inspect import json import os import random import sys import threading import time from pathlib import Path import pandas as pd import plotly.io as pio import streamlit as st from dotenv import load_dotenv # ── paths ────────────────────────────────────────────────────────────────── ROOT = Path(__file__).parent sys.path.insert(0, str(ROOT / "agent")) load_dotenv(ROOT / "agent" / ".env") from analysis import run_agent, list_datasets # ── page config ──────────────────────────────────────────────────────────── st.set_page_config( page_title="Grasping Gooning", layout="wide", initial_sidebar_state="expanded", ) @st.cache_data(show_spinner=False) def load_post_samples(n: int = 120) -> list[dict]: """Random sample of real post titles for the loading slideshow.""" try: import pyarrow.dataset as _ds _path = ROOT / "data" / "posts.parquet" if not _path.exists(): return [] d = _ds.dataset(str(_path), format="parquet") t = d.scanner(columns=["subreddit", "title"]).head(40000).to_pandas() mask = ( t["title"].str.len() > 30 ) & ( t["title"].str.len() < 180 ) & ( ~t["title"].str.lower().str.startswith("[") ) sample = t[mask].sample(min(n, mask.sum()), random_state=None) return sample[["subreddit", "title"]].to_dict(orient="records") except Exception: return [] LOADING_HINTS = [ "you're so close…", "keep going…", "deeper…", "almost there…", "don't stop now…", "just a bit more…", "stay with it…", "right there…", "edge of something…", "hold on…", "so close…", "don't stop…", ] # ── global CSS ───────────────────────────────────────────────────────────── st.markdown(""" """, unsafe_allow_html=True) # ── helpers ──────────────────────────────────────────────────────────────── def fmt(v: int | None) -> str: return "n/a" if v is None else f"{v:,}" def dataset_snapshot() -> dict: try: return list_datasets() except Exception: return {} def 
render_plot(plotly_json: str) -> None: try: fig = pio.from_json(plotly_json) fig.update_layout( paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)", font=dict(family="Arial, Helvetica, sans-serif", color="#222", size=12), margin=dict(l=0, r=0, t=32, b=0), colorway=["#000", "#555", "#888", "#bbb"], xaxis=dict(gridcolor="#ebebeb", linecolor="#e0e0e0"), yaxis=dict(gridcolor="#ebebeb", linecolor="#e0e0e0"), ) st.plotly_chart(fig, use_container_width=True) except Exception as exc: st.warning(f"Chart could not be rendered: {exc}") def compact_tool_result(result: object) -> dict: if not isinstance(result, dict): return {"value": result} compact: dict = {"keys": sorted(result.keys())} for key in ("saved_csv", "saved_png", "plotly_json", "error", "analysis", "dataset", "filters"): if key in result and result.get(key) is not None: compact[key] = result[key] table = result.get("table") if isinstance(table, list): compact["table_rows"] = len(table) compact["table_preview"] = table[:5] return compact def extract_artifacts(tool_calls: list[dict]) -> list[dict]: artifacts: list[dict] = [] for tc in tool_calls: result = tc.get("result") or {} if not isinstance(result, dict): continue for key, atype in (("saved_csv", "csv"), ("saved_png", "png")): if result.get(key): artifacts.append({"type": atype, "tool": tc.get("tool", "?"), "path": result[key]}) if result.get("plotly_json"): artifacts.append({"type": "plotly_json", "tool": tc.get("tool", "?"), "present": True}) return artifacts def build_backend_history(turns: list[dict]) -> list[dict]: history: list[dict] = [] for turn in turns: history.append({"role": "user", "content": turn["question"]}) content = turn["answer"] state = { "tool_calls": [ {"tool": tc.get("tool"), "args": tc.get("args") or {}, "result": compact_tool_result(tc.get("result"))} for tc in turn.get("tool_calls", []) ], "artifacts": turn.get("artifacts", []), "plotly_json": bool(turn.get("plotly_json")), "route": turn.get("route"), } if state["tool_calls"] 
or state["artifacts"] or state["plotly_json"]: content += f"\n\n\n{json.dumps(state, default=str, indent=2)}\n" history.append({"role": "assistant", "content": content}) return history def call_agent(question: str, history: list[dict], turns: list[dict]) -> dict: kwargs = {"history": history} params = inspect.signature(run_agent).parameters for name in ("analysis_context", "conversation_state", "turns"): if name in params: kwargs[name] = turns break return run_agent(question, **kwargs) _POST_SAMPLES: list[dict] = [] def call_agent_with_progress(question: str, backend_history: list[dict], turns: list[dict], slot) -> dict: """Run agent in a background thread; update a progress slot from the main thread.""" result_holder: dict = {} exc_holder: dict = {} def worker() -> None: try: result_holder["r"] = call_agent(question, backend_history, turns) except Exception as e: exc_holder["e"] = e t = threading.Thread(target=worker, daemon=True) t.start() STUCK_MSGS = [ "i promise i'm still gooning", "locked in. fully gooned. cannot stop.", "the data is vast. the goon is deep. patience.", "i've been edging this query for so long i've lost track of time.", "every second is another row scanned. feel it.", "this is what a true goon session looks like. no shortcuts.", "i am one with the dataset. 
do not disturb.", ] global _POST_SAMPLES if not _POST_SAMPLES: _POST_SAMPLES = load_post_samples() posts = _POST_SAMPLES if _POST_SAMPLES else [] random.shuffle(posts) pct = 0 idx = 0 post_idx = 0 start = time.time() stuck_since: float | None = None last_pct_change = time.time() while t.is_alive(): prev_pct = pct pct = min(pct + random.randint(1, 5), 93) if pct != prev_pct: stuck_since = None last_pct_change = time.time() else: if stuck_since is None: stuck_since = time.time() elapsed = int(time.time() - start) elapsed_str = f"{elapsed}s" if elapsed < 60 else f"{elapsed // 60}m {elapsed % 60}s" stuck_sec = int(time.time() - stuck_since) if stuck_since else 0 hint = LOADING_HINTS[idx % len(LOADING_HINTS)] # Build up stuck messages — one new line per 12s window, cleared when pct moves n_stuck = min(stuck_sec // 12, len(STUCK_MSGS)) stuck_html = "".join( f'

{STUCK_MSGS[i]}

' for i in range(n_stuck) ) # rotate post every ~11 ticks (~4 seconds) if idx % 11 == 0 and idx > 0: post_idx += 1 post_html = "" if posts: p = posts[post_idx % len(posts)] sub = p.get("subreddit", "") title = p.get("title", "").replace("<", "<").replace(">", ">") post_html = ( f'

' f'

r/{sub}

' f'

{title}

' f'

' ) at_cap = pct >= 93 pct_display = "—%" if at_cap else f"{pct}%" running_label = ( '' 'still running' if at_cap else "" ) slot.markdown( f'

' f'

{hint}

' f'

{pct_display} · {elapsed_str}{running_label}

' f'{stuck_html}' f'{post_html}' f'

', unsafe_allow_html=True, ) idx += 1 time.sleep(0.35) t.join() slot.empty() if exc_holder: raise exc_holder["e"] return result_holder["r"] def render_cost_bar(usage: dict) -> None: cost = usage.get("cost_usd", 0) inp = usage.get("input_tokens", 0) out = usage.get("output_tokens", 0) # scale: 0–$0.50 maps to 0–100% of bar pct = min(cost / 0.50 * 100, 100) if cost < 0.01: val_str = f"< $0.01" else: val_str = f"${cost:.3f}" tok_str = f"{inp:,} in · {out:,} out" st.markdown( f'

' f'cost' f'

' f'{val_str}' f'{tok_str}' f'

', unsafe_allow_html=True, ) def render_tool_calls(tool_calls: list[dict]) -> None: n = len(tool_calls) with st.expander(f"Method {n} step{'s' if n != 1 else ''}", expanded=False): for i, tc in enumerate(tool_calls): st.markdown( f"Step {i+1} -> {tc.get('tool','?')}", unsafe_allow_html=True, ) if tc.get("args"): st.json(tc["args"], expanded=False) res = tc.get("result") or {} if isinstance(res, dict): if res.get("table"): try: st.dataframe(pd.DataFrame(res["table"]), use_container_width=True, hide_index=True) except Exception: pass for key in ("saved_csv", "saved_png"): if res.get(key): st.markdown(f"-> {res[key]}", unsafe_allow_html=True) if i < n - 1: st.markdown("---") def render_export_buttons(answer: str, tool_calls: list[dict], turn_idx: int) -> None: artifacts = extract_artifacts(tool_calls) csvs = [a["path"] for a in artifacts if a["type"] == "csv"] pngs = [a["path"] for a in artifacts if a["type"] == "png"] items: list[tuple[str, bytes, str, str]] = [] items.append(("answer.md", answer.encode("utf-8"), "text/markdown", f"answer_{turn_idx}.md")) for path in csvs: p = Path(path) if p.exists(): items.append((p.name, p.read_bytes(), "text/csv", p.name)) for path in pngs: p = Path(path) if p.exists(): items.append((p.name, p.read_bytes(), "image/png", p.name)) cols = st.columns(len(items)) for col, (label, data, mime, fname) in zip(cols, items): with col: st.download_button( label=label, data=data, file_name=fname, mime=mime, key=f"dl_{turn_idx}_{fname}", ) # ── session state ────────────────────────────────────────────────────────── for key, default in [("history", []), ("chat", []), ("turns", []), ("prefill", ""), ("authenticated", False), ("logged_out", False)]: if key not in st.session_state: st.session_state[key] = default # seed from env if already set (e.g. 
# Seed from the environment if a key is already set (e.g. from the .env file) —
# but not if the user explicitly logged out.
if (
    not st.session_state["authenticated"]
    and not st.session_state["logged_out"]
    and os.environ.get("ANTHROPIC_API_KEY")
):
    st.session_state["authenticated"] = True

# ── dataset metadata ───────────────────────────────────────────────────────
meta = dataset_snapshot()
posts_rows = meta.get("posts", {}).get("rows")
comments_rows = meta.get("comments", {}).get("rows")
sub_count = len(meta.get("posts", {}).get("subreddits") or [])
latest_date = (meta.get("comments", {}).get("date_range") or {}).get("latest", "n/a")

# ── login gate ─────────────────────────────────────────────────────────────
if not st.session_state["authenticated"]:
    st.markdown(""" """, unsafe_allow_html=True)
    col = st.columns([1, 2, 1])[1]
    with col:
        # Show a message left behind by a rejected key. It is stored in
        # session state because the rejection is followed by st.rerun(),
        # which would discard anything drawn in that run.
        login_notice = st.session_state.get("login_notice")
        if login_notice:
            st.error(login_notice)
            st.session_state["login_notice"] = ""

        login_key = st.text_input(
            "API key",
            type="password",
            placeholder="sk-ant-…",
            label_visibility="collapsed",
        )
        if st.button("Enter ->", key="login_btn", use_container_width=True):
            # Drop non-ASCII characters and surrounding whitespace, then
            # validate what is actually going to be stored.
            ascii_key = login_key.encode("ascii", errors="ignore").decode("ascii").strip()
            if ascii_key:
                os.environ["ANTHROPIC_API_KEY"] = ascii_key
                st.session_state["authenticated"] = True
                st.session_state["logged_out"] = False
                st.rerun()
            else:
                st.error("Paste your API key above.")
    # Nothing below the gate is rendered until the user is authenticated.
    st.stop()

# ── sidebar ────────────────────────────────────────────────────────────────
with st.sidebar:
    st.markdown(
        "\n\nGrasping Gooning\n\nreddit data analysis agent\n\n",
        unsafe_allow_html=True,
    )
    st.markdown("", unsafe_allow_html=True)

    if st.button("Clear conversation", key="clear"):
        st.session_state.update(history=[], chat=[], turns=[], prefill="")
        st.rerun()

    if st.button("Log out", key="logout"):
        os.environ.pop("ANTHROPIC_API_KEY", None)
        st.session_state.update(
            history=[], chat=[], turns=[], prefill="",
            authenticated=False, logged_out=True,
        )
        st.rerun()

    # ── about (bottom of sidebar) ──────────────────────────────────────────
    st.markdown("\n\n", unsafe_allow_html=True)
    with st.expander("About"):
        earliest = (meta.get("posts", {}).get("date_range") or {}).get("earliest", "n/a")
        subs_list = meta.get("posts", {}).get("subreddits") or []
        # NOTE(review): this f-string is whitespace-only in the reviewed
        # source; the dataset values computed above are presumably meant to
        # be interpolated here — confirm the template was not lost.
        st.markdown(f""" """, unsafe_allow_html=True)
    st.markdown("\n\n", unsafe_allow_html=True)

# ── chat history ───────────────────────────────────────────────────────────
for i, msg in enumerate(st.session_state["chat"]):
    with st.chat_message(msg["role"]):
        role = msg["role"]
        route = msg.get("route", "")
        label = "You" if role == "user" else "Answer"
        route_html = f"{route}" if route and role == "assistant" else ""
        st.markdown(f"\n\n{label}{route_html}\n\n", unsafe_allow_html=True)
        st.markdown("\n\n", unsafe_allow_html=True)
        st.markdown(msg["content"])
        st.markdown("\n\n", unsafe_allow_html=True)

        # Messages carry a "plotly_jsons" list; a single "plotly_json" value
        # is also accepted.
        stored_plots = msg.get("plotly_jsons") or (
            [msg["plotly_json"]] if msg.get("plotly_json") else []
        )
        for pj in stored_plots:
            render_plot(pj)
        if msg.get("usage"):
            render_cost_bar(msg["usage"])
        if msg.get("tool_calls"):
            render_tool_calls(msg["tool_calls"])
        if role == "assistant":
            render_export_buttons(msg["content"], msg.get("tool_calls") or [], i)

# ── chat input ─────────────────────────────────────────────────────────────
prefill = st.session_state["prefill"]
question = st.chat_input("what do you want to know…")
if prefill:
    # A prefilled question is consumed once.
    st.session_state["prefill"] = ""

effective_question = question or prefill
if effective_question:
    question = effective_question
    backend_history = build_backend_history(st.session_state["turns"])

    with st.chat_message("user"):
        st.markdown("\n\nYou\n\n", unsafe_allow_html=True)
        st.markdown("\n\n", unsafe_allow_html=True)
        st.markdown(question)
        st.markdown("\n\n", unsafe_allow_html=True)

    with st.chat_message("assistant"):
        progress_slot = st.empty()
        try:
            result = call_agent_with_progress(
                question,
                backend_history,
                list(st.session_state["turns"]),
                progress_slot,
            )
        except Exception as exc:
            err_str = str(exc)
            err_lower = err_str.lower()
            is_auth_err = (
                type(exc).__name__ in ("AuthenticationError", "PermissionDeniedError")
                or "invalid x-api-key" in err_lower
                or "401" in err_str
            )
            if is_auth_err:
                os.environ.pop("ANTHROPIC_API_KEY", None)
                # st.rerun() discards anything drawn in this run, so hand the
                # message to the login gate through session state.
                st.session_state.update(
                    authenticated=False,
                    logged_out=True,
                    login_notice="API key rejected — please re-enter it.",
                )
                st.rerun()
            elif "rate_limit" in err_lower:
                st.error("Rate limited. Wait a moment and try again.")
            elif "Unicode encoding error" in err_str or ("ascii" in err_lower and "codec" in err_lower):
                st.error("Encoding error — your API key may contain non-standard characters. Log out and re-enter it.")
            else:
                st.error(f"Something went wrong: {err_str[:300]}")
            st.stop()

        answer = result.get("answer", "")
        tool_calls = result.get("tool_calls", [])
        plotly_jsons = result.get("plotly_jsons") or (
            [result["plotly_json"]] if result.get("plotly_json") else []
        )
        route = result.get("route", "")
        usage = result.get("usage") or {}

        route_html = f"{route}" if route else ""
        st.markdown(f"\n\nAnswer{route_html}\n\n", unsafe_allow_html=True)
        st.markdown("\n\n", unsafe_allow_html=True)
        st.markdown(answer)
        st.markdown("\n\n", unsafe_allow_html=True)

        for pj in plotly_jsons:
            render_plot(pj)
        if usage:
            render_cost_bar(usage)
        if tool_calls:
            render_tool_calls(tool_calls)

        # The history loop above keys export buttons by chat index. This
        # answer will be stored at len(chat) + 1 (after its user message), so
        # use that index here: it cannot collide with an already-rendered
        # message and stays the same on the next rerun.
        answer_chat_idx = len(st.session_state["chat"]) + 1
        render_export_buttons(answer, tool_calls, answer_chat_idx)

    turn = {
        "question": question,
        "answer": answer,
        "tool_calls": tool_calls,
        "plotly_jsons": plotly_jsons,
        "artifacts": extract_artifacts(tool_calls),
        "route": route,
        "usage": usage,
    }
    st.session_state["turns"].append(turn)
    st.session_state["history"] = build_backend_history(st.session_state["turns"])
    st.session_state["chat"].append({"role": "user", "content": question})
    st.session_state["chat"].append({
        "role": "assistant",
        "content": answer,
        "tool_calls": tool_calls,
        "plotly_jsons": plotly_jsons,
        "route": route,
        "usage": usage,
    })