"""
Grasping Gooning — analysis agent UI
Run: streamlit run app.py (from /Users/binx/Desktop/Goon/)
"""
from __future__ import annotations
import inspect
import json
import os
import random
import sys
import threading
import time
from pathlib import Path
import pandas as pd
import plotly.io as pio
import streamlit as st
from dotenv import load_dotenv
# ── paths ──────────────────────────────────────────────────────────────────
# The agent package lives in ./agent next to this file; it is put first on
# sys.path so the `analysis` import that follows can be resolved, and its
# .env file is loaded before that import runs.
ROOT = Path(__file__).parent
_AGENT_DIR = ROOT / "agent"
sys.path.insert(0, str(_AGENT_DIR))
load_dotenv(_AGENT_DIR / ".env")
from analysis import run_agent, list_datasets
# ── page config ────────────────────────────────────────────────────────────
# Page-level Streamlit settings, kept in one mapping and applied in one call.
_PAGE_CONFIG = {
    "page_title": "Grasping Gooning",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
@st.cache_data(show_spinner=False)
def load_post_samples(n: int = 120) -> list[dict]:
    """Random sample of real post titles for the loading slideshow.

    Reads the ``subreddit`` and ``title`` columns of the first 40,000 rows of
    ``data/posts.parquet`` (relative to ``ROOT``), keeps titles whose length is
    strictly between 30 and 180 characters and that do not start with ``[``,
    and returns up to ``n`` randomly chosen rows.

    Args:
        n: Maximum number of posts to return.

    Returns:
        A list of ``{"subreddit": ..., "title": ...}`` records, or an empty
        list when the parquet file is missing or anything goes wrong while
        reading it.
    """
    # Deliberately best-effort: the slideshow is decorative, so any failure
    # (pyarrow missing, unreadable file, unexpected schema) yields no posts.
    try:
        import pyarrow.dataset as _ds

        _path = ROOT / "data" / "posts.parquet"
        if not _path.exists():
            return []
        d = _ds.dataset(str(_path), format="parquet")
        t = d.scanner(columns=["subreddit", "title"]).head(40000).to_pandas()

        title_len = t["title"].str.len()
        # na=False keeps the mask boolean when some titles are null; without
        # it the result is an object Series containing NaN and `~` raises,
        # which the except below would turn into an empty slideshow.
        starts_with_bracket = t["title"].str.startswith("[", na=False)
        mask = (title_len > 30) & (title_len < 180) & ~starts_with_bracket

        # Clamp so a negative n cannot reach DataFrame.sample.
        k = max(0, min(n, int(mask.sum())))
        sample = t[mask].sample(k, random_state=None)
        return sample[["subreddit", "title"]].to_dict(orient="records")
    except Exception:
        return []
# Short captions shown while the agent is running; call_agent_with_progress
# cycles through them by tick index (idx % len(LOADING_HINTS)).
LOADING_HINTS = [
"you're so close…",
"keep going…",
"deeper…",
"almost there…",
"don't stop now…",
"just a bit more…",
"stay with it…",
"right there…",
"edge of something…",
"hold on…",
"so close…",
"don't stop…",
]
# ── global CSS ─────────────────────────────────────────────────────────────
# Emits a raw-HTML markdown element once per script run.
# NOTE(review): the markup passed here appears empty in this view of the file;
# presumably it carries the app's <style> rules — confirm against the original.
st.markdown("""
""", unsafe_allow_html=True)
# ── helpers ────────────────────────────────────────────────────────────────
def fmt(v: int | None) -> str:
    """Format a count with thousands separators, or "n/a" when it is None."""
    if v is None:
        return "n/a"
    return format(v, ",")
def dataset_snapshot() -> dict:
    """Return whatever ``list_datasets()`` reports, or ``{}`` if it raises."""
    try:
        snapshot = list_datasets()
    except Exception:
        # Best-effort: the UI can still render without dataset metadata.
        snapshot = {}
    return snapshot
def render_plot(plotly_json: str) -> None:
    """Draw a Plotly figure from its JSON form using the app's monochrome theme.

    Any failure while parsing, styling or drawing the figure is reported as a
    Streamlit warning instead of propagating.
    """
    # Both axes share the same grid/line colours.
    axis_style = {"gridcolor": "#ebebeb", "linecolor": "#e0e0e0"}
    theme = {
        "paper_bgcolor": "rgba(0,0,0,0)",
        "plot_bgcolor": "rgba(0,0,0,0)",
        "font": {"family": "Arial, Helvetica, sans-serif", "color": "#222", "size": 12},
        "margin": {"l": 0, "r": 0, "t": 32, "b": 0},
        "colorway": ["#000", "#555", "#888", "#bbb"],
        "xaxis": dict(axis_style),
        "yaxis": dict(axis_style),
    }
    try:
        figure = pio.from_json(plotly_json)
        figure.update_layout(**theme)
        st.plotly_chart(figure, use_container_width=True)
    except Exception as exc:
        st.warning(f"Chart could not be rendered: {exc}")
def compact_tool_result(result: object) -> dict:
    """Summarise a tool result for inclusion in the backend history.

    Non-dict results are wrapped as ``{"value": result}``. For dict results the
    summary lists the sorted key names, copies a fixed set of fields when they
    are present and not None, and replaces a list-valued ``table`` with its row
    count plus its first five rows.
    """
    if isinstance(result, dict):
        carried_fields = (
            "saved_csv", "saved_png", "plotly_json", "error",
            "analysis", "dataset", "filters",
        )
        summary: dict = {"keys": sorted(result)}
        summary.update(
            (name, result[name])
            for name in carried_fields
            if result.get(name) is not None
        )
        rows = result.get("table")
        if isinstance(rows, list):
            summary.update(table_rows=len(rows), table_preview=rows[:5])
        return summary
    return {"value": result}
def extract_artifacts(tool_calls: list[dict]) -> list[dict]:
    """List the files and plots produced by a sequence of tool calls.

    For each call whose ``result`` is a dict, emits one entry per truthy
    ``saved_csv`` / ``saved_png`` path (in that order) and then a marker entry
    if the result carries a truthy ``plotly_json``. Calls with a missing or
    non-dict result contribute nothing.
    """
    file_kinds = {"saved_csv": "csv", "saved_png": "png"}
    collected: list[dict] = []
    for call in tool_calls:
        outcome = call.get("result") or {}
        if isinstance(outcome, dict):
            tool_name = call.get("tool", "?")
            collected.extend(
                {"type": kind, "tool": tool_name, "path": outcome[field]}
                for field, kind in file_kinds.items()
                if outcome.get(field)
            )
            if outcome.get("plotly_json"):
                collected.append({"type": "plotly_json", "tool": tool_name, "present": True})
    return collected
def build_backend_history(turns: list[dict]) -> list[dict]:
    """Convert stored turns into the role/content message list sent to the agent.

    Each turn yields a ``user`` message (its question) and an ``assistant``
    message (its answer). When the turn used tools, produced artifacts or
    produced a plot, a JSON dump of that state is appended to the assistant
    content so the backend can see what was already computed.

    Args:
        turns: Turn dicts with at least ``question`` and ``answer``; optional
            ``tool_calls``, ``artifacts``, ``plotly_jsons`` / ``plotly_json``
            and ``route``.

    Returns:
        A flat list of ``{"role": ..., "content": ...}`` dicts, two per turn.

    Raises:
        KeyError: If a turn lacks ``question`` or ``answer``.
    """
    history: list[dict] = []
    for turn in turns:
        history.append({"role": "user", "content": turn["question"]})
        content = turn["answer"]
        state = {
            "tool_calls": [
                {
                    "tool": tc.get("tool"),
                    "args": tc.get("args") or {},
                    "result": compact_tool_result(tc.get("result")),
                }
                # `or []` tolerates a turn that stored tool_calls=None.
                for tc in (turn.get("tool_calls") or [])
            ],
            "artifacts": turn.get("artifacts", []),
            # Turns are recorded with the plural "plotly_jsons" key; the
            # singular key is still honoured for older turn dicts.
            "plotly_json": bool(turn.get("plotly_jsons") or turn.get("plotly_json")),
            "route": turn.get("route"),
        }
        if state["tool_calls"] or state["artifacts"] or state["plotly_json"]:
            content += f"\n\n\n{json.dumps(state, default=str, indent=2)}\n"
        history.append({"role": "assistant", "content": content})
    return history
def call_agent(question: str, history: list[dict], turns: list[dict]) -> dict:
    """Invoke ``run_agent`` with whichever context keyword its signature accepts.

    ``history`` is always passed. ``turns`` is additionally passed under the
    first of ``analysis_context``, ``conversation_state`` or ``turns`` that
    appears among ``run_agent``'s parameters; if none does, it is omitted.
    """
    accepted = inspect.signature(run_agent).parameters
    candidates = ("analysis_context", "conversation_state", "turns")
    context_name = next((name for name in candidates if name in accepted), None)

    call_kwargs = {"history": history}
    if context_name is not None:
        call_kwargs[context_name] = turns
    return run_agent(question, **call_kwargs)
# Module-level cache of slideshow posts; call_agent_with_progress fills it via
# load_post_samples() whenever it is still empty.
_POST_SAMPLES: list[dict] = []
def call_agent_with_progress(question: str, backend_history: list[dict], turns: list[dict], slot) -> dict:
"""Run agent in a background thread; update a progress slot from the main thread."""
# `slot` must offer .markdown(...) and .empty() (the caller passes st.empty()).
# The holders carry the worker's result or exception back to this thread, which
# returns result_holder["r"] or re-raises exc_holder["e"] after the join.
result_holder: dict = {}
exc_holder: dict = {}
def worker() -> None:
try:
result_holder["r"] = call_agent(question, backend_history, turns)
except Exception as e:
exc_holder["e"] = e
# Daemon thread: it does not keep the interpreter alive on shutdown.
t = threading.Thread(target=worker, daemon=True)
t.start()
# Extra lines revealed one by one while the percentage sits at its cap.
STUCK_MSGS = [
"i promise i'm still gooning",
"locked in. fully gooned. cannot stop.",
"the data is vast. the goon is deep. patience.",
"i've been edging this query for so long i've lost track of time.",
"every second is another row scanned. feel it.",
"this is what a true goon session looks like. no shortcuts.",
"i am one with the dataset. do not disturb.",
]
global _POST_SAMPLES
if not _POST_SAMPLES:
_POST_SAMPLES = load_post_samples()
# When samples exist, `posts` is the same list object as _POST_SAMPLES, so the
# shuffle below reorders the module-level cache in place.
posts = _POST_SAMPLES if _POST_SAMPLES else []
random.shuffle(posts)
pct = 0
idx = 0
post_idx = 0
start = time.time()
stuck_since: float | None = None
# NOTE(review): last_pct_change is assigned here and in the loop but never read.
last_pct_change = time.time()
# Poll loop: one iteration per tick (0.35 s sleep at the bottom) until the worker ends.
while t.is_alive():
prev_pct = pct
# The displayed percentage is simulated: a random 1-5 step per tick, capped at 93.
# Once at the cap pct stops changing, which starts the "stuck" timer below.
pct = min(pct + random.randint(1, 5), 93)
if pct != prev_pct:
stuck_since = None
last_pct_change = time.time()
else:
if stuck_since is None:
stuck_since = time.time()
elapsed = int(time.time() - start)
elapsed_str = f"{elapsed}s" if elapsed < 60 else f"{elapsed // 60}m {elapsed % 60}s"
stuck_sec = int(time.time() - stuck_since) if stuck_since else 0
hint = LOADING_HINTS[idx % len(LOADING_HINTS)]
# Build up stuck messages — one new line per 12s window, cleared when pct moves
n_stuck = min(stuck_sec // 12, len(STUCK_MSGS))
stuck_html = "".join(
f'
{STUCK_MSGS[i]}
'
for i in range(n_stuck)
)
# rotate post every ~11 ticks (~4 seconds)
if idx % 11 == 0 and idx > 0:
post_idx += 1
post_html = ""
if posts:
p = posts[post_idx % len(posts)]
sub = p.get("subreddit", "")
# NOTE(review): as written both replace() calls swap a character for itself, so
# the title is not escaped; presumably &lt; / &gt; were intended — confirm.
# `sub` is interpolated into the markup without any escaping.
title = p.get("title", "").replace("<", "<").replace(">", ">")
post_html = (
f''
f'
r/{sub}
'
f'
{title}
'
f'
'
)
# At the cap the number is replaced by a dash and a "still running" label is added.
at_cap = pct >= 93
pct_display = "—%" if at_cap else f"{pct}%"
running_label = (
''
'still running'
if at_cap else ""
)
slot.markdown(
f''
f'
{hint}
'
f'
'
f'
{pct_display} · {elapsed_str}{running_label}
'
f'{stuck_html}'
f'{post_html}'
f'
',
unsafe_allow_html=True,
)
idx += 1
time.sleep(0.35)
# Worker has finished: clear the progress display, then surface its outcome.
t.join()
slot.empty()
if exc_holder:
raise exc_holder["e"]
return result_holder["r"]
# Render a one-line cost / token summary from the agent's `usage` dict.
# Reads "cost_usd", "input_tokens" and "output_tokens"; missing keys count as 0.
def render_cost_bar(usage: dict) -> None:
# NOTE(review): a key that is present but None would break the arithmetic and
# the `:,` formatting below — confirm the backend never sends None here.
cost = usage.get("cost_usd", 0)
inp = usage.get("input_tokens", 0)
out = usage.get("output_tokens", 0)
# scale: 0–$0.50 maps to 0–100% of bar
# NOTE(review): `pct` is not referenced by the markup visible in this view —
# confirm it still feeds the bar width.
pct = min(cost / 0.50 * 100, 100)
# Costs under one cent are shown as a bound rather than a rounded figure.
if cost < 0.01:
# NOTE(review): f-string without placeholders; a plain literal would do.
val_str = f"< $0.01"
else:
val_str = f"${cost:.3f}"
tok_str = f"{inp:,} in · {out:,} out"
st.markdown(
f''
f'
cost'
f'
'
f'
{val_str}'
f'
{tok_str}'
f'
',
unsafe_allow_html=True,
)
def render_tool_calls(tool_calls: list[dict]) -> None:
    """Show the agent's tool calls inside a collapsed "Method" expander.

    For every call this renders a step header with the tool name, the call
    arguments as JSON (when present), the result ``table`` as a dataframe
    (when present) and the paths of any saved CSV/PNG files. Steps are
    separated by horizontal rules.

    Args:
        tool_calls: Tool-call dicts with optional ``tool``, ``args`` and
            ``result`` keys.
    """
    n = len(tool_calls)
    with st.expander(f"Method {n} step{'s' if n != 1 else ''}", expanded=False):
        for i, tc in enumerate(tool_calls):
            st.markdown(
                f"Step {i+1} -> {tc.get('tool','?')}",
                unsafe_allow_html=True,
            )
            if tc.get("args"):
                st.json(tc["args"], expanded=False)

            res = tc.get("result") or {}
            # Non-dict results (e.g. a bare string) have nothing further to show.
            if isinstance(res, dict):
                if res.get("table"):
                    try:
                        st.dataframe(
                            pd.DataFrame(res["table"]),
                            use_container_width=True,
                            hide_index=True,
                        )
                    except Exception as exc:
                        # Report instead of silently dropping the table, the
                        # same way render_plot reports a chart it cannot draw.
                        st.warning(f"Table could not be rendered: {exc}")
                for key in ("saved_csv", "saved_png"):
                    if res.get(key):
                        st.markdown(f"-> {res[key]}", unsafe_allow_html=True)

            # Divider between steps, but not after the last one.
            if i < n - 1:
                st.markdown("---")
def render_export_buttons(answer: str, tool_calls: list[dict], turn_idx: int) -> None:
    """Render one download button for the answer and one per saved CSV/PNG.

    The answer is always offered as ``answer_<turn_idx>.md``. Artifact files
    are taken from ``extract_artifacts(tool_calls)``: CSVs first, then PNGs.
    Files that cannot be read are skipped.

    Args:
        answer: Markdown text of the assistant's answer.
        tool_calls: Tool calls of the turn, scanned for saved artifacts.
        turn_idx: Index used to namespace the widget keys of this turn.
    """
    artifacts = extract_artifacts(tool_calls)
    items: list[tuple[str, bytes, str, str]] = [
        ("answer.md", answer.encode("utf-8"), "text/markdown", f"answer_{turn_idx}.md"),
    ]
    for kind, mime in (("csv", "text/csv"), ("png", "image/png")):
        # The same path can be reported by several tool calls; offer it once.
        unique_paths = dict.fromkeys(a["path"] for a in artifacts if a["type"] == kind)
        for raw_path in unique_paths:
            p = Path(raw_path)
            try:
                payload = p.read_bytes()
            except OSError:
                # Missing, removed since it was saved, a directory, or unreadable.
                continue
            items.append((p.name, payload, mime, p.name))

    cols = st.columns(len(items))
    for pos, (col, (label, data, mime, fname)) in enumerate(zip(cols, items)):
        with col:
            st.download_button(
                label=label, data=data, file_name=fname, mime=mime,
                # The position keeps keys unique when two artifacts from
                # different directories share a basename.
                key=f"dl_{turn_idx}_{pos}_{fname}",
            )
# ── session state ──────────────────────────────────────────────────────────
# Keys the rest of the script expects; each is created only if absent so
# values survive Streamlit reruns.
_SESSION_DEFAULTS = {
    "history": [],
    "chat": [],
    "turns": [],
    "prefill": "",
    "authenticated": False,
    "logged_out": False,
}
for _name, _initial in _SESSION_DEFAULTS.items():
    if _name not in st.session_state:
        st.session_state[_name] = _initial

# seed from env if already set (e.g. from .env file) — but not if user explicitly logged out
_may_auto_login = not (st.session_state["authenticated"] or st.session_state["logged_out"])
if _may_auto_login and os.environ.get("ANTHROPIC_API_KEY"):
    st.session_state["authenticated"] = True
# ── dataset metadata ───────────────────────────────────────────────────────
# Headline figures pulled from the dataset snapshot; every lookup tolerates a
# missing section so an empty snapshot yields None / 0 / "n/a".
meta = dataset_snapshot()
_posts_meta = meta.get("posts", {})
_comments_meta = meta.get("comments", {})
_comment_dates = _comments_meta.get("date_range") or {}

posts_rows = _posts_meta.get("rows")
comments_rows = _comments_meta.get("rows")
sub_count = len(_posts_meta.get("subreddits") or [])
latest_date = _comment_dates.get("latest", "n/a")
# ── login gate ─────────────────────────────────────────────────────────────
# Until a key has been accepted, only the login form is rendered: the script
# stops at the end of this block on every run.
if not st.session_state["authenticated"]:
    st.markdown("""
Grasping Gooning
enter your Anthropic API key to continue
""", unsafe_allow_html=True)
    col = st.columns([1, 2, 1])[1]
    with col:
        login_key = st.text_input(
            "API key", type="password", placeholder="sk-ant-…",
            label_visibility="collapsed",
        )
        if st.button("Enter ->", key="login_btn", use_container_width=True):
            # Drop non-ASCII characters, then surrounding whitespace, and
            # validate what is actually stored: a pasted key with a trailing
            # newline or only non-ASCII characters must not be accepted as-is.
            ascii_key = login_key.encode("ascii", errors="ignore").decode("ascii").strip()
            if ascii_key:
                # NOTE(review): os.environ is process-wide, so this key is
                # visible to every session served by the same process —
                # confirm this app is only ever run single-user.
                os.environ["ANTHROPIC_API_KEY"] = ascii_key
                st.session_state["authenticated"] = True
                st.session_state["logged_out"] = False
                st.rerun()
            else:
                st.error("Paste your API key above.")
    st.stop()
# ── sidebar ────────────────────────────────────────────────────────────────
def _blank_conversation() -> dict:
    """Return fresh, empty values for every conversation-related session key."""
    return {"history": [], "chat": [], "turns": [], "prefill": ""}


with st.sidebar:
    st.markdown("""
Grasping Gooning
reddit data analysis agent
""", unsafe_allow_html=True)
    st.markdown("", unsafe_allow_html=True)

    # Clearing keeps the user logged in and only wipes the conversation.
    clear_clicked = st.button("Clear conversation", key="clear")
    if clear_clicked:
        st.session_state.update(_blank_conversation())
        st.rerun()

    # Logging out also forgets the API key and blocks re-seeding from the env.
    logout_clicked = st.button("Log out", key="logout")
    if logout_clicked:
        os.environ.pop("ANTHROPIC_API_KEY", None)
        st.session_state.update(
            {**_blank_conversation(), "authenticated": False, "logged_out": True}
        )
        st.rerun()

    # ── about (bottom of sidebar) ──────────────────────────────────────────
    st.markdown("", unsafe_allow_html=True)
# ── chat history ───────────────────────────────────────────────────────────
# Replay every stored chat message on each run. Assistant messages also get
# their plots, cost bar, tool-call expander and export buttons.
for i, msg in enumerate(st.session_state["chat"]):
with st.chat_message(msg["role"]):
role = msg["role"]
route = msg.get("route", "")
label = "You" if role == "user" else "Answer"
# The route tag is only shown on assistant messages that have one.
route_html = f"{route}" if route and role == "assistant" else ""
st.markdown(
f"{label}{route_html}
",
unsafe_allow_html=True,
)
st.markdown("", unsafe_allow_html=True)
st.markdown(msg["content"])
st.markdown("
", unsafe_allow_html=True)
# Accept either the plural "plotly_jsons" list or a single "plotly_json" string.
for pj in (msg.get("plotly_jsons") or ([msg["plotly_json"]] if msg.get("plotly_json") else [])):
render_plot(pj)
if msg.get("usage"):
render_cost_bar(msg["usage"])
if msg.get("tool_calls"):
render_tool_calls(msg["tool_calls"])
# `i` is the index into the chat list (user and assistant messages interleaved),
# and is what namespaces the download-button keys for this message.
if role == "assistant":
render_export_buttons(msg["content"], msg.get("tool_calls") or [], i)
# ── chat input ─────────────────────────────────────────────────────────────
# A queued `prefill` is consumed once: it is cleared here and only used when
# the chat box itself returned nothing on this run.
prefill = st.session_state["prefill"]
question = st.chat_input("what do you want to know…")
if prefill:
st.session_state["prefill"] = ""
effective_question = question or prefill
if effective_question:
question = effective_question
# History is built from the turns recorded before this question.
backend_history = build_backend_history(st.session_state["turns"])
with st.chat_message("user"):
st.markdown("You
",
unsafe_allow_html=True)
st.markdown("", unsafe_allow_html=True)
st.markdown(question)
st.markdown("
", unsafe_allow_html=True)
with st.chat_message("assistant"):
progress_slot = st.empty()
try:
# list(...) hands the agent a shallow copy of the stored turns.
result = call_agent_with_progress(question, backend_history, list(st.session_state["turns"]), progress_slot)
except Exception as exc:
err_str = str(exc)
# Auth failures are recognised by exception class name or message text.
# NOTE(review): the bare "401" substring test can also match unrelated
# messages that merely contain those digits — confirm this is acceptable.
is_auth_err = (
type(exc).__name__ in ("AuthenticationError", "PermissionDeniedError")
or "invalid x-api-key" in err_str.lower()
or "401" in err_str
)
if is_auth_err:
os.environ.pop("ANTHROPIC_API_KEY", None)
st.session_state.update(authenticated=False, logged_out=True)
# NOTE(review): st.rerun() follows immediately, so this error message is
# likely never visible to the user — confirm and consider persisting it.
st.error("API key rejected — please re-enter it.")
st.rerun()
elif "rate_limit" in err_str.lower():
st.error("Rate limited. Wait a moment and try again.")
elif "Unicode encoding error" in err_str or ("ascii" in err_str.lower() and "codec" in err_str.lower()):
st.error("Encoding error — your API key may contain non-standard characters. Log out and re-enter it.")
else:
st.error(f"Something went wrong: {err_str[:300]}")
st.stop()
answer = result.get("answer", "")
tool_calls = result.get("tool_calls", [])
# Accept either the plural "plotly_jsons" list or a single "plotly_json" string.
plotly_jsons = result.get("plotly_jsons") or ([result["plotly_json"]] if result.get("plotly_json") else [])
route = result.get("route", "")
usage = result.get("usage") or {}
route_html = f"{route}" if route else ""
st.markdown(
f"Answer{route_html}
",
unsafe_allow_html=True,
)
st.markdown("", unsafe_allow_html=True)
st.markdown(answer)
st.markdown("
", unsafe_allow_html=True)
for pj in plotly_jsons:
render_plot(pj)
if usage:
render_cost_bar(usage)
if tool_calls:
render_tool_calls(tool_calls)
# NOTE(review): this passes len(turns) as the key index, while the chat-history
# loop passes the chat-list index. The two can coincide in one run (e.g. on the
# second question both are 1), which would give two download buttons the same
# widget key before this turn is saved — confirm and unify the index.
render_export_buttons(answer, tool_calls, len(st.session_state["turns"]))
# Record the turn for the backend and the two chat messages for replay.
turn = {
"question": question, "answer": answer,
"tool_calls": tool_calls, "plotly_jsons": plotly_jsons,
"artifacts": extract_artifacts(tool_calls), "route": route,
"usage": usage,
}
st.session_state["turns"].append(turn)
# NOTE(review): "history" is written here but not read anywhere in this view;
# the backend history is rebuilt from "turns" on each question.
st.session_state["history"] = build_backend_history(st.session_state["turns"])
st.session_state["chat"].append({"role": "user", "content": question})
st.session_state["chat"].append({
"role": "assistant", "content": answer,
"tool_calls": tool_calls, "plotly_jsons": plotly_jsons,
"route": route, "usage": usage,
})