Spaces:
Running
Running
import hmac
import io
import json
import os
import time

from dotenv import load_dotenv

load_dotenv()

import numpy as np
import pandas as pd
import requests
from flask import Flask, render_template, request, Response, session, redirect, url_for
from werkzeug.middleware.proxy_fix import ProxyFix
app = Flask(__name__)
# Secret for signing the session cookie.
# NOTE(review): both fallback values below (secret key and admin password) are
# hard-coded and guessable -- set FLASK_SECRET / ADMIN_PASSWORD in the
# environment for any non-local deployment.
app.secret_key = os.environ.get("FLASK_SECRET", "otree-csv-viewer-default-key")
# SameSite=None + Secure: presumably so the session cookie survives cross-site
# embedding (e.g. an iframe) -- Secure is required whenever SameSite=None.
app.config["SESSION_COOKIE_SAMESITE"] = "None"
app.config["SESSION_COOKIE_SECURE"] = True
app.config["SESSION_COOKIE_HTTPONLY"] = True
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "bpel123")
# Trust X-Forwarded-Proto / X-Forwarded-Host from one reverse proxy so
# url_for() builds https:// URLs behind TLS termination.
app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_host=1)
def require_login():
    """Gate every request behind the session auth flag.

    Returns None (letting the request proceed) for the login page, static
    assets and the favicon, or for an authenticated session; otherwise
    returns a 401 for /api/* callers and a redirect to login for pages.

    NOTE(review): no @app.before_request decorator is visible in this
    (formatting-mangled) source -- confirm the hook is actually registered,
    otherwise this check never runs.
    """
    # Whitelist: the login endpoint, static files, and the favicon path.
    if request.endpoint in ("login", "static") or request.path == "/favicon.ico":
        return
    if not session.get("authenticated"):
        # API clients get a bare 401 instead of an HTML redirect.
        if request.path.startswith("/api/"):
            return Response("Unauthorized", status=401)
        return redirect(url_for("login"))
| def to_json(obj): | |
| """jsonify replacement that handles numpy types.""" | |
| def default(o): | |
| if isinstance(o, (np.integer,)): | |
| return int(o) | |
| if isinstance(o, (np.floating,)): | |
| if np.isnan(o): | |
| return None | |
| return float(o) | |
| if isinstance(o, np.ndarray): | |
| return o.tolist() | |
| raise TypeError(f"{type(o)} not serializable") | |
| data = json.dumps(obj, default=default) | |
| return Response(data, mimetype="application/json") | |
# Local snapshot of the oTree "all apps wide" export, next to this file.
CSV_PATH = os.path.join(os.path.dirname(__file__), "all_apps_wide-2026-03-31.csv")
# Remote export URL; defaults to "" -- fetch_otree() will fail (502) until set.
OTREE_URL = os.environ.get("OTREE_CSV_URL", "")
# Identifier columns to always include alongside app_collect_results
ID_COLS = [
    "participant.id_in_session",
    "participant.code",
    "participant.label",
    "participant._current_app_name",
    "participant._current_page_name",
    "participant.treatment",
    "participant.payoff",
    "session.code",
]
# Wide-format prefix for the data-collection app's columns.
COLLECT_PREFIX = "app_collect_results."
# In-memory cache so we don't re-read disk on every request
# (keyed by file mtime; see _get_df()).
_cache = {"df": None, "mtime": 0}
def _get_df():
    """Return the cached dataframe, reloading from disk when the file changed.

    The mtime of CSV_PATH is used as the cache key; a missing file is treated
    as mtime 0 (the read itself will then raise on first access).
    """
    try:
        current_mtime = os.path.getmtime(CSV_PATH)
    except OSError:
        current_mtime = 0
    needs_reload = _cache["df"] is None or current_mtime != _cache["mtime"]
    if needs_reload:
        _cache["df"] = pd.read_csv(CSV_PATH)
        _cache["mtime"] = current_mtime
    return _cache["df"]
def load_csv(collect_only=True):
    """Return a copy of the dataset.

    With collect_only=True (the default) the frame is trimmed to the known
    identifier columns plus every app_collect_results.* column.
    """
    frame = _get_df().copy()
    if not collect_only:
        return frame
    identifiers = [col for col in ID_COLS if col in frame.columns]
    collected = [col for col in frame.columns if col.startswith(COLLECT_PREFIX)]
    return frame[identifiers + collected]
def login():
    """Render the login form; on POST, check the password and set the flag.

    Successful login sets session["authenticated"] and redirects to the
    index; a wrong password re-renders the form with an error message.

    NOTE(review): no @app.route decorator is visible in this
    (formatting-mangled) source -- url_for("login") elsewhere implies an
    endpoint named "login" exists; confirm registration.
    """
    error = None
    if request.method == "POST":
        # compare_digest: constant-time comparison so the check doesn't leak
        # password length/prefix via response timing. Default "" keeps the
        # missing-field case a plain mismatch (as before with == None).
        if hmac.compare_digest(request.form.get("password", ""), ADMIN_PASSWORD):
            session["authenticated"] = True
            return redirect(url_for("index"))
        error = "Incorrect password"
    return render_template("login.html", error=error)
def logout():
    """Wipe all session state and send the user back to the login page."""
    session.clear()
    login_url = url_for("login")
    return redirect(login_url)
def favicon():
    """Serve the SVG favicon via a temporary (302) redirect into /static."""
    target = url_for("static", filename="favicon.svg")
    return redirect(target, code=302)
def index():
    """Main dashboard page; the template drives the /api endpoints."""
    return render_template("index.html")
def api_data():
    """Return the table as JSON: shortened display names, raw names, rows.

    Query param collect_only=1 (default) trims to ID + collect columns.
    """
    collect_only = request.args.get("collect_only", "1") == "1"
    df = load_csv(collect_only=collect_only)

    def shorten(col):
        # app_collect_results.N.player.field -> field
        if col.startswith(COLLECT_PREFIX):
            return col.split(".")[-1]
        # participant.* -> p.* ; session.* -> s.* ; everything else unchanged
        if col.startswith("participant."):
            return col.replace("participant.", "p.")
        if col.startswith("session."):
            return col.replace("session.", "s.")
        return col

    return to_json({
        "columns": [shorten(c) for c in df.columns],
        "raw_columns": list(df.columns),
        "rows": df.fillna("").values.tolist(),
        "total": len(df),
    })
def fetch_otree():
    """Pull fresh CSV from oTree and overwrite the local file.

    On any failure (network, HTTP error, unparsable CSV) a JSON error body
    is returned with status 502.
    """
    try:
        resp = requests.get(OTREE_URL, timeout=30)
        resp.raise_for_status()
        # Round-trip through pandas: validates it is actually CSV.
        df = pd.read_csv(io.StringIO(resp.text))
        df.to_csv(CSV_PATH, index=False)
        # Refresh the in-memory cache so the next request sees the new data.
        _cache["df"] = df
        _cache["mtime"] = os.path.getmtime(CSV_PATH)
    except Exception as e:
        return to_json({"ok": False, "error": str(e)}), 502
    return to_json({"ok": True, "rows": len(df), "cols": len(df.columns)})
def upload_csv():
    """Accept an uploaded CSV file to replace the current dataset.

    Returns 400 with a JSON error when no file was sent or parsing fails.
    """
    uploaded = request.files.get("file")
    if not uploaded:
        return to_json({"ok": False, "error": "No file uploaded"}), 400
    try:
        df = pd.read_csv(uploaded.stream)
        df.to_csv(CSV_PATH, index=False)
        # Replace the cache in place so readers see the new frame immediately.
        _cache["df"] = df
        _cache["mtime"] = os.path.getmtime(CSV_PATH)
        return to_json({"ok": True, "rows": len(df), "cols": len(df.columns)})
    except Exception as e:
        return to_json({"ok": False, "error": str(e)}), 400
def api_payments():
    """Return just PC_id, total_bonus, participant_session_id for the payments page.

    Columns missing from the CSV are silently dropped (that is what `keep`
    is for); session_ids is the sorted set of distinct non-empty ids.
    """
    df = _get_df()
    col_map = {
        "app_collect_results.1.player.PC_id_manual_input": "PC_id",
        "app_collect_results.1.player.total_bonus": "total_bonus",
        "app_collect_results.1.player.participant_session_id": "participant_session_id",
    }
    keep = [c for c in col_map if c in df.columns]
    sub = df[keep].rename(columns=col_map).fillna("")
    # Distinct session ids sorted alphabetically. Guard the column lookup:
    # previously this raised KeyError whenever the session-id column was
    # absent, even though `keep` deliberately tolerates missing columns.
    if "participant_session_id" in sub.columns:
        session_ids = sorted({str(v) for v in sub["participant_session_id"] if v != ""})
    else:
        session_ids = []
    return to_json({
        "columns": [col_map[c] for c in keep],
        "rows": sub.values.tolist(),
        "session_ids": session_ids,
    })
def api_stats():
    """Session-level stats dashboard data.

    Splits rows into "completed" (non-empty participant_session_id) vs
    incomplete, then returns: a global summary, a per-session breakdown,
    a current app/page funnel, and a completed-players funnel.
    """
    df = _get_df().copy()
    # Wide-format column names used below.
    sid_col = "app_collect_results.1.player.participant_session_id"
    app_col = "participant._current_app_name"
    page_col = "participant._current_page_name"
    bot_col = "participant._is_bot"  # NOTE(review): defined but never used below
    orphan_col = "app_collect_results.1.player.was_orphan"
    dropout_col = "participant.midgame_dropout"
    timeout_col = "participant.timed_out_from_coord_games"
    duration_col = "participant.completion_duration_time"
    treatment_col = "participant.treatment"
    # Only rows with a non-empty participant_session_id = "completed"
    # (assumes sid_col/app_col/page_col/duration_col exist in the CSV --
    # unlike the guarded columns below, these raise KeyError if missing).
    df["_sid"] = df[sid_col].fillna("")
    completed = df[df["_sid"] != ""]
    incomplete = df[df["_sid"] == ""]
    # App sequence (derived from column order)
    app_order = []
    for c in df.columns:
        parts = c.split(".")
        # Any "<app>.<round>..." column whose prefix is not participant/session
        # names an app; first appearance in column order fixes the sequence.
        if len(parts) > 1 and parts[0] not in ("participant", "session") and parts[0] not in app_order:
            app_order.append(parts[0])
    # ---- Global summary ----
    total = len(df)
    n_completed = len(completed)
    n_incomplete = len(incomplete)
    # Coerce durations to numeric; non-numeric entries are dropped.
    durations = pd.to_numeric(completed[duration_col], errors="coerce").dropna()
    global_summary = {
        "total_rows": total,
        "completed": n_completed,
        "incomplete": n_incomplete,
        "completion_rate": round(n_completed / total * 100, 1) if total else 0,
        "orphans": int((df[orphan_col] == 1).sum()) if orphan_col in df.columns else 0,
        "dropouts": int((pd.to_numeric(df[dropout_col], errors="coerce") == 1).sum()) if dropout_col in df.columns else 0,
        "timed_out": int((pd.to_numeric(df[timeout_col], errors="coerce") == 1).sum()) if timeout_col in df.columns else 0,
        "duration_median": round(durations.median(), 1) if len(durations) else None,
        "duration_mean": round(durations.mean(), 1) if len(durations) else None,
        "duration_min": round(durations.min(), 1) if len(durations) else None,
        "duration_max": round(durations.max(), 1) if len(durations) else None,
    }
    # ---- Per-session breakdown ----
    session_ids = sorted(completed["_sid"].unique())
    per_session = []
    for sid in session_ids:
        s = completed[completed["_sid"] == sid]
        s_dur = pd.to_numeric(s[duration_col], errors="coerce").dropna()
        per_session.append({
            "session_id": sid,
            "n": len(s),
            "orphans": int((s[orphan_col] == 1).sum()) if orphan_col in s.columns else 0,
            "duration_median": round(s_dur.median(), 1) if len(s_dur) else None,
            "duration_mean": round(s_dur.mean(), 1) if len(s_dur) else None,
            "treatments": dict(s[treatment_col].fillna("(none)").value_counts()),
        })
    # ---- Current app/page funnel (where are people RIGHT NOW) ----
    app_page = df[[app_col, page_col, "_sid"]].fillna("(empty)")
    funnel = []
    for a in app_order:
        sub = app_page[app_page[app_col] == a]
        if len(sub) == 0:
            continue
        pages = dict(sub[page_col].value_counts())
        funnel.append({"app": a, "count": len(sub), "pages": pages})
    # Also count those whose current_app is empty
    empty_app = app_page[app_page[app_col] == "(empty)"]
    if len(empty_app):
        funnel.append({"app": "(no app)", "count": len(empty_app), "pages": {}})
    # ---- Completed players: which app they finished at (current_app) ----
    completed_funnel = []
    for a in app_order:
        n = int((completed[app_col] == a).sum())
        if n:
            completed_funnel.append({"app": a, "count": n})
    return to_json({
        "global": global_summary,
        "per_session": per_session,
        "funnel": funnel,
        "completed_funnel": completed_funnel,
        "app_order": app_order,
    })
def api_signal():
    """Signal game analysis for completed participants.

    Returns buy rates, color distributions, group-success rates and
    per-session breakdowns, split by the group-level treatment flag.
    """
    df = _get_df()
    # Wide-format column names used below.
    sid_col = "app_collect_results.1.player.participant_session_id"
    treat_col = "signal_game.1.group.treatment"
    buys_col = "signal_game.1.player.buys_signal"
    color_col = "participant.signal_color_choice"
    skip_col = "participant.skip_signal_game"  # NOTE(review): defined but never used
    intend_col = "signal_game.1.player.intends_to_buy"
    intend_count_col = "signal_game.1.group.intend_count"
    group_id_col = "signal_game.1.group.id_in_subsession"
    success_col = "signal_game.1.group.group_success"
    beliefs_col = "signal_game.1.player.beliefs_truthful"
    reason_col = "signal_game.1.player.purchase_reason"  # NOTE(review): never used
    guess_col = "signal_game.1.player.guess_correct"  # NOTE(review): never used
    psid_col = "app_collect_results.1.player.participant_session_id"  # NOTE(review): duplicate of sid_col
    # Filter: completed participants (non-empty session letter)
    completed = df[df[sid_col].notna()].copy()
    total_completed = len(completed)
    # Played signal game = did not skip (skip == 0 or NaN with color present)
    played = completed[completed[color_col].notna()].copy()
    skipped = completed[~completed.index.isin(played.index)]
    total_played = len(played)
    total_skipped = len(skipped)
    # Treatment labels
    # (assumes treat_col holds 0.0/1.0; anything else maps to "Unknown")
    played["_treat"] = played[treat_col].map({0.0: "Control (0)", 1.0: "Treatment (1)"}).fillna("Unknown")
    # ---- Treatment distribution ----
    treat_dist = dict(played["_treat"].value_counts())
    # ---- Buy rate by treatment ----
    buy_by_treat = {}
    for treat_name, group in played.groupby("_treat"):
        n = len(group)
        bought = int((group[buys_col] == 1).sum())
        buy_by_treat[treat_name] = {
            "n": n,
            "bought": bought,
            "pct": round(bought / n * 100, 1) if n else 0,
        }
    # ---- Color distribution by treatment ----
    color_by_treat = {}
    for treat_name, group in played.groupby("_treat"):
        colors = dict(group[color_col].value_counts())
        color_by_treat[treat_name] = colors
    # ---- Overall color distribution ----
    overall_colors = dict(played[color_col].value_counts())
    # ---- Overall buy rate ----
    total_bought = int((played[buys_col] == 1).sum())
    overall_buy_pct = round(total_bought / total_played * 100, 1) if total_played else 0
    # ---- Intend to buy (among those who have the field) ----
    intend_df = played[played[intend_col].notna()]
    intend_yes = int((intend_df[intend_col] == 1).sum())
    intend_no = int((intend_df[intend_col] == 0).sum())
    # ---- Group success by treatment ----
    success_by_treat = {}
    for treat_name, group in played.groupby("_treat"):
        n = len(group)
        succ = int((group[success_col] == 1).sum())
        success_by_treat[treat_name] = {
            "n": n,
            "success": succ,
            "pct": round(succ / n * 100, 1) if n else 0,
        }
    # ---- Success rate by group intend_count (treatment only) ----
    treat_played = played[played["_treat"] == "Treatment (1)"]
    # One row per group (drop duplicate members) for group-level stats.
    treat_groups = treat_played.drop_duplicates(subset=[group_id_col])
    treat_groups = treat_groups[treat_groups[intend_count_col].notna()]
    success_by_intend = {}
    for ic, g in treat_groups.groupby(intend_count_col):
        n = len(g)
        succ = int((g[success_col] == 1).sum())
        success_by_intend[str(int(ic))] = {
            "n_groups": n,
            "success": succ,
            "pct": round(succ / n * 100, 1) if n else 0,
        }
    # ---- Buy rate by others' intent count (treatment only) ----
    tp = treat_played[treat_played[intend_count_col].notna() & treat_played[intend_col].notna()].copy()
    # Subtract the player's own intent from the group count to get "others".
    tp["_others_intend"] = tp[intend_count_col] - tp[intend_col]
    buy_by_others_intend = {}
    for oi, g in tp.groupby("_others_intend"):
        n = len(g)
        bought = int((g[buys_col] == 1).sum())
        buy_by_others_intend[str(int(oi))] = {
            "n": n,
            "bought": bought,
            "pct": round(bought / n * 100, 1) if n else 0,
        }
    # ---- Buy rate by group intend_count (treatment only, per player) ----
    buy_by_group_intend = {}
    for ic, g in tp.groupby(intend_count_col):
        n = len(g)
        bought = int((g[buys_col] == 1).sum())
        buy_by_group_intend[str(int(ic))] = {
            "n": n,
            "bought": bought,
            "pct": round(bought / n * 100, 1) if n else 0,
        }
    # ---- Beliefs distribution (among those who answered) ----
    beliefs_df = played[played[beliefs_col].notna()]
    beliefs_dist = {str(int(k)): int(v) for k, v in beliefs_df[beliefs_col].value_counts().items()}
    # ---- By session letter ----
    per_session = []
    for sid in sorted(played[psid_col].unique()):
        s = played[played[psid_col] == sid]
        n = len(s)
        bought = int((s[buys_col] == 1).sum())
        treats = dict(s["_treat"].value_counts())
        colors = dict(s[color_col].value_counts())
        per_session.append({
            "session_id": sid,
            "n": n,
            "bought": bought,
            "buy_pct": round(bought / n * 100, 1) if n else 0,
            "treatments": treats,
            "colors": colors,
        })
    return to_json({
        "total_completed": total_completed,
        "total_played": total_played,
        "total_skipped": total_skipped,
        "treat_dist": treat_dist,
        "buy_by_treat": buy_by_treat,
        "color_by_treat": color_by_treat,
        "overall_colors": overall_colors,
        "overall_buy_pct": overall_buy_pct,
        "total_bought": total_bought,
        "intend_yes": intend_yes,
        "intend_no": intend_no,
        "success_by_treat": success_by_treat,
        "success_by_intend": success_by_intend,
        "buy_by_others_intend": buy_by_others_intend,
        "buy_by_group_intend": buy_by_group_intend,
        "beliefs_dist": beliefs_dist,
        "per_session": per_session,
    })
def api_coop_games():
    """Combined Prisoner's Dilemma + Stag Hunt dashboard data.

    Runs the same per-game analysis for the "app_prisoner" and "app_stag"
    column prefixes and returns both results plus condition labels.
    """
    df = _get_df()
    sid_col = "app_collect_results.1.player.participant_session_id"

    def analyze_game(prefix):
        # Build all stats for one game; `prefix` is the oTree app name.
        treat_col = f"{prefix}.1.player.treatment_cond"
        coop_col = f"{prefix}.1.player.cooperate"
        bot_col = f"{prefix}.1.player.is_bot"
        payoff_col = f"{prefix}.1.player.payoff"
        msg_col = f"{prefix}.1.player.messages"
        comp_col = f"{prefix}.1.player.comprehension_passed"
        gpwait_col = f"{prefix}.1.player.time_spent_gpwait"  # NOTE(review): unused
        instr_col = f"{prefix}.1.player.time_spent_game_instr_page"  # NOTE(review): unused
        group_col = f"{prefix}.1.player.persistent_group_id"  # NOTE(review): unused
        # Filter: non-NaN treatment_cond
        played = df[df[treat_col].notna()].copy()
        n = len(played)
        # Treatment distribution
        treat_dist = dict(played[treat_col].value_counts())
        # Overall cooperation
        n_coop = int((played[coop_col] == 1).sum())
        n_defect = int((played[coop_col] == 0).sum())
        coop_rate = round(n_coop / n * 100, 1) if n else 0
        # Cooperation by treatment
        coop_by_treat = {}
        for t, g in played.groupby(treat_col):
            gn = len(g)
            gc = int((g[coop_col] == 1).sum())
            coop_by_treat[t] = {"n": gn, "coop": gc, "pct": round(gc / gn * 100, 1) if gn else 0}
        # Human vs bot cooperation by treatment (for bot conditions)
        # -- only emitted for conditions that actually contain bot rows.
        human_bot_coop = {}
        for t, g in played.groupby(treat_col):
            if bot_col in g.columns and g[bot_col].sum() > 0:
                humans = g[g[bot_col] == 0]
                bots = g[g[bot_col] == 1]
                hc = int((humans[coop_col] == 1).sum())
                bc = int((bots[coop_col] == 1).sum())
                human_bot_coop[t] = {
                    "human_n": len(humans), "human_coop": hc,
                    "human_pct": round(hc / len(humans) * 100, 1) if len(humans) else 0,
                    "bot_n": len(bots), "bot_coop": bc,
                    "bot_pct": round(bc / len(bots) * 100, 1) if len(bots) else 0,
                }
        # Bots vs humans overall
        n_bots = int((played[bot_col] == 1).sum()) if bot_col in played.columns else 0
        n_humans = n - n_bots
        # Payoff distribution
        payoff_dist = {}
        for t, g in played.groupby(treat_col):
            vals = pd.to_numeric(g[payoff_col], errors="coerce").dropna()
            payoff_dist[t] = {
                "mean": round(vals.mean(), 2) if len(vals) else None,
                "values": dict(vals.value_counts().sort_index()),
            }
        # Messages: how many non-empty per condition
        msg_by_treat = {}
        if msg_col in played.columns:
            for t, g in played.groupby(treat_col):
                msgs = g[msg_col].fillna("")
                non_empty = int((msgs.str.len() > 0).sum())
                msg_by_treat[t] = {"n": len(g), "with_msg": non_empty}
        # Comprehension
        # (missing column is treated as "everyone passed")
        comp_passed = int((played[comp_col] == 1).sum()) if comp_col in played.columns else n
        comp_failed = n - comp_passed
        # Per-session
        per_session = []
        for sid in sorted(played[sid_col].dropna().unique()):
            s = played[played[sid_col] == sid]
            sn = len(s)
            sc = int((s[coop_col] == 1).sum())
            treats = {}
            for t, g in s.groupby(treat_col):
                gn = len(g)
                gc = int((g[coop_col] == 1).sum())
                treats[t] = {"n": gn, "coop": gc, "pct": round(gc / gn * 100, 1) if gn else 0}
            per_session.append({
                "session_id": sid,
                "n": sn,
                "coop": sc,
                "coop_pct": round(sc / sn * 100, 1) if sn else 0,
                "treatments": treats,
            })
        return {
            "n": n,
            "n_humans": n_humans,
            "n_bots": n_bots,
            "n_coop": n_coop,
            "n_defect": n_defect,
            "coop_rate": coop_rate,
            "treat_dist": treat_dist,
            "coop_by_treat": coop_by_treat,
            "human_bot_coop": human_bot_coop,
            "payoff_dist": payoff_dist,
            "msg_by_treat": msg_by_treat,
            "comp_passed": comp_passed,
            "comp_failed": comp_failed,
            "per_session": per_session,
        }

    prisoner = analyze_game("app_prisoner")
    stag = analyze_game("app_stag")
    # Condition labels for the frontend
    cond_labels = {
        "condition_1": "Human + Chat",
        "condition_2": "Human + No Chat",
        "condition_3": "Bot + No Chat",
        "condition_4": "Bot + Chat",
    }
    return to_json({
        "prisoner": prisoner,
        "stag": stag,
        "cond_labels": cond_labels,
    })
def api_session_diagnostic():
    """Detect mistyped session letters using time-bucket analysis.

    Participants self-type a session letter; this endpoint infers each
    letter's canonical start time (midpoint of the modal 10-minute bucket)
    and flags rows whose timestamp is closer to a different letter's
    canonical time. Also reports signal-game groups whose members typed
    different letters, and per-letter bucket histograms.
    """
    df = _get_df()
    letter_col = "app_consent_consolidated.1.player.session_letter"
    sid_col = "app_collect_results.1.player.participant_session_id"
    time_col = "participant.time_started_utc"
    code_col = "participant.code"
    color_col = "participant.signal_color_choice"
    group_col = "signal_game.1.group.id_in_subsession"
    treat_col = "signal_game.1.group.treatment"
    has_letter = df[df[letter_col].notna()].copy()
    # Parse timestamps as UTC, then convert to US Eastern for bucketing/display.
    has_letter["_time"] = pd.to_datetime(has_letter[time_col], utc=True).dt.tz_convert("America/New_York")
    has_letter["_bucket"] = has_letter["_time"].dt.floor("10min")
    # Canonical time per letter = modal 10-minute bucket + 5 min (its midpoint).
    canonical = {}
    for letter in sorted(has_letter[letter_col].unique()):
        s = has_letter[has_letter[letter_col] == letter]
        mode_bucket = s["_bucket"].mode().iloc[0]
        canonical[letter] = mode_bucket + pd.Timedelta(minutes=5)
    # Per-letter summary rows.
    sessions = []
    for letter in sorted(has_letter[letter_col].unique()):
        s = has_letter[has_letter[letter_col] == letter]
        completed = s[s[sid_col].notna()]
        played_signal = s[s[color_col].notna()]
        n_total = len(s)
        n_completed = len(completed)
        n_signal = len(played_signal)
        sessions.append({
            "letter": letter,
            "canonical_bucket": canonical[letter].strftime("%Y-%m-%d %H:%M"),
            "n_total": n_total,
            "n_completed": n_completed,
            "n_signal_played": n_signal,
            # mod7: presumably signal-game groups come in sevens, so a nonzero
            # remainder hints at a mis-assigned player -- TODO confirm.
            "mod7": n_signal % 7,
            "time_min": s["_time"].min().strftime("%Y-%m-%d %H:%M:%S"),
            "time_max": s["_time"].max().strftime("%Y-%m-%d %H:%M:%S"),
        })
    # Flag rows whose timestamp is nearer another letter's canonical time.
    flagged = []
    letters = sorted(canonical.keys())
    for _, row in has_letter.iterrows():
        typed = row[letter_col]
        t = row["_time"]
        if pd.isna(t):
            continue
        # Distance (seconds) from this row's start time to every canonical time.
        dists = {l: abs((t - canonical[l]).total_seconds()) for l in letters}
        nearest = min(dists, key=dists.get)
        if nearest != typed:
            flagged.append({
                "code": row[code_col],
                "typed_letter": typed,
                "suggested_letter": nearest,
                "timestamp": t.strftime("%Y-%m-%d %H:%M:%S"),
                "dist_own_min": round(dists[typed] / 60, 1),
                "dist_suggested_min": round(dists[nearest] / 60, 1),
            })
    # Signal-game groups whose members typed different letters ("split" groups).
    played = has_letter[has_letter[group_col].notna()].copy()
    split_groups = []
    if len(played):
        for gid, g in played.groupby(group_col):
            letter_counts = g[letter_col].value_counts().to_dict()
            if len(letter_counts) > 1:
                treat_val = g[treat_col].iloc[0]
                split_groups.append({
                    "group_id": int(gid),
                    "treatment": int(treat_val) if pd.notna(treat_val) else None,
                    "size": len(g),
                    "letters": {str(k): int(v) for k, v in letter_counts.items()},
                })
    # Per-letter histogram of 10-minute buckets for the diagnostic UI.
    bucket_detail = {}
    for letter in sorted(has_letter[letter_col].unique()):
        s = has_letter[has_letter[letter_col] == letter]
        counts = s["_bucket"].value_counts().sort_index()
        bucket_detail[letter] = [
            {"bucket": b.strftime("%Y-%m-%d %H:%M"), "count": int(c)}
            for b, c in counts.items()
        ]
    return to_json({
        "sessions": sessions,
        "flagged": flagged,
        "split_groups": split_groups,
        "bucket_detail": bucket_detail,
        "canonical": {l: v.strftime("%Y-%m-%d %H:%M") for l, v in canonical.items()},
    })
def api_columns():
    """Return all CSV columns grouped by their dotted prefix (app name)."""
    grouped = {}
    for col in _get_df().columns:
        # "participant.code" -> bucket "participant"; no dot -> whole name.
        bucket = col.partition(".")[0]
        grouped.setdefault(bucket, []).append(col)
    return to_json(grouped)
if __name__ == "__main__":
    # Direct-run entry point: listen on all interfaces, port 7860
    # (presumably the hosting platform's expected port -- confirm).
    app.run(host="0.0.0.0", port=7860)