import io
import json
import os
import time

from dotenv import load_dotenv

# Load .env BEFORE any os.environ.get() below so FLASK_SECRET,
# ADMIN_PASSWORD and OTREE_CSV_URL can come from the .env file.
load_dotenv()

import numpy as np
import pandas as pd
import requests
from flask import (
    Flask,
    Response,
    redirect,
    render_template,
    request,
    session,
    url_for,
)
from werkzeug.middleware.proxy_fix import ProxyFix

app = Flask(__name__)
app.secret_key = os.environ.get("FLASK_SECRET", "otree-csv-viewer-default-key")
# Cookie settings for cross-site embedding: SameSite=None requires Secure.
app.config["SESSION_COOKIE_SAMESITE"] = "None"
app.config["SESSION_COOKIE_SECURE"] = True
app.config["SESSION_COOKIE_HTTPONLY"] = True

# NOTE(review): hard-coded fallback password — fine for a dev default, but
# make sure ADMIN_PASSWORD is set in production.
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "bpel123")

# Trust X-Forwarded-Proto / X-Forwarded-Host from the reverse proxy so
# url_for() generates correct external URLs behind HTTPS termination.
app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_host=1)


@app.before_request
def require_login():
    """Gate every request behind the session login.

    Exempts the login page, static assets and the favicon. Unauthenticated
    API calls get a bare 401; everything else is redirected to /login.
    """
    if request.endpoint in ("login", "static") or request.path == "/favicon.ico":
        return
    if not session.get("authenticated"):
        if request.path.startswith("/api/"):
            return Response("Unauthorized", status=401)
        return redirect(url_for("login"))


def to_json(obj):
    """jsonify replacement that handles numpy types.

    Serializes numpy integers/floats/arrays (NaN becomes JSON null) and
    returns a Flask Response with an application/json mimetype.
    """

    def default(o):
        if isinstance(o, np.integer):
            return int(o)
        if isinstance(o, np.floating):
            # NaN is not valid JSON; emit null instead.
            if np.isnan(o):
                return None
            return float(o)
        if isinstance(o, np.ndarray):
            return o.tolist()
        raise TypeError(f"{type(o)} not serializable")

    data = json.dumps(obj, default=default)
    return Response(data, mimetype="application/json")


CSV_PATH = os.path.join(os.path.dirname(__file__), "all_apps_wide-2026-03-31.csv")
OTREE_URL = os.environ.get("OTREE_CSV_URL", "")

# Identifier columns to always include alongside app_collect_results
ID_COLS = [
    "participant.id_in_session",
    "participant.code",
    "participant.label",
    "participant._current_app_name",
    "participant._current_page_name",
    "participant.treatment",
    "participant.payoff",
    "session.code",
]
COLLECT_PREFIX = "app_collect_results."
# In-memory cache so we don't re-read disk on every request
_cache = {"df": None, "mtime": 0}


def _get_df():
    """Return the dataframe, re-reading from disk only if the file changed."""
    try:
        mtime = os.path.getmtime(CSV_PATH)
    except OSError:
        # File missing/unreadable: force a (failing) read attempt below
        # rather than silently serving a stale cache forever.
        mtime = 0
    if _cache["df"] is None or mtime != _cache["mtime"]:
        _cache["df"] = pd.read_csv(CSV_PATH)
        _cache["mtime"] = mtime
    return _cache["df"]


def load_csv(collect_only=True):
    """Return a copy of the dataset, optionally reduced to ID + collect columns.

    When collect_only is true, keeps only the identifier columns (ID_COLS)
    that exist in the file plus every ``app_collect_results.*`` column.
    """
    df = _get_df().copy()
    if collect_only:
        collect_cols = [c for c in df.columns if c.startswith(COLLECT_PREFIX)]
        keep = [c for c in ID_COLS if c in df.columns] + collect_cols
        df = df[keep]
    return df


@app.route("/login", methods=["GET", "POST"])
def login():
    """Password-only login form; sets session['authenticated'] on success."""
    error = None
    if request.method == "POST":
        if request.form.get("password") == ADMIN_PASSWORD:
            session["authenticated"] = True
            return redirect(url_for("index"))
        error = "Incorrect password"
    return render_template("login.html", error=error)


@app.route("/logout")
def logout():
    """Clear the session and send the user back to the login page."""
    session.clear()
    return redirect(url_for("login"))


@app.route("/favicon.ico")
def favicon():
    """Serve the SVG favicon via redirect (path is exempt from auth)."""
    return redirect(url_for("static", filename="favicon.svg"), code=302)


@app.route("/")
def index():
    """Main viewer page; data is fetched client-side from /api/data."""
    return render_template("index.html")


@app.route("/api/data")
def api_data():
    """Return the table as JSON: display column names, raw names and rows.

    Query param ``collect_only=1`` (default) restricts to ID + collect columns.
    Display names are shortened: collect columns keep only the last dotted
    segment, ``participant.`` -> ``p.`` and ``session.`` -> ``s.``.
    """
    collect_only = request.args.get("collect_only", "1") == "1"
    df = load_csv(collect_only=collect_only)
    display_cols = []
    for c in df.columns:
        if c.startswith(COLLECT_PREFIX):
            display_cols.append(c.split(".")[-1])
        elif c.startswith("participant."):
            display_cols.append(c.replace("participant.", "p."))
        elif c.startswith("session."):
            display_cols.append(c.replace("session.", "s."))
        else:
            display_cols.append(c)
    return to_json({
        "columns": display_cols,
        "raw_columns": list(df.columns),
        "rows": df.fillna("").values.tolist(),
        "total": len(df),
    })


@app.route("/api/fetch-otree", methods=["POST"])
def fetch_otree():
    """Pull fresh CSV from oTree and overwrite the local file.

    On any failure (network, HTTP error, unparseable CSV) responds with
    {"ok": false, "error": ...} and status 502.
    """
    try:
        resp = requests.get(OTREE_URL, timeout=30)
        resp.raise_for_status()
        # Validate it's actually CSV before clobbering the file on disk.
        df = pd.read_csv(io.StringIO(resp.text))
        df.to_csv(CSV_PATH, index=False)
        # Bust the in-memory cache so subsequent requests see the new data.
        _cache["df"] = df
        _cache["mtime"] = os.path.getmtime(CSV_PATH)
        return to_json({"ok": True, "rows": len(df), "cols": len(df.columns)})
    except Exception as e:
        return to_json({"ok": False, "error": str(e)}), 502


@app.route("/api/upload", methods=["POST"])
def upload_csv():
    """Accept an uploaded CSV file to replace the current dataset."""
    f = request.files.get("file")
    if not f:
        return to_json({"ok": False, "error": "No file uploaded"}), 400
    try:
        df = pd.read_csv(f.stream)
        df.to_csv(CSV_PATH, index=False)
        # Bust the in-memory cache so subsequent requests see the new data.
        _cache["df"] = df
        _cache["mtime"] = os.path.getmtime(CSV_PATH)
        return to_json({"ok": True, "rows": len(df), "cols": len(df.columns)})
    except Exception as e:
        return to_json({"ok": False, "error": str(e)}), 400


@app.route("/api/payments")
def api_payments():
    """Return just PC_id, total_bonus, participant_session_id for the payments page."""
    df = _get_df()
    col_map = {
        "app_collect_results.1.player.PC_id_manual_input": "PC_id",
        "app_collect_results.1.player.total_bonus": "total_bonus",
        "app_collect_results.1.player.participant_session_id": "participant_session_id",
    }
    keep = [c for c in col_map if c in df.columns]
    sub = df[keep].rename(columns=col_map).fillna("")
    # Distinct session ids sorted alphabetically. Guard the lookup: the
    # session-id column may be absent from an older/partial CSV, in which
    # case we return no session ids instead of crashing with a KeyError.
    if "participant_session_id" in sub.columns:
        session_ids = sorted(set(str(v) for v in sub["participant_session_id"] if v != ""))
    else:
        session_ids = []
    return to_json({
        "columns": [col_map[c] for c in keep],
        "rows": sub.values.tolist(),
        "session_ids": session_ids,
    })


@app.route("/api/stats")
def api_stats():
    """Session-level stats dashboard data.

    A row is "completed" iff its participant_session_id is non-empty.
    Returns a global summary, a per-session breakdown, a live funnel of
    where participants currently are, and the app order derived from
    CSV column order.
    """
    df = _get_df().copy()
    sid_col = "app_collect_results.1.player.participant_session_id"
    app_col = "participant._current_app_name"
    page_col = "participant._current_page_name"
    orphan_col = "app_collect_results.1.player.was_orphan"
    dropout_col = "participant.midgame_dropout"
    timeout_col = "participant.timed_out_from_coord_games"
    duration_col = "participant.completion_duration_time"
    treatment_col = "participant.treatment"

    # Only rows with a non-empty participant_session_id count as "completed".
    df["_sid"] = df[sid_col].fillna("")
    completed = df[df["_sid"] != ""]
    incomplete = df[df["_sid"] == ""]

    # App sequence (derived from column order): every dotted column whose
    # first segment isn't participant/session names an oTree app.
    app_order = []
    for c in df.columns:
        parts = c.split(".")
        if len(parts) > 1 and parts[0] not in ("participant", "session") and parts[0] not in app_order:
            app_order.append(parts[0])

    # ---- Global summary ----
    total = len(df)
    n_completed = len(completed)
    n_incomplete = len(incomplete)
    durations = pd.to_numeric(completed[duration_col], errors="coerce").dropna()
    global_summary = {
        "total_rows": total,
        "completed": n_completed,
        "incomplete": n_incomplete,
        "completion_rate": round(n_completed / total * 100, 1) if total else 0,
        "orphans": int((df[orphan_col] == 1).sum()) if orphan_col in df.columns else 0,
        "dropouts": int((pd.to_numeric(df[dropout_col], errors="coerce") == 1).sum()) if dropout_col in df.columns else 0,
        "timed_out": int((pd.to_numeric(df[timeout_col], errors="coerce") == 1).sum()) if timeout_col in df.columns else 0,
        "duration_median": round(durations.median(), 1) if len(durations) else None,
        "duration_mean": round(durations.mean(), 1) if len(durations) else None,
        "duration_min": round(durations.min(), 1) if len(durations) else None,
        "duration_max": round(durations.max(), 1) if len(durations) else None,
    }

    # ---- Per-session breakdown ----
    session_ids = sorted(completed["_sid"].unique())
    per_session = []
    for sid in session_ids:
        s = completed[completed["_sid"] == sid]
        s_dur = pd.to_numeric(s[duration_col], errors="coerce").dropna()
        per_session.append({
            "session_id": sid,
            "n": len(s),
            "orphans": int((s[orphan_col] == 1).sum()) if orphan_col in s.columns else 0,
            "duration_median": round(s_dur.median(), 1) if len(s_dur) else None,
            "duration_mean": round(s_dur.mean(), 1) if len(s_dur) else None,
            "treatments": dict(s[treatment_col].fillna("(none)").value_counts()),
        })

    # ---- Current app/page funnel (where are people RIGHT NOW) ----
    app_page = df[[app_col, page_col, "_sid"]].fillna("(empty)")
    funnel = []
    for a in app_order:
        sub = app_page[app_page[app_col] == a]
        if len(sub) == 0:
            continue
        pages = dict(sub[page_col].value_counts())
        funnel.append({"app": a, "count": len(sub), "pages": pages})
    # Also count those whose current_app is empty
    empty_app = app_page[app_page[app_col] == "(empty)"]
    if len(empty_app):
        funnel.append({"app": "(no app)", "count": len(empty_app), "pages": {}})

    # ---- Completed players: which app they finished at (current_app) ----
    completed_funnel = []
    for a in app_order:
        n = int((completed[app_col] == a).sum())
        if n:
            completed_funnel.append({"app": a, "count": n})

    return to_json({
        "global": global_summary,
        "per_session": per_session,
        "funnel": funnel,
        "completed_funnel": completed_funnel,
        "app_order": app_order,
    })


@app.route("/api/signal")
def api_signal():
    """Signal game analysis for completed participants.

    "Completed" = non-empty participant_session_id; "played" = has a
    signal_color_choice. Breaks buy rates, colors, group success and
    beliefs down by treatment, intent counts and session.
    """
    df = _get_df()
    sid_col = "app_collect_results.1.player.participant_session_id"
    treat_col = "signal_game.1.group.treatment"
    buys_col = "signal_game.1.player.buys_signal"
    color_col = "participant.signal_color_choice"
    intend_col = "signal_game.1.player.intends_to_buy"
    intend_count_col = "signal_game.1.group.intend_count"
    group_id_col = "signal_game.1.group.id_in_subsession"
    success_col = "signal_game.1.group.group_success"
    beliefs_col = "signal_game.1.player.beliefs_truthful"

    # Filter: completed participants (non-empty session letter)
    completed = df[df[sid_col].notna()].copy()
    total_completed = len(completed)
    # Played signal game = has a color choice recorded; everyone else skipped.
    played = completed[completed[color_col].notna()].copy()
    skipped = completed[~completed.index.isin(played.index)]
    total_played = len(played)
    total_skipped = len(skipped)

    # Human-readable treatment labels (treatment column holds 0.0/1.0).
    played["_treat"] = played[treat_col].map({0.0: "Control (0)", 1.0: "Treatment (1)"}).fillna("Unknown")

    # ---- Treatment distribution ----
    treat_dist = dict(played["_treat"].value_counts())

    # ---- Buy rate by treatment ----
    buy_by_treat = {}
    for treat_name, group in played.groupby("_treat"):
        n = len(group)
        bought = int((group[buys_col] == 1).sum())
        buy_by_treat[treat_name] = {
            "n": n,
            "bought": bought,
            "pct": round(bought / n * 100, 1) if n else 0,
        }

    # ---- Color distribution by treatment ----
    color_by_treat = {}
    for treat_name, group in played.groupby("_treat"):
        color_by_treat[treat_name] = dict(group[color_col].value_counts())

    # ---- Overall color distribution ----
    overall_colors = dict(played[color_col].value_counts())

    # ---- Overall buy rate ----
    total_bought = int((played[buys_col] == 1).sum())
    overall_buy_pct = round(total_bought / total_played * 100, 1) if total_played else 0

    # ---- Intend to buy (among those who have the field) ----
    intend_df = played[played[intend_col].notna()]
    intend_yes = int((intend_df[intend_col] == 1).sum())
    intend_no = int((intend_df[intend_col] == 0).sum())

    # ---- Group success by treatment ----
    success_by_treat = {}
    for treat_name, group in played.groupby("_treat"):
        n = len(group)
        succ = int((group[success_col] == 1).sum())
        success_by_treat[treat_name] = {
            "n": n,
            "success": succ,
            "pct": round(succ / n * 100, 1) if n else 0,
        }

    # ---- Success rate by group intend_count (treatment only) ----
    # One row per group: drop duplicate group ids before counting.
    treat_played = played[played["_treat"] == "Treatment (1)"]
    treat_groups = treat_played.drop_duplicates(subset=[group_id_col])
    treat_groups = treat_groups[treat_groups[intend_count_col].notna()]
    success_by_intend = {}
    for ic, g in treat_groups.groupby(intend_count_col):
        n = len(g)
        succ = int((g[success_col] == 1).sum())
        success_by_intend[str(int(ic))] = {
            "n_groups": n,
            "success": succ,
            "pct": round(succ / n * 100, 1) if n else 0,
        }

    # ---- Buy rate by others' intent count (treatment only) ----
    # others' intent = group intend_count minus this player's own intent.
    tp = treat_played[treat_played[intend_count_col].notna() & treat_played[intend_col].notna()].copy()
    tp["_others_intend"] = tp[intend_count_col] - tp[intend_col]
    buy_by_others_intend = {}
    for oi, g in tp.groupby("_others_intend"):
        n = len(g)
        bought = int((g[buys_col] == 1).sum())
        buy_by_others_intend[str(int(oi))] = {
            "n": n,
            "bought": bought,
            "pct": round(bought / n * 100, 1) if n else 0,
        }

    # ---- Buy rate by group intend_count (treatment only, per player) ----
    buy_by_group_intend = {}
    for ic, g in tp.groupby(intend_count_col):
        n = len(g)
        bought = int((g[buys_col] == 1).sum())
        buy_by_group_intend[str(int(ic))] = {
            "n": n,
            "bought": bought,
            "pct": round(bought / n * 100, 1) if n else 0,
        }

    # ---- Beliefs distribution (among those who answered) ----
    beliefs_df = played[played[beliefs_col].notna()]
    beliefs_dist = {str(int(k)): int(v) for k, v in beliefs_df[beliefs_col].value_counts().items()}

    # ---- By session letter ----
    per_session = []
    for sid in sorted(played[sid_col].unique()):
        s = played[played[sid_col] == sid]
        n = len(s)
        bought = int((s[buys_col] == 1).sum())
        per_session.append({
            "session_id": sid,
            "n": n,
            "bought": bought,
            "buy_pct": round(bought / n * 100, 1) if n else 0,
            "treatments": dict(s["_treat"].value_counts()),
            "colors": dict(s[color_col].value_counts()),
        })

    return to_json({
        "total_completed": total_completed,
        "total_played": total_played,
        "total_skipped": total_skipped,
        "treat_dist": treat_dist,
        "buy_by_treat": buy_by_treat,
        "color_by_treat": color_by_treat,
        "overall_colors": overall_colors,
        "overall_buy_pct": overall_buy_pct,
        "total_bought": total_bought,
        "intend_yes": intend_yes,
        "intend_no": intend_no,
        "success_by_treat": success_by_treat,
        "success_by_intend": success_by_intend,
        "buy_by_others_intend": buy_by_others_intend,
        "buy_by_group_intend": buy_by_group_intend,
        "beliefs_dist": beliefs_dist,
        "per_session": per_session,
    })


@app.route("/api/coop-games")
def api_coop_games():
    """Combined Prisoner's Dilemma + Stag Hunt dashboard data."""
    df = _get_df()
    sid_col = "app_collect_results.1.player.participant_session_id"

    def analyze_game(prefix):
        """Compute the full stats bundle for one game app (column prefix)."""
        treat_col = f"{prefix}.1.player.treatment_cond"
        coop_col = f"{prefix}.1.player.cooperate"
        bot_col = f"{prefix}.1.player.is_bot"
        payoff_col = f"{prefix}.1.player.payoff"
        msg_col = f"{prefix}.1.player.messages"
        comp_col = f"{prefix}.1.player.comprehension_passed"

        # Filter: rows with a non-NaN treatment_cond actually played this game.
        played = df[df[treat_col].notna()].copy()
        n = len(played)

        # Treatment distribution
        treat_dist = dict(played[treat_col].value_counts())

        # Overall cooperation
        n_coop = int((played[coop_col] == 1).sum())
        n_defect = int((played[coop_col] == 0).sum())
        coop_rate = round(n_coop / n * 100, 1) if n else 0

        # Cooperation by treatment
        coop_by_treat = {}
        for t, g in played.groupby(treat_col):
            gn = len(g)
            gc = int((g[coop_col] == 1).sum())
            coop_by_treat[t] = {"n": gn, "coop": gc, "pct": round(gc / gn * 100, 1) if gn else 0}

        # Human vs bot cooperation by treatment (only for conditions with bots)
        human_bot_coop = {}
        for t, g in played.groupby(treat_col):
            if bot_col in g.columns and g[bot_col].sum() > 0:
                humans = g[g[bot_col] == 0]
                bots = g[g[bot_col] == 1]
                hc = int((humans[coop_col] == 1).sum())
                bc = int((bots[coop_col] == 1).sum())
                human_bot_coop[t] = {
                    "human_n": len(humans),
                    "human_coop": hc,
                    "human_pct": round(hc / len(humans) * 100, 1) if len(humans) else 0,
                    "bot_n": len(bots),
                    "bot_coop": bc,
                    "bot_pct": round(bc / len(bots) * 100, 1) if len(bots) else 0,
                }

        # Bots vs humans overall
        n_bots = int((played[bot_col] == 1).sum()) if bot_col in played.columns else 0
        n_humans = n - n_bots

        # Payoff distribution per condition
        payoff_dist = {}
        for t, g in played.groupby(treat_col):
            vals = pd.to_numeric(g[payoff_col], errors="coerce").dropna()
            payoff_dist[t] = {
                "mean": round(vals.mean(), 2) if len(vals) else None,
                "values": dict(vals.value_counts().sort_index()),
            }

        # Messages: how many non-empty per condition
        msg_by_treat = {}
        if msg_col in played.columns:
            for t, g in played.groupby(treat_col):
                msgs = g[msg_col].fillna("")
                non_empty = int((msgs.str.len() > 0).sum())
                msg_by_treat[t] = {"n": len(g), "with_msg": non_empty}

        # Comprehension (if the column is missing, everyone counts as passed)
        comp_passed = int((played[comp_col] == 1).sum()) if comp_col in played.columns else n
        comp_failed = n - comp_passed

        # Per-session
        per_session = []
        for sid in sorted(played[sid_col].dropna().unique()):
            s = played[played[sid_col] == sid]
            sn = len(s)
            sc = int((s[coop_col] == 1).sum())
            treats = {}
            for t, g in s.groupby(treat_col):
                gn = len(g)
                gc = int((g[coop_col] == 1).sum())
                treats[t] = {"n": gn, "coop": gc, "pct": round(gc / gn * 100, 1) if gn else 0}
            per_session.append({
                "session_id": sid,
                "n": sn,
                "coop": sc,
                "coop_pct": round(sc / sn * 100, 1) if sn else 0,
                "treatments": treats,
            })

        return {
            "n": n,
            "n_humans": n_humans,
            "n_bots": n_bots,
            "n_coop": n_coop,
            "n_defect": n_defect,
            "coop_rate": coop_rate,
            "treat_dist": treat_dist,
            "coop_by_treat": coop_by_treat,
            "human_bot_coop": human_bot_coop,
            "payoff_dist": payoff_dist,
            "msg_by_treat": msg_by_treat,
            "comp_passed": comp_passed,
            "comp_failed": comp_failed,
            "per_session": per_session,
        }

    prisoner = analyze_game("app_prisoner")
    stag = analyze_game("app_stag")

    # Condition labels for the frontend
    cond_labels = {
        "condition_1": "Human + Chat",
        "condition_2": "Human + No Chat",
        "condition_3": "Bot + No Chat",
        "condition_4": "Bot + Chat",
    }

    return to_json({
        "prisoner": prisoner,
        "stag": stag,
        "cond_labels": cond_labels,
    })


@app.route("/api/session-diagnostic")
def api_session_diagnostic():
    """Detect mistyped session letters using time-bucket analysis.

    Each session letter gets a "canonical" start time (mode of its 10-minute
    start-time buckets + 5 min). Participants whose start time is closer to
    another letter's canonical time are flagged as likely typos. Also reports
    signal-game groups whose members typed different letters.
    """
    df = _get_df()
    letter_col = "app_consent_consolidated.1.player.session_letter"
    sid_col = "app_collect_results.1.player.participant_session_id"
    time_col = "participant.time_started_utc"
    code_col = "participant.code"
    color_col = "participant.signal_color_choice"
    group_col = "signal_game.1.group.id_in_subsession"
    treat_col = "signal_game.1.group.treatment"

    has_letter = df[df[letter_col].notna()].copy()
    has_letter["_time"] = pd.to_datetime(has_letter[time_col], utc=True).dt.tz_convert("America/New_York")
    has_letter["_bucket"] = has_letter["_time"].dt.floor("10min")

    # Canonical time per letter = most common 10-min bucket, centered (+5 min).
    canonical = {}
    for letter in sorted(has_letter[letter_col].unique()):
        s = has_letter[has_letter[letter_col] == letter]
        mode_bucket = s["_bucket"].mode().iloc[0]
        canonical[letter] = mode_bucket + pd.Timedelta(minutes=5)

    sessions = []
    for letter in sorted(has_letter[letter_col].unique()):
        s = has_letter[has_letter[letter_col] == letter]
        completed = s[s[sid_col].notna()]
        played_signal = s[s[color_col].notna()]
        n_signal = len(played_signal)
        sessions.append({
            "letter": letter,
            "canonical_bucket": canonical[letter].strftime("%Y-%m-%d %H:%M"),
            "n_total": len(s),
            "n_completed": len(completed),
            "n_signal_played": n_signal,
            # mod7: signal-game groups are presumably size 7 — a nonzero
            # remainder hints at a mistyped letter (TODO confirm group size).
            "mod7": n_signal % 7,
            "time_min": s["_time"].min().strftime("%Y-%m-%d %H:%M:%S"),
            "time_max": s["_time"].max().strftime("%Y-%m-%d %H:%M:%S"),
        })

    # Flag participants whose start time is nearest to a DIFFERENT letter.
    flagged = []
    letters = sorted(canonical.keys())
    for _, row in has_letter.iterrows():
        typed = row[letter_col]
        t = row["_time"]
        if pd.isna(t):
            continue
        dists = {l: abs((t - canonical[l]).total_seconds()) for l in letters}
        nearest = min(dists, key=dists.get)
        if nearest != typed:
            flagged.append({
                "code": row[code_col],
                "typed_letter": typed,
                "suggested_letter": nearest,
                "timestamp": t.strftime("%Y-%m-%d %H:%M:%S"),
                "dist_own_min": round(dists[typed] / 60, 1),
                "dist_suggested_min": round(dists[nearest] / 60, 1),
            })

    # Signal-game groups whose members typed more than one session letter.
    played = has_letter[has_letter[group_col].notna()].copy()
    split_groups = []
    if len(played):
        for gid, g in played.groupby(group_col):
            letter_counts = g[letter_col].value_counts().to_dict()
            if len(letter_counts) > 1:
                treat_val = g[treat_col].iloc[0]
                split_groups.append({
                    "group_id": int(gid),
                    "treatment": int(treat_val) if pd.notna(treat_val) else None,
                    "size": len(g),
                    "letters": {str(k): int(v) for k, v in letter_counts.items()},
                })

    # Per-letter histogram of 10-min start buckets, for the diagnostic chart.
    bucket_detail = {}
    for letter in sorted(has_letter[letter_col].unique()):
        s = has_letter[has_letter[letter_col] == letter]
        counts = s["_bucket"].value_counts().sort_index()
        bucket_detail[letter] = [
            {"bucket": b.strftime("%Y-%m-%d %H:%M"), "count": int(c)}
            for b, c in counts.items()
        ]

    return to_json({
        "sessions": sessions,
        "flagged": flagged,
        "split_groups": split_groups,
        "bucket_detail": bucket_detail,
        "canonical": {l: v.strftime("%Y-%m-%d %H:%M") for l, v in canonical.items()},
    })


@app.route("/api/columns")
def api_columns():
    """Return all CSV columns grouped by their first dotted segment."""
    df = _get_df()
    groups = {}
    for c in df.columns:
        prefix = c.split(".")[0]
        groups.setdefault(prefix, []).append(c)
    return to_json(groups)


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)