iurbinah's picture
Update csv-viewer app
e46ea03
import io
import json
import os
import time
from dotenv import load_dotenv
load_dotenv()
import numpy as np
import pandas as pd
import requests
from flask import Flask, render_template, request, Response, session, redirect, url_for
from werkzeug.middleware.proxy_fix import ProxyFix
app = Flask(__name__)
app.secret_key = os.environ.get("FLASK_SECRET", "otree-csv-viewer-default-key")
app.config["SESSION_COOKIE_SAMESITE"] = "None"
app.config["SESSION_COOKIE_SECURE"] = True
app.config["SESSION_COOKIE_HTTPONLY"] = True
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "bpel123")
app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_host=1)
@app.before_request
def require_login():
if request.endpoint in ("login", "static") or request.path == "/favicon.ico":
return
if not session.get("authenticated"):
if request.path.startswith("/api/"):
return Response("Unauthorized", status=401)
return redirect(url_for("login"))
def to_json(obj):
"""jsonify replacement that handles numpy types."""
def default(o):
if isinstance(o, (np.integer,)):
return int(o)
if isinstance(o, (np.floating,)):
if np.isnan(o):
return None
return float(o)
if isinstance(o, np.ndarray):
return o.tolist()
raise TypeError(f"{type(o)} not serializable")
data = json.dumps(obj, default=default)
return Response(data, mimetype="application/json")
CSV_PATH = os.path.join(os.path.dirname(__file__), "all_apps_wide-2026-03-31.csv")
OTREE_URL = os.environ.get("OTREE_CSV_URL", "")
# Identifier columns to always include alongside app_collect_results
ID_COLS = [
"participant.id_in_session",
"participant.code",
"participant.label",
"participant._current_app_name",
"participant._current_page_name",
"participant.treatment",
"participant.payoff",
"session.code",
]
COLLECT_PREFIX = "app_collect_results."
# In-memory cache so we don't re-read disk on every request
_cache = {"df": None, "mtime": 0}
def _get_df():
"""Return the dataframe, re-reading from disk only if the file changed."""
try:
mtime = os.path.getmtime(CSV_PATH)
except OSError:
mtime = 0
if _cache["df"] is None or mtime != _cache["mtime"]:
_cache["df"] = pd.read_csv(CSV_PATH)
_cache["mtime"] = mtime
return _cache["df"]
def load_csv(collect_only=True):
df = _get_df().copy()
if collect_only:
collect_cols = [c for c in df.columns if c.startswith(COLLECT_PREFIX)]
keep = [c for c in ID_COLS if c in df.columns] + collect_cols
df = df[keep]
return df
@app.route("/login", methods=["GET", "POST"])
def login():
error = None
if request.method == "POST":
if request.form.get("password") == ADMIN_PASSWORD:
session["authenticated"] = True
return redirect(url_for("index"))
error = "Incorrect password"
return render_template("login.html", error=error)
@app.route("/logout")
def logout():
session.clear()
return redirect(url_for("login"))
@app.route("/favicon.ico")
def favicon():
return redirect(url_for("static", filename="favicon.svg"), code=302)
@app.route("/")
def index():
return render_template("index.html")
@app.route("/api/data")
def api_data():
collect_only = request.args.get("collect_only", "1") == "1"
df = load_csv(collect_only=collect_only)
display_cols = []
for c in df.columns:
if c.startswith(COLLECT_PREFIX):
parts = c.split(".")
display_cols.append(parts[-1])
elif c.startswith("participant."):
display_cols.append(c.replace("participant.", "p."))
elif c.startswith("session."):
display_cols.append(c.replace("session.", "s."))
else:
display_cols.append(c)
return to_json({
"columns": display_cols,
"raw_columns": list(df.columns),
"rows": df.fillna("").values.tolist(),
"total": len(df),
})
@app.route("/api/fetch-otree", methods=["POST"])
def fetch_otree():
"""Pull fresh CSV from oTree and overwrite the local file."""
try:
resp = requests.get(OTREE_URL, timeout=30)
resp.raise_for_status()
# Validate it's actually CSV
df = pd.read_csv(io.StringIO(resp.text))
# Save to disk
df.to_csv(CSV_PATH, index=False)
# Bust cache
_cache["df"] = df
_cache["mtime"] = os.path.getmtime(CSV_PATH)
return to_json({"ok": True, "rows": len(df), "cols": len(df.columns)})
except Exception as e:
return to_json({"ok": False, "error": str(e)}), 502
@app.route("/api/upload", methods=["POST"])
def upload_csv():
"""Accept an uploaded CSV file to replace the current dataset."""
f = request.files.get("file")
if not f:
return to_json({"ok": False, "error": "No file uploaded"}), 400
try:
df = pd.read_csv(f.stream)
df.to_csv(CSV_PATH, index=False)
_cache["df"] = df
_cache["mtime"] = os.path.getmtime(CSV_PATH)
return to_json({"ok": True, "rows": len(df), "cols": len(df.columns)})
except Exception as e:
return to_json({"ok": False, "error": str(e)}), 400
@app.route("/api/payments")
def api_payments():
"""Return just PC_id, total_bonus, participant_session_id for the payments page."""
df = _get_df()
col_map = {
"app_collect_results.1.player.PC_id_manual_input": "PC_id",
"app_collect_results.1.player.total_bonus": "total_bonus",
"app_collect_results.1.player.participant_session_id": "participant_session_id",
}
keep = [c for c in col_map if c in df.columns]
sub = df[keep].rename(columns=col_map).fillna("")
# Distinct session ids sorted alphabetically
session_ids = sorted(set(str(v) for v in sub["participant_session_id"] if v != ""))
return to_json({
"columns": [col_map[c] for c in keep],
"rows": sub.values.tolist(),
"session_ids": session_ids,
})
@app.route("/api/stats")
def api_stats():
"""Session-level stats dashboard data."""
df = _get_df().copy()
sid_col = "app_collect_results.1.player.participant_session_id"
app_col = "participant._current_app_name"
page_col = "participant._current_page_name"
bot_col = "participant._is_bot"
orphan_col = "app_collect_results.1.player.was_orphan"
dropout_col = "participant.midgame_dropout"
timeout_col = "participant.timed_out_from_coord_games"
duration_col = "participant.completion_duration_time"
treatment_col = "participant.treatment"
# Only rows with a non-empty participant_session_id = "completed"
df["_sid"] = df[sid_col].fillna("")
completed = df[df["_sid"] != ""]
incomplete = df[df["_sid"] == ""]
# App sequence (derived from column order)
app_order = []
for c in df.columns:
parts = c.split(".")
if len(parts) > 1 and parts[0] not in ("participant", "session") and parts[0] not in app_order:
app_order.append(parts[0])
# ---- Global summary ----
total = len(df)
n_completed = len(completed)
n_incomplete = len(incomplete)
durations = pd.to_numeric(completed[duration_col], errors="coerce").dropna()
global_summary = {
"total_rows": total,
"completed": n_completed,
"incomplete": n_incomplete,
"completion_rate": round(n_completed / total * 100, 1) if total else 0,
"orphans": int((df[orphan_col] == 1).sum()) if orphan_col in df.columns else 0,
"dropouts": int((pd.to_numeric(df[dropout_col], errors="coerce") == 1).sum()) if dropout_col in df.columns else 0,
"timed_out": int((pd.to_numeric(df[timeout_col], errors="coerce") == 1).sum()) if timeout_col in df.columns else 0,
"duration_median": round(durations.median(), 1) if len(durations) else None,
"duration_mean": round(durations.mean(), 1) if len(durations) else None,
"duration_min": round(durations.min(), 1) if len(durations) else None,
"duration_max": round(durations.max(), 1) if len(durations) else None,
}
# ---- Per-session breakdown ----
session_ids = sorted(completed["_sid"].unique())
per_session = []
for sid in session_ids:
s = completed[completed["_sid"] == sid]
s_dur = pd.to_numeric(s[duration_col], errors="coerce").dropna()
per_session.append({
"session_id": sid,
"n": len(s),
"orphans": int((s[orphan_col] == 1).sum()) if orphan_col in s.columns else 0,
"duration_median": round(s_dur.median(), 1) if len(s_dur) else None,
"duration_mean": round(s_dur.mean(), 1) if len(s_dur) else None,
"treatments": dict(s[treatment_col].fillna("(none)").value_counts()),
})
# ---- Current app/page funnel (where are people RIGHT NOW) ----
app_page = df[[app_col, page_col, "_sid"]].fillna("(empty)")
funnel = []
for a in app_order:
sub = app_page[app_page[app_col] == a]
if len(sub) == 0:
continue
pages = dict(sub[page_col].value_counts())
funnel.append({"app": a, "count": len(sub), "pages": pages})
# Also count those whose current_app is empty
empty_app = app_page[app_page[app_col] == "(empty)"]
if len(empty_app):
funnel.append({"app": "(no app)", "count": len(empty_app), "pages": {}})
# ---- Completed players: which app they finished at (current_app) ----
completed_funnel = []
for a in app_order:
n = int((completed[app_col] == a).sum())
if n:
completed_funnel.append({"app": a, "count": n})
return to_json({
"global": global_summary,
"per_session": per_session,
"funnel": funnel,
"completed_funnel": completed_funnel,
"app_order": app_order,
})
@app.route("/api/signal")
def api_signal():
"""Signal game analysis for completed participants."""
df = _get_df()
sid_col = "app_collect_results.1.player.participant_session_id"
treat_col = "signal_game.1.group.treatment"
buys_col = "signal_game.1.player.buys_signal"
color_col = "participant.signal_color_choice"
skip_col = "participant.skip_signal_game"
intend_col = "signal_game.1.player.intends_to_buy"
intend_count_col = "signal_game.1.group.intend_count"
group_id_col = "signal_game.1.group.id_in_subsession"
success_col = "signal_game.1.group.group_success"
beliefs_col = "signal_game.1.player.beliefs_truthful"
reason_col = "signal_game.1.player.purchase_reason"
guess_col = "signal_game.1.player.guess_correct"
psid_col = "app_collect_results.1.player.participant_session_id"
# Filter: completed participants (non-empty session letter)
completed = df[df[sid_col].notna()].copy()
total_completed = len(completed)
# Played signal game = did not skip (skip == 0 or NaN with color present)
played = completed[completed[color_col].notna()].copy()
skipped = completed[~completed.index.isin(played.index)]
total_played = len(played)
total_skipped = len(skipped)
# Treatment labels
played["_treat"] = played[treat_col].map({0.0: "Control (0)", 1.0: "Treatment (1)"}).fillna("Unknown")
# ---- Treatment distribution ----
treat_dist = dict(played["_treat"].value_counts())
# ---- Buy rate by treatment ----
buy_by_treat = {}
for treat_name, group in played.groupby("_treat"):
n = len(group)
bought = int((group[buys_col] == 1).sum())
buy_by_treat[treat_name] = {
"n": n,
"bought": bought,
"pct": round(bought / n * 100, 1) if n else 0,
}
# ---- Color distribution by treatment ----
color_by_treat = {}
for treat_name, group in played.groupby("_treat"):
colors = dict(group[color_col].value_counts())
color_by_treat[treat_name] = colors
# ---- Overall color distribution ----
overall_colors = dict(played[color_col].value_counts())
# ---- Overall buy rate ----
total_bought = int((played[buys_col] == 1).sum())
overall_buy_pct = round(total_bought / total_played * 100, 1) if total_played else 0
# ---- Intend to buy (among those who have the field) ----
intend_df = played[played[intend_col].notna()]
intend_yes = int((intend_df[intend_col] == 1).sum())
intend_no = int((intend_df[intend_col] == 0).sum())
# ---- Group success by treatment ----
success_by_treat = {}
for treat_name, group in played.groupby("_treat"):
n = len(group)
succ = int((group[success_col] == 1).sum())
success_by_treat[treat_name] = {
"n": n,
"success": succ,
"pct": round(succ / n * 100, 1) if n else 0,
}
# ---- Success rate by group intend_count (treatment only) ----
treat_played = played[played["_treat"] == "Treatment (1)"]
treat_groups = treat_played.drop_duplicates(subset=[group_id_col])
treat_groups = treat_groups[treat_groups[intend_count_col].notna()]
success_by_intend = {}
for ic, g in treat_groups.groupby(intend_count_col):
n = len(g)
succ = int((g[success_col] == 1).sum())
success_by_intend[str(int(ic))] = {
"n_groups": n,
"success": succ,
"pct": round(succ / n * 100, 1) if n else 0,
}
# ---- Buy rate by others' intent count (treatment only) ----
tp = treat_played[treat_played[intend_count_col].notna() & treat_played[intend_col].notna()].copy()
tp["_others_intend"] = tp[intend_count_col] - tp[intend_col]
buy_by_others_intend = {}
for oi, g in tp.groupby("_others_intend"):
n = len(g)
bought = int((g[buys_col] == 1).sum())
buy_by_others_intend[str(int(oi))] = {
"n": n,
"bought": bought,
"pct": round(bought / n * 100, 1) if n else 0,
}
# ---- Buy rate by group intend_count (treatment only, per player) ----
buy_by_group_intend = {}
for ic, g in tp.groupby(intend_count_col):
n = len(g)
bought = int((g[buys_col] == 1).sum())
buy_by_group_intend[str(int(ic))] = {
"n": n,
"bought": bought,
"pct": round(bought / n * 100, 1) if n else 0,
}
# ---- Beliefs distribution (among those who answered) ----
beliefs_df = played[played[beliefs_col].notna()]
beliefs_dist = {str(int(k)): int(v) for k, v in beliefs_df[beliefs_col].value_counts().items()}
# ---- By session letter ----
per_session = []
for sid in sorted(played[psid_col].unique()):
s = played[played[psid_col] == sid]
n = len(s)
bought = int((s[buys_col] == 1).sum())
treats = dict(s["_treat"].value_counts())
colors = dict(s[color_col].value_counts())
per_session.append({
"session_id": sid,
"n": n,
"bought": bought,
"buy_pct": round(bought / n * 100, 1) if n else 0,
"treatments": treats,
"colors": colors,
})
return to_json({
"total_completed": total_completed,
"total_played": total_played,
"total_skipped": total_skipped,
"treat_dist": treat_dist,
"buy_by_treat": buy_by_treat,
"color_by_treat": color_by_treat,
"overall_colors": overall_colors,
"overall_buy_pct": overall_buy_pct,
"total_bought": total_bought,
"intend_yes": intend_yes,
"intend_no": intend_no,
"success_by_treat": success_by_treat,
"success_by_intend": success_by_intend,
"buy_by_others_intend": buy_by_others_intend,
"buy_by_group_intend": buy_by_group_intend,
"beliefs_dist": beliefs_dist,
"per_session": per_session,
})
@app.route("/api/coop-games")
def api_coop_games():
"""Combined Prisoner's Dilemma + Stag Hunt dashboard data."""
df = _get_df()
sid_col = "app_collect_results.1.player.participant_session_id"
def analyze_game(prefix):
treat_col = f"{prefix}.1.player.treatment_cond"
coop_col = f"{prefix}.1.player.cooperate"
bot_col = f"{prefix}.1.player.is_bot"
payoff_col = f"{prefix}.1.player.payoff"
msg_col = f"{prefix}.1.player.messages"
comp_col = f"{prefix}.1.player.comprehension_passed"
gpwait_col = f"{prefix}.1.player.time_spent_gpwait"
instr_col = f"{prefix}.1.player.time_spent_game_instr_page"
group_col = f"{prefix}.1.player.persistent_group_id"
# Filter: non-NaN treatment_cond
played = df[df[treat_col].notna()].copy()
n = len(played)
# Treatment distribution
treat_dist = dict(played[treat_col].value_counts())
# Overall cooperation
n_coop = int((played[coop_col] == 1).sum())
n_defect = int((played[coop_col] == 0).sum())
coop_rate = round(n_coop / n * 100, 1) if n else 0
# Cooperation by treatment
coop_by_treat = {}
for t, g in played.groupby(treat_col):
gn = len(g)
gc = int((g[coop_col] == 1).sum())
coop_by_treat[t] = {"n": gn, "coop": gc, "pct": round(gc / gn * 100, 1) if gn else 0}
# Human vs bot cooperation by treatment (for bot conditions)
human_bot_coop = {}
for t, g in played.groupby(treat_col):
if bot_col in g.columns and g[bot_col].sum() > 0:
humans = g[g[bot_col] == 0]
bots = g[g[bot_col] == 1]
hc = int((humans[coop_col] == 1).sum())
bc = int((bots[coop_col] == 1).sum())
human_bot_coop[t] = {
"human_n": len(humans), "human_coop": hc,
"human_pct": round(hc / len(humans) * 100, 1) if len(humans) else 0,
"bot_n": len(bots), "bot_coop": bc,
"bot_pct": round(bc / len(bots) * 100, 1) if len(bots) else 0,
}
# Bots vs humans overall
n_bots = int((played[bot_col] == 1).sum()) if bot_col in played.columns else 0
n_humans = n - n_bots
# Payoff distribution
payoff_dist = {}
for t, g in played.groupby(treat_col):
vals = pd.to_numeric(g[payoff_col], errors="coerce").dropna()
payoff_dist[t] = {
"mean": round(vals.mean(), 2) if len(vals) else None,
"values": dict(vals.value_counts().sort_index()),
}
# Messages: how many non-empty per condition
msg_by_treat = {}
if msg_col in played.columns:
for t, g in played.groupby(treat_col):
msgs = g[msg_col].fillna("")
non_empty = int((msgs.str.len() > 0).sum())
msg_by_treat[t] = {"n": len(g), "with_msg": non_empty}
# Comprehension
comp_passed = int((played[comp_col] == 1).sum()) if comp_col in played.columns else n
comp_failed = n - comp_passed
# Per-session
per_session = []
for sid in sorted(played[sid_col].dropna().unique()):
s = played[played[sid_col] == sid]
sn = len(s)
sc = int((s[coop_col] == 1).sum())
treats = {}
for t, g in s.groupby(treat_col):
gn = len(g)
gc = int((g[coop_col] == 1).sum())
treats[t] = {"n": gn, "coop": gc, "pct": round(gc / gn * 100, 1) if gn else 0}
per_session.append({
"session_id": sid,
"n": sn,
"coop": sc,
"coop_pct": round(sc / sn * 100, 1) if sn else 0,
"treatments": treats,
})
return {
"n": n,
"n_humans": n_humans,
"n_bots": n_bots,
"n_coop": n_coop,
"n_defect": n_defect,
"coop_rate": coop_rate,
"treat_dist": treat_dist,
"coop_by_treat": coop_by_treat,
"human_bot_coop": human_bot_coop,
"payoff_dist": payoff_dist,
"msg_by_treat": msg_by_treat,
"comp_passed": comp_passed,
"comp_failed": comp_failed,
"per_session": per_session,
}
prisoner = analyze_game("app_prisoner")
stag = analyze_game("app_stag")
# Condition labels for the frontend
cond_labels = {
"condition_1": "Human + Chat",
"condition_2": "Human + No Chat",
"condition_3": "Bot + No Chat",
"condition_4": "Bot + Chat",
}
return to_json({
"prisoner": prisoner,
"stag": stag,
"cond_labels": cond_labels,
})
@app.route("/api/session-diagnostic")
def api_session_diagnostic():
"""Detect mistyped session letters using time-bucket analysis."""
df = _get_df()
letter_col = "app_consent_consolidated.1.player.session_letter"
sid_col = "app_collect_results.1.player.participant_session_id"
time_col = "participant.time_started_utc"
code_col = "participant.code"
color_col = "participant.signal_color_choice"
group_col = "signal_game.1.group.id_in_subsession"
treat_col = "signal_game.1.group.treatment"
has_letter = df[df[letter_col].notna()].copy()
has_letter["_time"] = pd.to_datetime(has_letter[time_col], utc=True).dt.tz_convert("America/New_York")
has_letter["_bucket"] = has_letter["_time"].dt.floor("10min")
canonical = {}
for letter in sorted(has_letter[letter_col].unique()):
s = has_letter[has_letter[letter_col] == letter]
mode_bucket = s["_bucket"].mode().iloc[0]
canonical[letter] = mode_bucket + pd.Timedelta(minutes=5)
sessions = []
for letter in sorted(has_letter[letter_col].unique()):
s = has_letter[has_letter[letter_col] == letter]
completed = s[s[sid_col].notna()]
played_signal = s[s[color_col].notna()]
n_total = len(s)
n_completed = len(completed)
n_signal = len(played_signal)
sessions.append({
"letter": letter,
"canonical_bucket": canonical[letter].strftime("%Y-%m-%d %H:%M"),
"n_total": n_total,
"n_completed": n_completed,
"n_signal_played": n_signal,
"mod7": n_signal % 7,
"time_min": s["_time"].min().strftime("%Y-%m-%d %H:%M:%S"),
"time_max": s["_time"].max().strftime("%Y-%m-%d %H:%M:%S"),
})
flagged = []
letters = sorted(canonical.keys())
for _, row in has_letter.iterrows():
typed = row[letter_col]
t = row["_time"]
if pd.isna(t):
continue
dists = {l: abs((t - canonical[l]).total_seconds()) for l in letters}
nearest = min(dists, key=dists.get)
if nearest != typed:
flagged.append({
"code": row[code_col],
"typed_letter": typed,
"suggested_letter": nearest,
"timestamp": t.strftime("%Y-%m-%d %H:%M:%S"),
"dist_own_min": round(dists[typed] / 60, 1),
"dist_suggested_min": round(dists[nearest] / 60, 1),
})
played = has_letter[has_letter[group_col].notna()].copy()
split_groups = []
if len(played):
for gid, g in played.groupby(group_col):
letter_counts = g[letter_col].value_counts().to_dict()
if len(letter_counts) > 1:
treat_val = g[treat_col].iloc[0]
split_groups.append({
"group_id": int(gid),
"treatment": int(treat_val) if pd.notna(treat_val) else None,
"size": len(g),
"letters": {str(k): int(v) for k, v in letter_counts.items()},
})
bucket_detail = {}
for letter in sorted(has_letter[letter_col].unique()):
s = has_letter[has_letter[letter_col] == letter]
counts = s["_bucket"].value_counts().sort_index()
bucket_detail[letter] = [
{"bucket": b.strftime("%Y-%m-%d %H:%M"), "count": int(c)}
for b, c in counts.items()
]
return to_json({
"sessions": sessions,
"flagged": flagged,
"split_groups": split_groups,
"bucket_detail": bucket_detail,
"canonical": {l: v.strftime("%Y-%m-%d %H:%M") for l, v in canonical.items()},
})
@app.route("/api/columns")
def api_columns():
df = _get_df()
groups = {}
for c in df.columns:
prefix = c.split(".")[0]
groups.setdefault(prefix, []).append(c)
return to_json(groups)
if __name__ == "__main__":
app.run(host="0.0.0.0", port=7860)