Spaces:

DetectiveShadow
/

Demo-MVP-Manger

Sleeping

App Files Files Community

Demo-MVP-Manger / app.py

DetectiveShadow

Update app.py

31c3151 verified 7 months ago

raw

history blame contribute delete

17.3 kB

	# Gradio Space: Website-style UI + natural-time search (Questions) + MSG sentence summaries
	import os, json, re
	from collections import Counter
	from datetime import date, timedelta, time

	import gradio as gr
	import pandas as pd
	from huggingface_hub import HfApi, hf_hub_download

	# ----------------- CONFIG -----------------
	# Dataset repo and file to read; REPO_ID and PATH_IN_REPO can be overridden
	# through environment variables of the same name.
	REPO_ID = os.getenv("REPO_ID", "DetectiveShadow/Check_in_Data")
	REPO_TYPE = "dataset"
	PATH_IN_REPO = os.getenv("PATH_IN_REPO", "checkins_full_clean.jsonl")

	# Public dataset → force fully anonymous READS (prevents accidental 401s).
	# huggingface_hub treats token=None as "use whatever token is saved locally or
	# set in the environment"; only token=False guarantees that no Authorization
	# header is sent, so a stale or revoked token cannot break reads.
	READ_TOKEN = False
	api_anon = HfApi(token=READ_TOKEN)

	# ----------------- Styling (dark theme: white text on black, simple site look) -----------------
	# CSS is one stylesheet string handed to gr.Blocks(css=CSS) further down.
	# It overrides Gradio's background variables and forces dark colours with
	# !important on the generic containers, inputs, buttons and tables.
	CSS = """
	:root, [data-theme], [data-theme="light"], [data-theme="dark"] {
	--brand:#6C5CE7;
	--ink:#fff;
	--muted:#9ca3af;

	/* Gradio background vars */
	--body-background-fill:#000000;
	--background-fill-primary:#000000;
	--background-fill-secondary:#000000;
	--block-background-fill:#000000;
	--color-background-primary:#000000;
	}

	/* Global dark */
	html, body, .gradio-container { background:#000 !important; color:#fff !important; }
	label, .prose, .markdown-body, .markdown-body * { color:#fff !important; }
	a { color:#fff !important; text-decoration:underline; }

	.gradio-container { max-width:1200px !important; margin:0 auto !important; }

	/* Top nav + links */
	.navbar { display:flex; align-items:center; justify-content:space-between; padding:14px 8px; }
	.brand { font-weight:800; font-size:20px; letter-spacing:0.3px; color:#fff !important; }
	.navlinks a { margin-left:16px; color:#fff !important; font-weight:600; text-decoration:none; }

	/* Hero: subtle dark gradient */
	.hero {
	border-radius:18px; padding:28px;
	background:linear-gradient(135deg,#0b0f19, #111827) !important;
	border:1px solid #1f2937; margin-bottom:18px; color:#fff !important;
	}
	.hero h1 { font-size:28px; margin:0 0 8px 0; color:#fff !important; }
	.hero p { color:#e5e7eb !important; }

	/* KPI cards */
	.kpi {
	background:#0f172a !important; border:1px solid #1f2937; border-radius:16px; padding:16px;
	color:#fff !important;
	}
	.kpi .n { font-size:28px; font-weight:800; line-height:1; }
	.kpi .l { color:#9ca3af !important; font-size:12px; margin-top:6px }

	/* Inputs / dropdowns */
	input, textarea, select {
	background:#0a0a0a !important; color:#fff !important; border:1px solid #2a2a2a !important;
	}
	::placeholder { color:#9ca3af !important; }

	/* Buttons */
	button, .btn {
	background:var(--brand) !important; color:#fff !important; border:1px solid transparent !important;
	padding:10px 14px; border-radius:10px; font-weight:700;
	}

	/* Tables */
	table, th, td { background:#0b0b0b !important; color:#fff !important; border-color:#2a2a2a !important; }
	thead th { background:#111 !important; }

	/* Panels / tabs (force dark) */
	.block, .row, .column, .panel, .form, .tabs, .tabitem, .compact, .wrap {
	background:#000 !important; color:#fff !important;
	}

	/* Footer */
	.footer { color:#9ca3af !important; font-size:12px; text-align:center; margin-top:24px; padding:12px 0 4px; }
	"""


	# ----------------- Natural-time parsing -----------------
	# Month names and abbreviations → month number. Lookups use the first three
	# letters of the matched word, so "sept" and "september" resolve through "sep".
	_MONTHS = {
	    "january": 1, "jan": 1, "february": 2, "feb": 2, "march": 3, "mar": 3,
	    "april": 4, "apr": 4, "may": 5, "june": 6, "jun": 6, "july": 7, "jul": 7,
	    "august": 8, "aug": 8, "september": 9, "sep": 9, "sept": 9,
	    "october": 10, "oct": 10, "november": 11, "nov": 11, "december": 12, "dec": 12,
	}

	# Compiled once; every pattern is applied to already lower-cased text.
	_LAST_N_RE = re.compile(r"\blast\s+(\d+)\s+(days?|weeks?|months?)\b")
	_YEAR_RE = re.compile(r"\b(20\d{2}|19\d{2})\b")
	_MONTH_RE = re.compile(
	    r"\b(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|"
	    r"sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\b(?:\s+(\d{4}))?"
	)


	def _first_last_of_month(y: int, m: int):
	    """Return (first_day, last_day) of month ``m`` in year ``y`` as dates."""
	    first = date(y, m, 1)
	    # The last day is one day before the first day of the following month.
	    next_first = date(y + 1, 1, 1) if m == 12 else date(y, m + 1, 1)
	    return first, next_first - timedelta(days=1)


	def _week_bounds(d: date):
	    """Return (monday, sunday) of the week that contains ``d``."""
	    start = d - timedelta(days=d.weekday())
	    return start, start + timedelta(days=6)


	def _resolve_when(s: str, today: date):
	    """Match lower-cased text ``s`` against the supported phrases relative to ``today``.

	    Returns (start, end, label) or (None, None, None). May raise ValueError or
	    OverflowError for dates outside the range ``datetime.date`` supports.
	    """
	    if re.search(r"\btoday\b", s):
	        return today, today, "today"
	    if re.search(r"\byesterday\b", s):
	        day = today - timedelta(days=1)
	        return day, day, "yesterday"
	    if re.search(r"\blast\s+week\b", s):
	        this_monday, _ = _week_bounds(today)
	        return this_monday - timedelta(days=7), this_monday - timedelta(days=1), "last week"
	    if re.search(r"\bthis\s+week\b", s):
	        monday, sunday = _week_bounds(today)
	        return monday, sunday, "this week"
	    if re.search(r"\blast\s+month\b", s):
	        y, m = (today.year - 1, 12) if today.month == 1 else (today.year, today.month - 1)
	        first, last = _first_last_of_month(y, m)
	        return first, last, "last month"
	    if re.search(r"\bthis\s+month\b", s):
	        first, last = _first_last_of_month(today.year, today.month)
	        return first, last, "this month"

	    hit = _LAST_N_RE.search(s)
	    if hit:
	        n, unit = int(hit.group(1)), hit.group(2)
	        if n < 1:
	            # "last 0 days" would give a start after the end; treat as unparseable.
	            return None, None, None
	        if unit.startswith("day"):
	            return today - timedelta(days=n - 1), today, f"last {n} days"
	        if unit.startswith("week"):
	            return today - timedelta(days=7 * n - 1), today, f"last {n} weeks"
	        # Months: step back n-1 calendar months (the current month counts as one)
	        # with plain arithmetic instead of a loop, so huge n cannot stall.
	        year, month0 = divmod(today.year * 12 + (today.month - 1) - (n - 1), 12)
	        first, _ = _first_last_of_month(year, month0 + 1)
	        return first, today, f"last {n} months"

	    month_hit = _MONTH_RE.search(s)
	    year_hit = _YEAR_RE.search(s)
	    # A bare year counts only for "in 2024"-style phrases or when it is the whole text.
	    year_only = bool(year_hit) and ("in " in s or len(s) == 4)

	    # Months are checked before bare years so "in August 2024" means August,
	    # not the whole of 2024.
	    if month_hit:
	        word, adjacent_year = month_hit.group(1), month_hit.group(2)
	        # "may" is also a verb ("what may have changed in 2024"); without a year
	        # right after it, let the bare-year rule win.
	        ambiguous_may = word == "may" and not adjacent_year and year_only
	        mon = _MONTHS.get(word[:3])
	        if mon and not ambiguous_may:
	            if adjacent_year:
	                yy = int(adjacent_year)
	            elif year_hit:
	                yy = int(year_hit.group(1))  # e.g. "in august, 2024"
	            else:
	                yy = today.year
	            first, last = _first_last_of_month(yy, mon)
	            return first, last, f"{word.capitalize()} {yy}"

	    if year_only:
	        y = int(year_hit.group(1))
	        return date(y, 1, 1), date(y, 12, 31), str(y)
	    return None, None, None


	def parse_when_text(s: str):
	    """Extract a date window from a free-text question.

	    Recognised (case-insensitive): today, yesterday, this/last week,
	    this/last month, "last N days|weeks|months", a month name with an optional
	    year, and a bare year ("in 2024" or just "2024").

	    Args:
	        s: The question text; None or blank is allowed.

	    Returns:
	        (start, end, label) with inclusive ``datetime.date`` bounds, or
	        (None, None, None) when no supported phrase is found or the phrase
	        describes a date outside the representable range.
	    """
	    if not s or not s.strip():
	        return None, None, None
	    try:
	        return _resolve_when(s.strip().lower(), date.today())
	    except (ValueError, OverflowError):
	        # e.g. "last 99999999999 days" or "jan 0000": not a usable window.
	        return None, None, None

	# ----------------- Timestamp helper -----------------
	def _ts_series(df: pd.DataFrame) -> pd.Series:
	    """Build a datetime Series for ``df`` from its best timestamp-like column.

	    Column names are matched case-insensitively in this order: timestamp,
	    date, created_at, datetime, time. When "date" is the match and a "time"
	    column also exists, the two are joined so the time of day is kept; the
	    date-only parse is used instead if it parses more rows.

	    Args:
	        df: Frame to inspect; it is not modified. None or an empty frame is allowed.

	    Returns:
	        A datetime Series (unparseable values become NaT). It is empty for
	        None/empty input and all-NaT, indexed like ``df``, when no candidate
	        column exists.
	    """
	    if df is None or df.empty:
	        return pd.to_datetime(pd.Series([], dtype="object"), errors="coerce")

	    # Case-insensitive lookup that maps to the real labels, so the caller's
	    # columns are never renamed.
	    lower = {str(c).lower(): c for c in df.columns}

	    for key in ("timestamp", "date", "created_at", "datetime", "time"):
	        if key not in lower:
	            continue
	        parsed = pd.to_datetime(df[lower[key]], errors="coerce")
	        if key == "date" and "time" in lower:
	            joined = df[lower["date"]].astype(str) + " " + df[lower["time"]].astype(str)
	            combined = pd.to_datetime(joined, errors="coerce")
	            # Keep whichever interpretation parses more rows (ties → combined).
	            if combined.notna().sum() >= parsed.notna().sum():
	                parsed = combined
	        return parsed

	    # No usable column: all-NaT, aligned to df's own index so that assigning
	    # the result back as a column lines up row for row.
	    return pd.Series(pd.NaT, index=df.index, dtype="datetime64[ns]")

	# ----------------- Load + normalize (anonymous READ) -----------------
	def _fetch_checkins_file():
	    """Download the check-in JSONL from the Hub.

	    Tries PATH_IN_REPO first, then the alphabetically first ``.jsonl`` file in
	    the repo. Returns (local_path, path_in_repo, note); local_path and
	    path_in_repo are None when nothing could be fetched, and note explains why.
	    """
	    try:
	        local = hf_hub_download(
	            repo_id=REPO_ID, filename=PATH_IN_REPO, repo_type=REPO_TYPE, token=READ_TOKEN
	        )
	    except Exception:
	        # Best-effort: any failure on the configured path falls through to the scan.
	        pass
	    else:
	        return local, PATH_IN_REPO, ""

	    try:
	        files = api_anon.list_repo_files(REPO_ID, repo_type=REPO_TYPE)
	        jsonls = sorted(f for f in files if f.lower().endswith(".jsonl"))
	        if not jsonls:
	            return None, None, f"No JSONL files found in {REPO_ID}."
	        resolved = jsonls[0]
	        local = hf_hub_download(
	            repo_id=REPO_ID, filename=resolved, repo_type=REPO_TYPE, token=READ_TOKEN
	        )
	    except Exception as err:
	        # Surface the problem in the status note instead of crashing the page.
	        return None, None, f"Could not load data from {REPO_ID}: {err}"
	    return local, resolved, f"Configured '{PATH_IN_REPO}' not found; using '{resolved}'."


	def _read_jsonl(path):
	    """Read a JSON-lines file; return (frame, number_of_skipped_lines)."""
	    try:
	        return pd.read_json(path, lines=True), 0
	    except ValueError:
	        pass  # fall back to a tolerant line-by-line parse

	    rows, skipped = [], 0
	    with open(path, "r", encoding="utf-8") as f:
	        for line in f:
	            line = line.strip()
	            if not line:
	                continue
	            try:
	                obj = json.loads(line)
	            except json.JSONDecodeError:
	                skipped += 1
	                continue
	            if isinstance(obj, dict):
	                rows.append(obj)
	            else:
	                skipped += 1  # a bare scalar/list is not a record
	    return pd.DataFrame(rows), skipped


	def _normalize_checkins(df):
	    """Ensure the columns name, date, check_in, Msg, summary, Timestamp exist.

	    Missing columns are filled from case-insensitive aliases when available,
	    otherwise with empty strings. Returns the frame with NaN replaced by "".
	    """
	    cols_l = {str(c).lower(): c for c in df.columns}

	    def pick(*aliases):
	        """Return the first existing column among ``aliases`` (lower-case), else None."""
	        for alias in aliases:
	            if alias in cols_l:
	                return df[cols_l[alias]]
	        return None

	    if "name" not in df.columns:
	        src = pick("name", "username", "sender")
	        df["name"] = src if src is not None else ""

	    if "date" not in df.columns:
	        src = pick("date")
	        if src is not None:
	            df["date"] = src
	        elif "timestamp" in cols_l:
	            stamps = pd.to_datetime(df[cols_l["timestamp"]], errors="coerce")
	            df["date"] = stamps.dt.date.astype("string")
	        else:
	            df["date"] = ""

	    if "check_in" not in df.columns:
	        src = pick("check_in", "valid check-in", "text", "status", "message", "msg")
	        df["check_in"] = src.astype("string") if src is not None else ""

	    if "Msg" not in df.columns:
	        src = pick("msg")
	        if src is None:
	            # No message column at all: summarise from summary, else check_in.
	            src = df["summary"] if "summary" in df.columns else df["check_in"]
	        df["Msg"] = src.astype("string")

	    if "summary" not in df.columns:
	        df["summary"] = ""

	    df["Timestamp"] = _ts_series(df)
	    return df.fillna("")


	def load_df():
	    """Load the check-in dataset and normalize its columns.

	    Downloads with ``token=READ_TOKEN`` and reads the file as JSON lines.

	    Returns:
	        (df, resolved_path, note). ``df`` always has the columns name, date,
	        check_in, Msg, summary and Timestamp, and has no rows when nothing
	        could be loaded. ``resolved_path`` is the file actually used (None on
	        failure). ``note`` is "" or a human-readable remark about fallbacks,
	        failures, or skipped malformed lines.
	    """
	    local, resolved, note = _fetch_checkins_file()
	    if local is None:
	        # Still return the normalized schema so callers can index columns safely.
	        return _normalize_checkins(pd.DataFrame()), None, note

	    df, skipped = _read_jsonl(local)
	    if skipped:
	        note = f"{note} Skipped {skipped} malformed line(s) in '{resolved}'.".strip()
	    return _normalize_checkins(df), resolved, note

	# ----------------- Summaries (from Msg) -----------------
	_STOP = {
	"the","and","for","you","with","that","this","from","have","are","was","were",
	"your","but","not","into","about","then","they","them","our","out","over","under",
	"there","to","in","of","on","a","an","as","at","by","it","is","be","or","if","we",
	"i","me","my","today","todays","today’s","today's"
	}
	def _top_terms(text, k=5):
	toks = re.findall(r"[a-zA-Z]{3,}", text.lower())
	toks = [t for t in toks if t not in _STOP]
	return [w for w,_ in Counter(toks).most_common(k)]

	def sentence_summary(df: pd.DataFrame, time_label: str, selected_name: str \| None):
	if df.empty: return "No matches."
	if "Msg" not in df.columns: df["Msg"] = ""
	if "name" not in df.columns: df["name"] = ""
	def topics(grp):
	text = " ".join(grp["Msg"].astype(str))
	words = _top_terms(text, k=5)
	return ", ".join(words) if words else "various tasks"
	if selected_name and selected_name != "(All)":
	grp = df[df["name"].astype(str).str.strip().str.lower()==selected_name.strip().lower()]
	grp = grp if not grp.empty else df
	return f"{selected_name} worked on {topics(grp)} during {time_label}."
	lines=[]
	for person, grp in df.groupby("name"):
	person = str(person).strip()
	if not person: continue
	lines.append(f"- {person} worked on {topics(grp)} during {time_label}.")
	return "\n".join(lines) if lines else "No named contributors."

	# ----------------- Search helpers -----------------
	def list_names(df: pd.DataFrame):
	    """Return the dropdown choices: "(All)" followed by the sorted distinct names.

	    Names are converted to text and stripped, so values that differ only by
	    surrounding whitespace appear once and mixed value types cannot break the
	    sort. Blank and missing names are left out; a frame without a ``name``
	    column (or None) yields just ["(All)"].
	    """
	    if df is None:
	        return ["(All)"]
	    raw = df.get("name", pd.Series([], dtype=str)).dropna()
	    cleaned = {str(value).strip() for value in raw.unique()}
	    cleaned.discard("")
	    return ["(All)"] + sorted(cleaned)

	def _naive_timestamps(values):
	    """Parse ``values`` into a timezone-naive datetime Series (bad values → NaT)."""
	    ts = pd.to_datetime(values, errors="coerce")
	    if not pd.api.types.is_datetime64_any_dtype(ts):
	        # Mixed UTC offsets can leave an object Series; normalise through UTC.
	        ts = pd.to_datetime(values, errors="coerce", utc=True)
	    if ts.dt.tz is not None:
	        ts = ts.dt.tz_localize(None)  # keep the wall-clock time, drop the zone
	    return ts


	def _range_mask(ts, parts, low, high):
	    """Boolean mask (indexed like ``ts``) of rows whose ``parts`` lie in [low, high].

	    ``parts`` holds the date or time of the non-NaT rows of ``ts`` only.
	    Comparing NaT with a date/time raises TypeError, so NaT rows are kept out
	    of the comparison and simply never match.
	    """
	    hit = (parts >= low) & (parts <= high)
	    return ts.notna() & hit.reindex(ts.index, fill_value=False).astype(bool)


	def _parse_day(text):
	    """Parse a user-typed day into a ``datetime.date``; raise ValueError if unusable."""
	    parsed = pd.to_datetime(str(text).strip())
	    if pd.isna(parsed):
	        raise ValueError(f"not a date: {text!r}")
	    return parsed.date()


	def run_search(name, questions, dstart, dend, use_time, tstart, tend):
	    """Load the dataset and filter it by time window, person and time of day.

	    Args:
	        name: Person to keep, or "(All)" / empty for everybody.
	        questions: Free text; a time phrase found by ``parse_when_text`` takes
	            priority over the manual dates.
	        dstart, dend: Manual inclusive date range, used only when both are
	            given and ``questions`` contains no time phrase.
	        use_time: Whether to apply the time-of-day filter.
	        tstart, tend: "HH:MM" bounds (defaults "00:00" and "23:59"); the end
	            minute is included in full.

	    Returns:
	        (table, summary_text, status_markdown, name_choices). Inputs that
	        cannot be parsed are ignored and reported in ``status_markdown``.
	    """
	    df, resolved, note = load_df()
	    notes = [note] if note else []
	    time_label = "all time"

	    if "Timestamp" in df.columns:
	        ts = _naive_timestamps(df["Timestamp"])
	    else:
	        ts = pd.Series(pd.NaT, index=df.index, dtype="datetime64[ns]")
	    stamped = ts[ts.notna()]
	    keep = pd.Series(True, index=df.index)

	    s, e, label = parse_when_text(questions)
	    if s and e:
	        keep &= _range_mask(ts, stamped.dt.date, s, e)
	        time_label = label or f"{s} → {e}"
	    elif dstart and dend:
	        try:
	            d0, d1 = _parse_day(dstart), _parse_day(dend)
	        except (ValueError, TypeError, OverflowError):
	            notes.append("Could not parse the start/end dates; date filter ignored.")
	        else:
	            keep &= _range_mask(ts, stamped.dt.date, d0, d1)
	            time_label = f"{d0} → {d1}"

	    if name and name != "(All)" and "name" in df.columns:
	        keep &= df["name"].astype(str).str.strip().str.lower() == name.strip().lower()

	    if use_time:
	        try:
	            h0, m0 = map(int, (tstart or "00:00").split(":"))
	            h1, m1 = map(int, (tend or "23:59").split(":"))
	            t0 = time(h0, m0)
	            # Include the whole end minute, so "23:59" covers 23:59:00–23:59:59.
	            t1 = time(h1, m1, 59, 999999)
	        except ValueError:
	            notes.append("Could not parse the start/end times (expected HH:MM); time filter ignored.")
	        else:
	            keep &= _range_mask(ts, stamped.dt.time, t0, t1)

	    res = df[keep].copy()

	    show = [c for c in ["name", "date", "Msg", "check_in", "summary", "Timestamp"] if c in res.columns]
	    table = res[show].reset_index(drop=True)
	    sent = sentence_summary(res, time_label, selected_name=name if name != "(All)" else None)
	    # Two trailing spaces before the newline produce a Markdown line break.
	    status = f"Dataset: `{REPO_ID}`  \nFile: `{resolved or PATH_IN_REPO}`"
	    for remark in notes:
	        status += f"\n\n> {remark}"
	    return table, sent, status, list_names(df)

	# ----------------- UI -----------------
	def _kpi_card(value, caption):
	    """Return the HTML of one KPI card showing ``value`` above ``caption``."""
	    return f"<div class='kpi'><div class='n'>{value}</div><div class='l'>{caption}</div></div>"


	def _kpi_cards(table, sent):
	    """Build the three KPI cards (rows, contributors, time window) for a result.

	    The time window is read back from the summary text, whose sentences end in
	    "during <label>."; when the summary has no such sentence (for example
	    "No matches."), the card falls back to "all time".
	    """
	    rows = len(table)
	    people = 0
	    if "name" in table.columns:
	        names_col = table["name"].astype(str).str.strip()
	        people = int(names_col[names_col != ""].nunique())  # blank names are not contributors
	    # Greedy prefix → the last "during" on the line, so a topic word "during"
	    # cannot be mistaken for the label.
	    m = re.search(r"^.*\bduring (.+)\.$", sent, flags=re.MULTILINE)
	    time_label = m.group(1) if m else "all time"
	    return (
	        _kpi_card(rows, "Rows (filtered)"),
	        _kpi_card(people, "Contributors"),
	        _kpi_card(time_label, "Time window"),
	    )


	with gr.Blocks(css=CSS, theme="soft") as demo:
	    # NAV + HERO
	    with gr.Row(elem_classes=["navbar"]):
	        gr.HTML("<div class='brand'>MVP Manager</div>")
	        gr.HTML("<div class='navlinks'><a href='#search'>Search</a><a href='#about'>About</a></div>")

	    gr.HTML("""
	<div class='hero'>
	<h1>Weekly progress, at a glance</h1>
	<p>Pick a name and ask a question (e.g., <em>What was worked on in August?</em>, <em>last month</em>, <em>this week</em>). Summaries come from the <code>Msg</code> field.</p>
	</div>
	""")

	    # KPIs (autofilled on load and after searches)
	    with gr.Row():
	        k_rows = gr.HTML(_kpi_card("–", "Rows (filtered)"))
	        k_people = gr.HTML(_kpi_card("–", "Contributors"))
	        k_time = gr.HTML(_kpi_card("–", "Time window"))

	    with gr.Tabs():
	        with gr.TabItem("🔎 Search", id="search"):
	            with gr.Row():
	                with gr.Column(scale=1):
	                    names = gr.Dropdown(choices=["(All)"], value="(All)", label="Name")
	                    dstart = gr.Textbox(label="Start date (YYYY-MM-DD, fallback)", value="")
	                    dend = gr.Textbox(label="End date (YYYY-MM-DD, fallback)", value="")
	                    use_time = gr.Checkbox(label="Filter by time of day", value=False)
	                    tstart = gr.Textbox(label="Start time (HH:MM)", value="00:00")
	                    tend = gr.Textbox(label="End time (HH:MM)", value="23:59")
	                    questions = gr.Textbox(
	                        label="Questions",
	                        placeholder="Try: 'What was worked on in August?', 'last month', 'this week', 'yesterday'",
	                        value=""
	                    )
	                    btn_search = gr.Button("Search", variant="primary")
	                with gr.Column(scale=2):
	                    out_table = gr.Dataframe(label="Results")
	                    out_summary = gr.Markdown(label="Summary")
	                    out_status = gr.Markdown(label="Status / File")

	            # Do search
	            def do_search(name, dstart, dend, use_time, tstart, tend, questions):
	                """Run a search from the form values and refresh results, names and KPIs."""
	                table, sent, status, names_choices = run_search(
	                    name, questions, dstart, dend, use_time, tstart, tend
	                )
	                k1, k2, k3 = _kpi_cards(table, sent)
	                return (
	                    table, sent, status,
	                    gr.update(choices=names_choices),
	                    gr.update(value=k1), gr.update(value=k2), gr.update(value=k3),
	                )

	            btn_search.click(
	                fn=do_search,
	                inputs=[names, dstart, dend, use_time, tstart, tend, questions],
	                outputs=[out_table, out_summary, out_status, names, k_rows, k_people, k_time]
	            )

	        with gr.TabItem("ℹ️ About", id="about"):
	            gr.Markdown(
	                f"""
	Dataset: `{REPO_ID}`
	File: `{PATH_IN_REPO}`
	Reads are anonymous (public dataset).
	Natural-time examples: August, last month, this week, yesterday.
	"""
	            )

	    gr.HTML("<div class='footer'>© MVP Manager • Built on Hugging Face Spaces</div>")

	    # ---------- Auto-run on page load: populate names & initial results ----------
	    def init_page():
	        """Populate the name dropdown, results and KPIs with an unfiltered search."""
	        # All names, no manual dates, no time filter, empty question. run_search
	        # already returns the name choices, so the dataset is loaded only once.
	        table, sent, status, names_choices = run_search("(All)", "", "", "", False, "00:00", "23:59")
	        k1, k2, k3 = _kpi_cards(table, sent)
	        return (
	            gr.update(choices=names_choices, value="(All)"),
	            table, sent, status,
	            gr.update(value=k1), gr.update(value=k2), gr.update(value=k3),
	        )

	    demo.load(
	        init_page,
	        outputs=[names, out_table, out_summary, out_status, k_rows, k_people, k_time]
	    )

	if __name__ == "__main__":
	    # Launch the Gradio app only when this file is run as a script, not on import.
	    demo.launch()