Spaces:
Sleeping
Sleeping
| # Gradio Space: Website-style UI + natural-time search (Questions) + MSG sentence summaries | |
| import os, json, re | |
| from collections import Counter | |
| from datetime import date, timedelta, time | |
| import gradio as gr | |
| import pandas as pd | |
| from huggingface_hub import HfApi, hf_hub_download | |
# ----------------- CONFIG -----------------
# Dataset coordinates; REPO_ID and PATH_IN_REPO can be overridden via environment variables.
REPO_ID = os.getenv("REPO_ID", "DetectiveShadow/Check_in_Data")
REPO_TYPE = "dataset"
PATH_IN_REPO = os.getenv("PATH_IN_REPO", "checkins_full_clean.jsonl")

# Public dataset → force fully anonymous READS (prevents accidental 401s).
# huggingface_hub documents token=None as "use the locally saved token" and
# token=False as "disable authentication", so False is what actually keeps a
# stale or invalid saved token from being sent with the request.
READ_TOKEN = False
api_anon = HfApi(token=False)
| # ----------------- Styling (dark theme: white text on black, simple site look) ----------------- | |
| CSS = """ | |
| :root, [data-theme], [data-theme="light"], [data-theme="dark"] { | |
| --brand:#6C5CE7; | |
| --ink:#fff; | |
| --muted:#9ca3af; | |
| /* Gradio background vars */ | |
| --body-background-fill:#000000; | |
| --background-fill-primary:#000000; | |
| --background-fill-secondary:#000000; | |
| --block-background-fill:#000000; | |
| --color-background-primary:#000000; | |
| } | |
| /* Global dark */ | |
| html, body, .gradio-container { background:#000 !important; color:#fff !important; } | |
| label, .prose, .markdown-body, .markdown-body * { color:#fff !important; } | |
| a { color:#fff !important; text-decoration:underline; } | |
| .gradio-container { max-width:1200px !important; margin:0 auto !important; } | |
| /* Top nav + links */ | |
| .navbar { display:flex; align-items:center; justify-content:space-between; padding:14px 8px; } | |
| .brand { font-weight:800; font-size:20px; letter-spacing:0.3px; color:#fff !important; } | |
| .navlinks a { margin-left:16px; color:#fff !important; font-weight:600; text-decoration:none; } | |
| /* Hero: subtle dark gradient */ | |
| .hero { | |
| border-radius:18px; padding:28px; | |
| background:linear-gradient(135deg,#0b0f19, #111827) !important; | |
| border:1px solid #1f2937; margin-bottom:18px; color:#fff !important; | |
| } | |
| .hero h1 { font-size:28px; margin:0 0 8px 0; color:#fff !important; } | |
| .hero p { color:#e5e7eb !important; } | |
| /* KPI cards */ | |
| .kpi { | |
| background:#0f172a !important; border:1px solid #1f2937; border-radius:16px; padding:16px; | |
| color:#fff !important; | |
| } | |
| .kpi .n { font-size:28px; font-weight:800; line-height:1; } | |
| .kpi .l { color:#9ca3af !important; font-size:12px; margin-top:6px } | |
| /* Inputs / dropdowns */ | |
| input, textarea, select { | |
| background:#0a0a0a !important; color:#fff !important; border:1px solid #2a2a2a !important; | |
| } | |
| ::placeholder { color:#9ca3af !important; } | |
| /* Buttons */ | |
| button, .btn { | |
| background:var(--brand) !important; color:#fff !important; border:1px solid transparent !important; | |
| padding:10px 14px; border-radius:10px; font-weight:700; | |
| } | |
| /* Tables */ | |
| table, th, td { background:#0b0b0b !important; color:#fff !important; border-color:#2a2a2a !important; } | |
| thead th { background:#111 !important; } | |
| /* Panels / tabs (force dark) */ | |
| .block, .row, .column, .panel, .form, .tabs, .tabitem, .compact, .wrap { | |
| background:#000 !important; color:#fff !important; | |
| } | |
| /* Footer */ | |
| .footer { color:#9ca3af !important; font-size:12px; text-align:center; margin-top:24px; padding:12px 0 4px; } | |
| """ | |
| # ----------------- Natural-time parsing ----------------- | |
# Month names and common abbreviations mapped to their month number.
_MONTHS = {
    "january": 1, "jan": 1, "february": 2, "feb": 2, "march": 3, "mar": 3,
    "april": 4, "apr": 4, "may": 5, "june": 6, "jun": 6, "july": 7, "jul": 7,
    "august": 8, "aug": 8, "september": 9, "sep": 9, "sept": 9,
    "october": 10, "oct": 10, "november": 11, "nov": 11, "december": 12, "dec": 12,
}

# A month word (every alternative is a key of _MONTHS), optionally followed by a 4-digit year.
_MONTH_RE = re.compile(
    r"\b(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|"
    r"sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\b(?:\s+(\d{4}))?"
)


def _first_last_of_month(y: int, m: int):
    """Return ``(first_day, last_day)`` of month *m* in year *y* as ``date`` objects."""
    first = date(y, m, 1)
    # The last day is the day before the 1st of the following month (December rolls the year).
    next_first = date(y + 1, 1, 1) if m == 12 else date(y, m + 1, 1)
    return first, next_first - timedelta(days=1)


def _week_bounds(d: date):
    """Return the ``(Monday, Sunday)`` dates of the week that contains *d*."""
    start = d - timedelta(days=d.weekday())
    return start, start + timedelta(days=6)


def _month_window(month_word: str, year: int):
    """Return ``(first, last, label)`` for a matched month word and a year."""
    first, last = _first_last_of_month(year, _MONTHS[month_word])
    return first, last, f"{month_word.capitalize()} {year}"


def _resolve_when(s: str, today: date):
    """Match the already lower-cased text *s* against the supported phrases, relative to *today*."""
    if re.search(r"\btoday\b", s):
        return today, today, "today"
    if re.search(r"\byesterday\b", s):
        yesterday = today - timedelta(days=1)
        return yesterday, yesterday, "yesterday"
    if re.search(r"\blast\s+week\b", s):
        this_monday, _ = _week_bounds(today)
        return this_monday - timedelta(days=7), this_monday - timedelta(days=1), "last week"
    if re.search(r"\bthis\s+week\b", s):
        monday, sunday = _week_bounds(today)
        return monday, sunday, "this week"
    if re.search(r"\blast\s+month\b", s):
        # The day before the 1st of this month always lies in the previous month.
        prev = today.replace(day=1) - timedelta(days=1)
        first, last = _first_last_of_month(prev.year, prev.month)
        return first, last, "last month"
    if re.search(r"\bthis\s+month\b", s):
        first, last = _first_last_of_month(today.year, today.month)
        return first, last, "this month"

    m = re.search(r"\blast\s+(\d+)\s+(day|days|week|weeks|month|months)\b", s)
    # "last 0 days" would describe an empty/inverted window, so it is not treated as a match.
    if m and int(m.group(1)) >= 1:
        n, unit = int(m.group(1)), m.group(2)
        if "day" in unit:
            return today - timedelta(days=n - 1), today, f"last {n} days"
        if "week" in unit:
            return today - timedelta(days=7 * n - 1), today, f"last {n} weeks"
        # Months: the window starts on the 1st of the month (n - 1) months back.
        # Closed-form arithmetic instead of stepping one month at a time, so a huge n cannot stall.
        year, month0 = divmod(today.year * 12 + (today.month - 1) - (n - 1), 12)
        return date(year, month0 + 1, 1), today, f"last {n} months"

    # An explicit "<month> <year>" is the most specific request, so it is checked before the
    # bare-year rule; otherwise "in august 2024" would be widened to all of 2024.
    dated = next((hit for hit in _MONTH_RE.finditer(s) if hit.group(2)), None)
    if dated:
        return _month_window(dated.group(1), int(dated.group(2)))

    yhit = re.search(r"\b(20\d{2}|19\d{2})\b", s)
    if yhit and ("in " in s or len(s) == 4):
        y = int(yhit.group(1))
        return date(y, 1, 1), date(y, 12, 31), str(y)

    mth = _MONTH_RE.search(s)
    if mth:
        # Month without a year means that month of the current year.
        return _month_window(mth.group(1), today.year)
    return None, None, None


def parse_when_text(s: str):
    """Extract a date window from free text.

    Recognized, in priority order: "today", "yesterday", "last week", "this week",
    "last month", "this month", "last N days/weeks/months" (N >= 1), "<month> <year>",
    a 19xx/20xx year (when the text contains "in " or is just the year), and a bare
    month name (current year).

    Returns:
        ``(start, end, label)`` with inclusive ``date`` bounds and a short label, or
        ``(None, None, None)`` when the text is empty, contains no recognized phrase,
        or describes a date outside the range ``datetime.date`` supports.
    """
    if not s or not s.strip():
        return None, None, None
    try:
        return _resolve_when(s.strip().lower(), date.today())
    except (ValueError, OverflowError):
        # e.g. "jan 0000" or "last 99999999999 days": not representable, so treat as no match
        # instead of letting the exception reach the UI callback.
        return None, None, None
| # ----------------- Timestamp helper ----------------- | |
def _ts_series(df: pd.DataFrame) -> pd.Series:
    """Derive one datetime Series for *df* from whichever time-like column is usable.

    Column names are matched case-insensitively. Candidates are tried in this order and
    the first one that parses to at least one real timestamp is returned:
    ``timestamp``, ``date`` combined with ``time``, ``date``, ``created_at``,
    ``datetime``, ``time``. Unparseable cells become ``NaT``.

    The input frame is not modified. ``None``/empty input yields an empty Series; a frame
    with no usable column yields an all-``NaT`` Series aligned to ``df.index``.
    """
    if df is None or df.empty:
        return pd.to_datetime(pd.Series([], dtype="object"), errors="coerce")

    # Case-insensitive lookup that leaves the caller's column labels untouched.
    lower = {str(c).lower(): c for c in df.columns}

    def candidates():
        if "timestamp" in lower:
            yield pd.to_datetime(df[lower["timestamp"]], errors="coerce")
        if "date" in lower and "time" in lower:
            date_col, time_col = df[lower["date"]], df[lower["time"]]
            date_only = pd.to_datetime(date_col, errors="coerce")
            both = pd.to_datetime(
                date_col.astype(str).str.strip() + " " + time_col.astype(str).str.strip(),
                errors="coerce",
            )
            # Rows without a parseable date stay NaT (a lone time must not be given a
            # made-up date); rows whose time is unusable fall back to the date alone.
            yield both.where(date_only.notna()).fillna(date_only)
        for key in ("date", "created_at", "datetime", "time"):
            if key in lower:
                yield pd.to_datetime(df[lower[key]], errors="coerce")

    for parsed in candidates():
        # Skip columns that exist but hold nothing parseable (e.g. a blank placeholder
        # "date" column) so they cannot shadow a later, usable column.
        if parsed.notna().any():
            return parsed
    return pd.to_datetime(pd.Series([None] * len(df), index=df.index), errors="coerce")
| # ----------------- Load + normalize (anonymous READ) ----------------- | |
def _read_jsonl(path):
    """Read a JSON-lines file into a DataFrame, skipping blank and malformed lines."""
    try:
        return pd.read_json(path, lines=True)
    except ValueError:
        # pandas rejected the file as a whole; salvage it line by line instead.
        rows = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue  # best effort: one bad line must not discard the file
                if isinstance(obj, dict):  # only JSON objects can become table rows
                    rows.append(obj)
        return pd.DataFrame(rows)


def _normalize_checkins(df):
    """Ensure *df* has the name/date/check_in/Msg/summary/Timestamp columns the app reads."""
    # Source columns are looked up case-insensitively.
    cols_l = {str(c).lower(): c for c in df.columns}
    if "name" not in df.columns:
        if "username" in cols_l:
            df["name"] = df[cols_l["username"]]
        elif "sender" in cols_l:
            df["name"] = df[cols_l["sender"]]
        else:
            df["name"] = ""
    if "date" not in df.columns:
        if "timestamp" in cols_l:
            df["date"] = pd.to_datetime(df[cols_l["timestamp"]], errors="coerce").dt.date.astype("string")
        else:
            df["date"] = ""
    if "check_in" not in df.columns:
        # First alias present in the data wins.
        for c in ["valid check-in", "text", "status", "message", "msg"]:
            if c in cols_l:
                df["check_in"] = df[cols_l[c]].astype("string")
                break
        if "check_in" not in df.columns:
            df["check_in"] = ""
    if "Msg" not in df.columns:
        if "msg" in cols_l:
            df["Msg"] = df[cols_l["msg"]].astype("string")
        else:
            df["Msg"] = df.get("summary", df.get("check_in", "")).astype("string")
    if "summary" not in df.columns:
        df["summary"] = ""
    df["Timestamp"] = _ts_series(df)
    return df


def load_df():
    """
    Return (df, resolved_path, note).

    Downloads PATH_IN_REPO from REPO_ID (passing READ_TOKEN to the Hub client) and, if
    that fails, falls back to the alphabetically first ``.jsonl`` file in the repo.
    The frame always carries the normalized columns name, date, check_in, Msg, summary
    and Timestamp, with missing values replaced by "".

    When nothing can be loaded, the result is an empty frame with those columns,
    ``resolved_path`` is None and ``note`` explains why, so callers can show the
    problem instead of crashing.
    """
    resolved, note = PATH_IN_REPO, ""
    # Try configured path first
    try:
        local = hf_hub_download(repo_id=REPO_ID, filename=PATH_IN_REPO, repo_type=REPO_TYPE, token=READ_TOKEN)
    except Exception:
        # Fallback: pick first JSONL in repo
        try:
            files = api_anon.list_repo_files(REPO_ID, repo_type=REPO_TYPE)
            jsonls = sorted(f for f in files if f.lower().endswith(".jsonl"))
            if not jsonls:
                return _normalize_checkins(pd.DataFrame()), None, f"No JSONL files found in {REPO_ID}."
            resolved = jsonls[0]
            local = hf_hub_download(repo_id=REPO_ID, filename=resolved, repo_type=REPO_TYPE, token=READ_TOKEN)
        except Exception as err:
            # Top-level boundary for the UI: report the failure in the status note.
            return _normalize_checkins(pd.DataFrame()), None, f"Could not load data from {REPO_ID}: {err}"
        note = f"Configured '{PATH_IN_REPO}' not found; using '{resolved}'."

    df = _normalize_checkins(_read_jsonl(local))
    return df.fillna(""), resolved, note
| # ----------------- Summaries (from Msg) ----------------- | |
# Words ignored when picking topic terms for a summary.
_STOP = {
    "the", "and", "for", "you", "with", "that", "this", "from", "have", "are", "was", "were",
    "your", "but", "not", "into", "about", "then", "they", "them", "our", "out", "over", "under",
    "there", "to", "in", "of", "on", "a", "an", "as", "at", "by", "it", "is", "be", "or", "if", "we",
    "i", "me", "my", "today", "todays", "today’s", "today's",
}


def _top_terms(text, k=5):
    """Return up to *k* of the most frequent words in *text*, most frequent first.

    Words are runs of three or more ASCII letters, compared in lower case; anything
    listed in ``_STOP`` is left out.
    """
    counts = Counter(
        word
        for word in re.findall(r"[a-zA-Z]{3,}", text.lower())
        if word not in _STOP
    )
    return [word for word, _count in counts.most_common(k)]
| def sentence_summary(df: pd.DataFrame, time_label: str, selected_name: str | None): | |
| if df.empty: return "No matches." | |
| if "Msg" not in df.columns: df["Msg"] = "" | |
| if "name" not in df.columns: df["name"] = "" | |
| def topics(grp): | |
| text = " ".join(grp["Msg"].astype(str)) | |
| words = _top_terms(text, k=5) | |
| return ", ".join(words) if words else "various tasks" | |
| if selected_name and selected_name != "(All)": | |
| grp = df[df["name"].astype(str).str.strip().str.lower()==selected_name.strip().lower()] | |
| grp = grp if not grp.empty else df | |
| return f"**{selected_name}** worked on {topics(grp)} during **{time_label}**." | |
| lines=[] | |
| for person, grp in df.groupby("name"): | |
| person = str(person).strip() | |
| if not person: continue | |
| lines.append(f"- **{person}** worked on {topics(grp)} during **{time_label}**.") | |
| return "\n".join(lines) if lines else "No named contributors." | |
| # ----------------- Search helpers ----------------- | |
def list_names(df: pd.DataFrame):
    """Return the dropdown choices: "(All)" followed by the distinct non-blank names.

    Missing values and whitespace-only names are dropped. Names are ordered by their
    string form so a column mixing strings with other types (e.g. numbers) sorts
    instead of raising ``TypeError``.
    """
    if df is None or "name" not in df.columns:
        return ["(All)"]
    names = [x for x in df["name"].dropna().unique() if str(x).strip()]
    return ["(All)"] + sorted(names, key=str)
def run_search(name, questions, dstart, dend, use_time, tstart, tend):
    """Load the dataset, apply the requested filters and build the UI outputs.

    Filters, in order:
      1. Date window: taken from the free-text ``questions`` when it contains a
         recognized phrase; otherwise from ``dstart``/``dend`` when both are given.
      2. ``name``: case-insensitive exact match, skipped for "(All)" or empty.
      3. Time of day: ``tstart``..``tend`` (HH:MM, inclusive) when ``use_time`` is set.

    Manual dates or times that cannot be parsed do not abort the search; the
    corresponding filter is skipped and a note saying so is added to the status text.

    Returns:
        ``(table, summary_markdown, status_markdown, name_choices)``.
    """
    df, resolved, note = load_df()
    res = df.copy()
    time_label = "all time"
    ignored = []  # user-facing notes about filters that could not be applied

    # Tolerate a frame without the normalized columns (e.g. nothing could be loaded).
    if "Timestamp" in res.columns:
        ts_all = pd.to_datetime(res["Timestamp"], errors="coerce")
    else:
        ts_all = pd.to_datetime(pd.Series([None] * len(res), index=res.index, dtype="object"), errors="coerce")
    days = ts_all.dt.date

    s, e, label = parse_when_text(questions)
    if s and e:
        res = res[(days >= s) & (days <= e)]
        time_label = label or f"{s} → {e}"
    elif dstart and dend:
        try:
            d0 = pd.to_datetime(dstart).date()
            d1 = pd.to_datetime(dend).date()
        except (ValueError, TypeError, OverflowError):
            ignored.append(f"Ignored date range: could not read '{dstart}' – '{dend}' as YYYY-MM-DD.")
        else:
            res = res[(days >= d0) & (days <= d1)]
            time_label = f"{d0} → {d1}"

    if name and name != "(All)" and "name" in res.columns:
        res = res[res["name"].astype(str).str.strip().str.lower() == name.strip().lower()]

    if use_time:
        try:
            h0, m0 = map(int, (tstart or "00:00").split(":"))
            h1, m1 = map(int, (tend or "23:59").split(":"))
            t0, t1 = time(h0, m0), time(h1, m1)
        except (ValueError, TypeError, AttributeError):
            ignored.append(f"Ignored time-of-day filter: could not read '{tstart}' – '{tend}' as HH:MM.")
        else:
            # Reuse the parsed timestamps, restricted to the rows that are still selected.
            tt = ts_all.loc[res.index].dt.time
            res = res[(tt >= t0) & (tt <= t1)]

    show = [c for c in ["name", "date", "Msg", "check_in", "summary", "Timestamp"] if c in res.columns]
    table = res[show].reset_index(drop=True)
    sent = sentence_summary(res, time_label, selected_name=name if name != "(All)" else None)
    status = f"**Dataset:** `{REPO_ID}` \n**File:** `{resolved or PATH_IN_REPO}`"
    if note:
        status += f"\n\n> {note}"
    for msg in ignored:
        status += f"\n\n> {msg}"
    return table, sent, status, list_names(df)
| # ----------------- UI ----------------- | |
def _kpi_card(value, caption):
    """Return the HTML for one KPI card showing *value* above *caption*."""
    return f"<div class='kpi'><div class='n'>{value}</div><div class='l'>{caption}</div></div>"


def _kpi_cards(table, sent):
    """Return the (rows, contributors, time-window) KPI card HTML for a search result.

    The time window is read back from the ``during **…**`` phrase of the summary text;
    summaries without that phrase (e.g. "No matches.") show "all time".
    """
    rows = len(table)
    # Count distinct non-blank names so the figure agrees with the summary, which
    # skips rows that have no name.
    people = 0
    if "name" in table.columns:
        people = len({n for n in (str(v).strip() for v in table["name"]) if n})
    time_label = "all time"
    m = re.search(r"during \*\*(.+?)\*\*", sent)
    if m:
        time_label = m.group(1)
    return (
        _kpi_card(rows, "Rows (filtered)"),
        _kpi_card(people, "Contributors"),
        _kpi_card(time_label, "Time window"),
    )


with gr.Blocks(css=CSS, theme="soft") as demo:
    # NAV + HERO
    with gr.Row(elem_classes=["navbar"]):
        gr.HTML("<div class='brand'>MVP Manager</div>")
        gr.HTML("<div class='navlinks'><a href='#search'>Search</a><a href='#about'>About</a></div>")
    gr.HTML("""
<div class='hero'>
<h1>Weekly progress, at a glance</h1>
<p>Pick a name and ask a question (e.g., <em>What was worked on in August?</em>, <em>last month</em>, <em>this week</em>). Summaries come from the <code>Msg</code> field.</p>
</div>
""")

    # KPIs (autofilled on load and after searches)
    with gr.Row():
        k_rows = gr.HTML(_kpi_card("–", "Rows (filtered)"))
        k_people = gr.HTML(_kpi_card("–", "Contributors"))
        k_time = gr.HTML(_kpi_card("–", "Time window"))

    with gr.Tabs():
        with gr.TabItem("🔎 Search", id="search"):
            with gr.Row():
                with gr.Column(scale=1):
                    names = gr.Dropdown(choices=["(All)"], value="(All)", label="Name")
                    dstart = gr.Textbox(label="Start date (YYYY-MM-DD, fallback)", value="")
                    dend = gr.Textbox(label="End date (YYYY-MM-DD, fallback)", value="")
                    use_time = gr.Checkbox(label="Filter by time of day", value=False)
                    tstart = gr.Textbox(label="Start time (HH:MM)", value="00:00")
                    tend = gr.Textbox(label="End time (HH:MM)", value="23:59")
                    questions = gr.Textbox(
                        label="Questions",
                        placeholder="Try: 'What was worked on in August?', 'last month', 'this week', 'yesterday'",
                        value=""
                    )
                    btn_search = gr.Button("Search", variant="primary")
                with gr.Column(scale=2):
                    out_table = gr.Dataframe(label="Results")
                    out_summary = gr.Markdown(label="Summary")
                    out_status = gr.Markdown(label="Status / File")

            # Do search
            def do_search(name, dstart, dend, use_time, tstart, tend, questions):
                """Run a search from the form values and refresh results, name choices and KPIs."""
                table, sent, status, names_choices = run_search(name, questions, dstart, dend, use_time, tstart, tend)
                k1, k2, k3 = _kpi_cards(table, sent)
                return (
                    table, sent, status,
                    gr.update(choices=names_choices),
                    gr.update(value=k1), gr.update(value=k2), gr.update(value=k3),
                )

            btn_search.click(
                fn=do_search,
                inputs=[names, dstart, dend, use_time, tstart, tend, questions],
                outputs=[out_table, out_summary, out_status, names, k_rows, k_people, k_time]
            )

        with gr.TabItem("ℹ️ About", id="about"):
            # The Markdown body is kept flush-left so no line can be read as an indented code block.
            gr.Markdown(
                f"""
**Dataset**: `{REPO_ID}`
**File**: `{PATH_IN_REPO}`
Reads are anonymous (public dataset).
Natural-time examples: *August*, *last month*, *this week*, *yesterday*.
"""
            )

    gr.HTML("<div class='footer'>© MVP Manager • Built on Hugging Face Spaces</div>")

    # ---------- Auto-run on page load: populate names & initial results ----------
    def init_page():
        """Populate the name dropdown, results and KPIs with an unfiltered search."""
        # Initial search: All names, no manual dates, no time filter, empty question.
        # run_search already returns the name choices, so the dataset is loaded only once.
        table, sent, status, names_choices = run_search("(All)", "", "", "", False, "00:00", "23:59")
        k1, k2, k3 = _kpi_cards(table, sent)
        return (
            gr.update(choices=names_choices, value="(All)"),
            table, sent, status,
            gr.update(value=k1), gr.update(value=k2), gr.update(value=k3),
        )

    demo.load(
        init_page,
        outputs=[names, out_table, out_summary, out_status, k_rows, k_people, k_time]
    )

if __name__ == "__main__":
    demo.launch()