# Demo-MVP-Manger / app.py
# DetectiveShadow's picture
# Update app.py
# 31c3151 verified
# Gradio Space: Website-style UI + natural-time search (Questions) + MSG sentence summaries
import os, json, re
from collections import Counter
from datetime import date, timedelta, time
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
# ----------------- CONFIG -----------------
# Dataset repo and the file inside it; both can be overridden through the
# REPO_ID / PATH_IN_REPO environment variables.
REPO_ID = os.getenv("REPO_ID", "DetectiveShadow/Check_in_Data")
REPO_TYPE = "dataset"
PATH_IN_REPO = os.getenv("PATH_IN_REPO", "checkins_full_clean.jsonl")
# Public dataset → force fully anonymous READS (prevents accidental 401s)
READ_TOKEN = None
# Token-less Hub client; load_df() uses it to list the repo's files when the
# configured path cannot be downloaded.
api_anon = HfApi(token=None)
# ----------------- Styling (white text on black, simple site look) -----------------
# Stylesheet handed to gr.Blocks(css=CSS). It overrides Gradio's background
# variables and styles the custom classes used by the HTML snippets in the UI
# section (.navbar, .brand, .navlinks, .hero, .kpi, .footer).
CSS = """
:root, [data-theme], [data-theme="light"], [data-theme="dark"] {
--brand:#6C5CE7;
--ink:#fff;
--muted:#9ca3af;
/* Gradio background vars */
--body-background-fill:#000000;
--background-fill-primary:#000000;
--background-fill-secondary:#000000;
--block-background-fill:#000000;
--color-background-primary:#000000;
}
/* Global dark */
html, body, .gradio-container { background:#000 !important; color:#fff !important; }
label, .prose, .markdown-body, .markdown-body * { color:#fff !important; }
a { color:#fff !important; text-decoration:underline; }
.gradio-container { max-width:1200px !important; margin:0 auto !important; }
/* Top nav + links */
.navbar { display:flex; align-items:center; justify-content:space-between; padding:14px 8px; }
.brand { font-weight:800; font-size:20px; letter-spacing:0.3px; color:#fff !important; }
.navlinks a { margin-left:16px; color:#fff !important; font-weight:600; text-decoration:none; }
/* Hero: subtle dark gradient */
.hero {
border-radius:18px; padding:28px;
background:linear-gradient(135deg,#0b0f19, #111827) !important;
border:1px solid #1f2937; margin-bottom:18px; color:#fff !important;
}
.hero h1 { font-size:28px; margin:0 0 8px 0; color:#fff !important; }
.hero p { color:#e5e7eb !important; }
/* KPI cards */
.kpi {
background:#0f172a !important; border:1px solid #1f2937; border-radius:16px; padding:16px;
color:#fff !important;
}
.kpi .n { font-size:28px; font-weight:800; line-height:1; }
.kpi .l { color:#9ca3af !important; font-size:12px; margin-top:6px }
/* Inputs / dropdowns */
input, textarea, select {
background:#0a0a0a !important; color:#fff !important; border:1px solid #2a2a2a !important;
}
::placeholder { color:#9ca3af !important; }
/* Buttons */
button, .btn {
background:var(--brand) !important; color:#fff !important; border:1px solid transparent !important;
padding:10px 14px; border-radius:10px; font-weight:700;
}
/* Tables */
table, th, td { background:#0b0b0b !important; color:#fff !important; border-color:#2a2a2a !important; }
thead th { background:#111 !important; }
/* Panels / tabs (force dark) */
.block, .row, .column, .panel, .form, .tabs, .tabitem, .compact, .wrap {
background:#000 !important; color:#fff !important;
}
/* Footer */
.footer { color:#9ca3af !important; font-size:12px; text-align:center; margin-top:24px; padding:12px 0 4px; }
"""
# ----------------- Natural-time parsing -----------------
_MONTHS = {
"january":1,"jan":1,"february":2,"feb":2,"march":3,"mar":3,"april":4,"apr":4,
"may":5,"june":6,"jun":6,"july":7,"jul":7,"august":8,"aug":8,"september":9,"sep":9,"sept":9,
"october":10,"oct":10,"november":11,"nov":11,"december":12,"dec":12
}
def _first_last_of_month(y:int, m:int):
first = date(y, m, 1)
last = date(y+1,1,1) - timedelta(days=1) if m==12 else date(y, m+1, 1) - timedelta(days=1)
return first, last
def _week_bounds(d: date):
start = d - timedelta(days=d.weekday())
return start, start + timedelta(days=6)
def parse_when_text(s: str):
    """Turn a free-text question into an inclusive calendar window.

    Phrases are checked in this order and the first hit wins: ``today``,
    ``yesterday``, ``last week``, ``this week``, ``last month``,
    ``this month``, ``last N days/weeks/months`` (N >= 1), a month name
    followed by a four-digit year, a 19xx/20xx year (only when the text
    contains ``"in "`` or is just the year), and finally a month name on its
    own, which is taken to mean the current year.

    Args:
        s: The question text; matching is case-insensitive.

    Returns:
        ``(start, end, label)`` where ``start`` and ``end`` are
        ``datetime.date`` bounds and ``label`` is a short description of the
        window, or ``(None, None, None)`` when nothing is recognised or the
        phrase names a date that ``datetime.date`` cannot represent.
    """
    nothing = (None, None, None)
    if not s or not s.strip():
        return nothing
    s = s.strip().lower()
    today = date.today()

    if re.search(r"\btoday\b", s):
        return today, today, "today"
    if re.search(r"\byesterday\b", s):
        y = today - timedelta(days=1)
        return y, y, "yesterday"
    if re.search(r"\blast\s+week\b", s):
        w0, _ = _week_bounds(today)
        return w0 - timedelta(days=7), w0 - timedelta(days=1), "last week"
    if re.search(r"\bthis\s+week\b", s):
        w0, w1 = _week_bounds(today)
        return w0, w1, "this week"
    if re.search(r"\blast\s+month\b", s):
        y, m = (today.year - 1, 12) if today.month == 1 else (today.year, today.month - 1)
        f, l = _first_last_of_month(y, m)
        return f, l, "last month"
    if re.search(r"\bthis\s+month\b", s):
        f, l = _first_last_of_month(today.year, today.month)
        return f, l, "this month"

    month_alt = (
        r"(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|"
        r"sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)"
    )

    # Everything below builds dates from user-supplied numbers, so an absurd
    # year or count can fall outside what date/timedelta can hold. Treat that
    # as "no window recognised" instead of crashing the search.
    try:
        rel = re.search(r"\blast\s+(\d+)\s+(day|days|week|weeks|month|months)\b", s)
        n = int(rel.group(1)) if rel else 0
        if n >= 1:  # "last 0 days" is not a window; fall through
            unit = rel.group(2)
            if "day" in unit:
                return today - timedelta(days=n - 1), today, f"last {n} days"
            if "week" in unit:
                return today - timedelta(days=7 * n - 1), today, f"last {n} weeks"
            # Months: the window starts on the 1st of the month (n - 1) months
            # before the current one. Computed arithmetically so a huge n does
            # not spin in a loop.
            month_index = today.year * 12 + (today.month - 1) - (n - 1)
            y, m0 = divmod(month_index, 12)
            f, _ = _first_last_of_month(y, m0 + 1)
            return f, today, f"last {n} months"

        # A month with an explicit year must be tried before the year-only
        # rule, otherwise "in August 2024" would select the whole of 2024.
        dated = re.search(r"\b" + month_alt + r"\b,?\s+(\d{4})\b", s)
        if dated:
            yy = int(dated.group(2))
            f, l = _first_last_of_month(yy, _MONTHS[dated.group(1)[:3]])
            return f, l, f"{dated.group(1).capitalize()} {yy}"

        yhit = re.search(r"\b(20\d{2}|19\d{2})\b", s)
        if yhit and ("in " in s or len(s) == 4):
            y = int(yhit.group(1))
            return date(y, 1, 1), date(y, 12, 31), str(y)

        bare = re.search(r"\b" + month_alt + r"\b", s)
        if bare:
            f, l = _first_last_of_month(today.year, _MONTHS[bare.group(1)[:3]])
            return f, l, f"{bare.group(1).capitalize()} {today.year}"
    except (ValueError, OverflowError):
        return nothing
    return nothing
# ----------------- Timestamp helper -----------------
def _ts_series(df: pd.DataFrame) -> pd.Series:
if df is None or df.empty:
return pd.to_datetime(pd.Series([], dtype="object"), errors="coerce")
df.columns = [str(c) for c in df.columns]
lower = {c.lower(): c for c in df.columns}
for key in ("timestamp", "date", "created_at", "datetime", "time"):
if key in lower:
return pd.to_datetime(df[lower[key]], errors="coerce")
if "date" in lower and "time" in lower:
return pd.to_datetime(df[lower["date"]].astype(str) + " " + df[lower["time"]].astype(str), errors="coerce")
return pd.to_datetime(pd.Series([None] * len(df)), errors="coerce")
# ----------------- Load + normalize (anonymous READ) -----------------
def load_df():
    """
    Download the check-in JSONL from the Hub and normalise its columns.

    The configured ``PATH_IN_REPO`` is tried first. If that download fails for
    any reason, the alphabetically first ``.jsonl`` file in the repo is used
    instead. Downloads pass ``token=READ_TOKEN`` (``None``, i.e. anonymous).

    The returned frame always has the columns ``name``, ``date``,
    ``check_in``, ``Msg``, ``summary`` and ``Timestamp``; missing ones are
    derived from similarly named columns (matched case-insensitively) or left
    blank. Missing values are replaced with ``""``.

    Returns:
        ``(df, resolved_path, note)``. ``resolved_path`` is the file that was
        read (``None`` when the repo has no JSONL file) and ``note`` is a
        human-readable remark about fallbacks or skipped lines (``""`` when
        there is nothing to report).
    """
    local = None
    try:
        local = hf_hub_download(repo_id=REPO_ID, filename=PATH_IN_REPO, repo_type=REPO_TYPE, token=READ_TOKEN)
        resolved = PATH_IN_REPO
        note = ""
    except Exception:
        # Deliberately broad: any failure to fetch the configured file
        # triggers the "first JSONL in the repo" fallback.
        files = api_anon.list_repo_files(REPO_ID, repo_type=REPO_TYPE)
        jsonls = [f for f in files if f.lower().endswith(".jsonl")]
        if jsonls:
            resolved = sorted(jsonls)[0]
            local = hf_hub_download(repo_id=REPO_ID, filename=resolved, repo_type=REPO_TYPE, token=READ_TOKEN)
            note = f"Configured '{PATH_IN_REPO}' not found; using '{resolved}'."
        else:
            resolved = None
            note = f"No JSONL files found in {REPO_ID}."

    if local is None:
        # Nothing to read; still run normalisation so callers get every column.
        df = pd.DataFrame()
    else:
        try:
            df = pd.read_json(local, lines=True)
        except ValueError:
            # Line-by-line fallback: keep JSON objects, count what is dropped.
            rows = []
            skipped = 0
            with open(local, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        record = json.loads(line)
                    except ValueError:  # json.JSONDecodeError is a ValueError
                        skipped += 1
                        continue
                    if isinstance(record, dict):
                        rows.append(record)
                    else:
                        skipped += 1
            df = pd.DataFrame(rows)
            if skipped:
                extra = f"Skipped {skipped} malformed line(s)."
                note = f"{note} {extra}" if note else extra

    # Normalize columns (lookups are case-insensitive via cols_l)
    cols_l = {str(c).lower(): c for c in df.columns}

    if "name" not in df.columns:
        for alias in ("name", "username", "sender"):
            if alias in cols_l:
                df["name"] = df[cols_l[alias]]
                break
        else:
            df["name"] = ""

    if "date" not in df.columns:
        if "date" in cols_l:
            df["date"] = df[cols_l["date"]]
        elif "timestamp" in cols_l:
            df["date"] = pd.to_datetime(df[cols_l["timestamp"]], errors="coerce").dt.date.astype("string")
        else:
            df["date"] = ""

    if "check_in" not in df.columns:
        for alias in ("check_in", "valid check-in", "text", "status", "message", "msg"):
            if alias in cols_l:
                df["check_in"] = df[cols_l[alias]].astype("string")
                break
        else:
            df["check_in"] = ""

    if "Msg" not in df.columns:
        if "msg" in cols_l:
            df["Msg"] = df[cols_l["msg"]].astype("string")
        elif "summary" in cols_l:
            df["Msg"] = df[cols_l["summary"]].astype("string")
        else:
            df["Msg"] = df["check_in"].astype("string")

    if "summary" not in df.columns:
        df["summary"] = df[cols_l["summary"]] if "summary" in cols_l else ""

    df["Timestamp"] = _ts_series(df)
    return df.fillna(""), resolved, note
# ----------------- Summaries (from Msg) -----------------
_STOP = {
"the","and","for","you","with","that","this","from","have","are","was","were",
"your","but","not","into","about","then","they","them","our","out","over","under",
"there","to","in","of","on","a","an","as","at","by","it","is","be","or","if","we",
"i","me","my","today","todays","today’s","today's"
}
def _top_terms(text, k=5):
    """Return up to ``k`` of the most frequent words in ``text``.

    Words are runs of three or more ASCII letters, compared in lower case;
    anything listed in ``_STOP`` is ignored.
    """
    candidates = (
        word
        for word in re.findall(r"[a-zA-Z]{3,}", text.lower())
        if word not in _STOP
    )
    ranked = Counter(candidates).most_common(k)
    return [term for term, _count in ranked]
def sentence_summary(df: pd.DataFrame, time_label: str, selected_name: str | None):
    """Describe, in Markdown, what each person worked on according to ``Msg``.

    Args:
        df: Rows to summarise. ``Msg`` and ``name`` columns are added to it
            (blank) when missing.
        time_label: Text naming the time window, quoted in every sentence.
        selected_name: A single person to summarise, or ``None`` / ``"(All)"``
            for one bullet per distinct non-blank name.

    Returns:
        ``"No matches."`` for an empty frame; one sentence for a selected
        person (built from all rows when none match that name); otherwise a
        bullet list, or ``"No named contributors."`` when every name is blank.
    """
    if df.empty:
        return "No matches."

    # Backfill the two columns the summary reads.
    for required in ("Msg", "name"):
        if required not in df.columns:
            df[required] = ""

    def describe(rows):
        keywords = _top_terms(" ".join(rows["Msg"].astype(str)), k=5)
        if not keywords:
            return "various tasks"
        return ", ".join(keywords)

    if selected_name and selected_name != "(All)":
        normalised = df["name"].astype(str).str.strip().str.lower()
        own_rows = df[normalised == selected_name.strip().lower()]
        if own_rows.empty:
            own_rows = df
        return f"**{selected_name}** worked on {describe(own_rows)} during **{time_label}**."

    bullets = []
    for key, rows in df.groupby("name"):
        who = str(key).strip()
        if who:
            bullets.append(f"- **{who}** worked on {describe(rows)} during **{time_label}**.")
    if not bullets:
        return "No named contributors."
    return "\n".join(bullets)
# ----------------- Search helpers -----------------
def list_names(df: pd.DataFrame):
    """Return the dropdown choices: ``"(All)"`` followed by the sorted names.

    Names are taken from the ``name`` column (if present), converted to
    strings and stripped. Blank and missing values are dropped, and values
    that differ only by surrounding whitespace are merged. Converting to
    strings first keeps sorting from failing on mixed-type columns.
    """
    raw = df.get("name", pd.Series([], dtype=str)).dropna()
    cleaned = {str(value).strip() for value in raw}
    cleaned.discard("")
    return ["(All)"] + sorted(cleaned)
def run_search(name, questions, dstart, dend, use_time, tstart, tend):
    """Load the dataset, apply the requested filters and summarise the result.

    Filters, in order:

    1. Date window: taken from ``questions`` via ``parse_when_text``; if that
       yields nothing, from the manual ``dstart``/``dend`` pair (both needed).
    2. ``name``: case- and whitespace-insensitive match, skipped for ``"(All)"``.
    3. Time of day between ``tstart`` and ``tend`` (``HH:MM``) when
       ``use_time`` is set.

    Manual inputs that cannot be parsed are ignored and reported in the
    returned status text.

    Returns:
        ``(table, sentence, status, names)``: the filtered rows limited to the
        display columns, the Markdown summary, a Markdown status block naming
        dataset and file, and the name choices computed from the unfiltered
        data.
    """
    df, resolved, note = load_df()
    res = df.copy()
    time_label = "all time"
    problems = []

    # Tolerate a frame without a Timestamp column (then every value is NaT).
    ts_raw = res["Timestamp"] if "Timestamp" in res.columns else pd.Series(index=res.index, dtype="object")
    ts_all = pd.to_datetime(ts_raw, errors="coerce")

    def as_day(text):
        # Whitespace-only input parses to NaT; treat that as invalid too.
        parsed = pd.to_datetime(str(text).strip())
        if pd.isna(parsed):
            raise ValueError(f"not a date: {text!r}")
        return parsed.date()

    s, e, label = parse_when_text(questions)
    if s and e:
        days = ts_all.dt.date
        res = res[(days >= s) & (days <= e)]
        time_label = label or f"{s} to {e}"
    elif dstart and dend:
        try:
            d0 = as_day(dstart)
            d1 = as_day(dend)
        except (ValueError, TypeError, OverflowError):
            problems.append("Manual date range ignored: could not read the start/end dates (use YYYY-MM-DD).")
        else:
            days = ts_all.dt.date
            res = res[(days >= d0) & (days <= d1)]
            time_label = f"{d0} to {d1}"
    elif dstart or dend:
        problems.append("Manual date range ignored: both a start and an end date are needed.")

    if name and name != "(All)" and "name" in res.columns:
        res = res[res["name"].astype(str).str.strip().str.lower() == name.strip().lower()]

    if use_time:
        try:
            h0, m0 = map(int, (tstart or "00:00").split(":"))
            h1, m1 = map(int, (tend or "23:59").split(":"))
            t0, t1 = time(h0, m0), time(h1, m1)
        except (ValueError, TypeError, AttributeError):
            problems.append("Time-of-day filter ignored: start/end must be HH:MM.")
        else:
            ts_res = res["Timestamp"] if "Timestamp" in res.columns else pd.Series(index=res.index, dtype="object")
            tt = pd.to_datetime(ts_res, errors="coerce").dt.time
            res = res[(tt >= t0) & (tt <= t1)]

    show = [c for c in ["name", "date", "Msg", "check_in", "summary", "Timestamp"] if c in res.columns]
    table = res[show].reset_index(drop=True)
    sent = sentence_summary(res, time_label, selected_name=name if name != "(All)" else None)

    # Two trailing spaces make Markdown break the line between the two fields.
    status = f"**Dataset:** `{REPO_ID}`  \n**File:** `{resolved or PATH_IN_REPO}`"
    if note:
        status += f"\n\n> {note}"
    for problem in problems:
        status += f"\n\n> {problem}"
    return table, sent, status, list_names(df)
# ----------------- UI -----------------
def _kpi_card(value, caption):
    """Return the HTML for one KPI tile: a large value above a small caption."""
    import html  # function-scope import keeps this section self-contained

    # The value can come from user-influenced text (the time label), so escape it.
    return (
        f"<div class='kpi'><div class='n'>{html.escape(str(value))}</div>"
        f"<div class='l'>{caption}</div></div>"
    )


def _kpi_updates(table, sent):
    """Build the three KPI updates (rows, contributors, time window).

    The time window is read back from the summary text, which quotes it as
    ``during **<label>**``; when the summary has no such phrase (for example
    "No matches.") the label stays "all time".
    """
    rows = len(table)
    if "name" in table.columns:
        # Count distinct non-blank names; blank names are not contributors.
        people = len({n for n in table["name"].astype(str).str.strip() if n})
    else:
        people = 0
    time_label = "all time"
    m = re.search(r"during \*\*(.+?)\*\*", sent)
    if m:
        time_label = m.group(1)
    return (
        gr.update(value=_kpi_card(rows, "Rows (filtered)")),
        gr.update(value=_kpi_card(people, "Contributors")),
        gr.update(value=_kpi_card(time_label, "Time window")),
    )


def do_search(name, dstart, dend, use_time, tstart, tend, questions):
    """Search-button handler: run the search and refresh table, texts, names and KPIs."""
    table, sent, status, names_choices = run_search(name, questions, dstart, dend, use_time, tstart, tend)
    return (table, sent, status, gr.update(choices=names_choices), *_kpi_updates(table, sent))


# ---------- Auto-run on page load: populate names & initial results ----------
def init_page():
    """Page-load handler: unfiltered search (all names, no dates, no time filter)."""
    # run_search already returns the name choices, so one dataset load is enough.
    table, sent, status, names_choices = run_search("(All)", "", "", "", False, "00:00", "23:59")
    return (
        gr.update(choices=names_choices, value="(All)"),
        table, sent, status,
        *_kpi_updates(table, sent),
    )


with gr.Blocks(css=CSS, theme="soft") as demo:
    # NAV + HERO
    with gr.Row(elem_classes=["navbar"]):
        gr.HTML("<div class='brand'>MVP Manager</div>")
        gr.HTML("<div class='navlinks'><a href='#search'>Search</a><a href='#about'>About</a></div>")
    gr.HTML(
        "\n<div class='hero'>\n"
        "<h1>Weekly progress, at a glance</h1>\n"
        "<p>Pick a name and ask a question (e.g., <em>What was worked on in August?</em>, "
        "<em>last month</em>, <em>this week</em>). Summaries come from the <code>Msg</code> field.</p>\n"
        "</div>\n"
    )

    # KPIs (autofilled on load and after searches)
    with gr.Row():
        k_rows = gr.HTML(_kpi_card("–", "Rows (filtered)"))
        k_people = gr.HTML(_kpi_card("–", "Contributors"))
        k_time = gr.HTML(_kpi_card("–", "Time window"))

    with gr.Tabs():
        with gr.TabItem("🔎 Search", id="search"):
            with gr.Row():
                with gr.Column(scale=1):
                    names = gr.Dropdown(choices=["(All)"], value="(All)", label="Name")
                    dstart = gr.Textbox(label="Start date (YYYY-MM-DD, fallback)", value="")
                    dend = gr.Textbox(label="End date (YYYY-MM-DD, fallback)", value="")
                    use_time = gr.Checkbox(label="Filter by time of day", value=False)
                    tstart = gr.Textbox(label="Start time (HH:MM)", value="00:00")
                    tend = gr.Textbox(label="End time (HH:MM)", value="23:59")
                    questions = gr.Textbox(
                        label="Questions",
                        placeholder="Try: 'What was worked on in August?', 'last month', 'this week', 'yesterday'",
                        value="",
                    )
                    btn_search = gr.Button("Search", variant="primary")
                with gr.Column(scale=2):
                    out_table = gr.Dataframe(label="Results")
                    out_summary = gr.Markdown(label="Summary")
                    out_status = gr.Markdown(label="Status / File")

            # Do search
            btn_search.click(
                fn=do_search,
                inputs=[names, dstart, dend, use_time, tstart, tend, questions],
                outputs=[out_table, out_summary, out_status, names, k_rows, k_people, k_time],
            )

        with gr.TabItem("ℹ️ About", id="about"):
            # Two trailing spaces before each newline are Markdown hard line
            # breaks; without them the lines run together as one paragraph.
            gr.Markdown(
                f"**Dataset**: `{REPO_ID}`  \n"
                f"**File**: `{PATH_IN_REPO}`  \n"
                "Reads are anonymous (public dataset).  \n"
                "Natural-time examples: *August*, *last month*, *this week*, *yesterday*."
            )

    gr.HTML("<div class='footer'>© MVP Manager • Built on Hugging Face Spaces</div>")

    # Populate names, results and KPIs as soon as the page opens.
    demo.load(
        init_page,
        outputs=[names, out_table, out_summary, out_status, k_rows, k_people, k_time],
    )

if __name__ == "__main__":
    demo.launch()