import os
import math, random, re, warnings
import joblib
import numpy as np
import pandas as pd                                    # For working with dataframes
import datasets                                        # To upload our data to HF
import huggingface_hub                                 # For HF authentication in notebooks
from openai import OpenAI
import torch, torch.nn as nn, torch.optim as optim
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from scipy.stats import spearmanr
from datetime import datetime, timezone, timedelta
# load data
ds_dict = datasets.load_dataset("samder03/Project1")
ds = ds_dict["original"]
df = ds.to_pandas() if hasattr(ds, "to_pandas") else ds.copy()

imp_col = "importance (1-10)"
dur_col = "how long it takes (hours)"
hor_col = "when it's due (days)"
# embeddings
embedder = SentenceTransformer("all-MiniLM-L6-v2")  # 384-dim
embeddings = embedder.encode(df["task"].astype(str).tolist(), normalize_embeddings=True).astype(np.float32)  # (N, 384)
df["task_embedding"] = list(embeddings)
def valid_mask(series: pd.Series):
    return series.notna() & (series.astype(str).str.strip() != "")

m_imp = valid_mask(df[imp_col])
m_dur = valid_mask(df[dur_col])
m_core = m_imp & m_dur

X_all = np.stack(df.loc[m_core, "task_embedding"].values).astype(np.float32)
y_imp = df.loc[m_core, imp_col].astype(float).values
y_dur = df.loc[m_core, dur_col].astype(float).values

if hor_col:
    y_hor_raw = df[hor_col].astype(float).values
    y_hor_raw[y_hor_raw < 0] = np.nan  # -1 => no deadline label
    y_hor = y_hor_raw[m_core]
else:
    y_hor = None
rng = 42
idx_all = np.arange(len(X_all))

# first: carve out 20% for test
train_idx, test_idx = train_test_split(idx_all, test_size=0.20, random_state=rng)
# then: from the remaining 80%, carve out 12.5% for val (=> 10% overall)
train_idx, val_idx = train_test_split(train_idx, test_size=0.125, random_state=rng)

# slice features/labels
X_tr, X_va, X_te = X_all[train_idx], X_all[val_idx], X_all[test_idx]
I_tr, I_va, I_te = y_imp[train_idx], y_imp[val_idx], y_imp[test_idx]
D_tr, D_va, D_te = y_dur[train_idx], y_dur[val_idx], y_dur[test_idx]

# horizon: -1 already converted to NaN upstream
if y_hor is not None:
    H_tr_all, H_va_all, H_te_all = y_hor[train_idx], y_hor[val_idx], y_hor[test_idx]
    mH_tr = np.isfinite(H_tr_all)  # True where horizon label exists
    mH_va = np.isfinite(H_va_all)
    mH_te = np.isfinite(H_te_all)
else:
    H_tr_all = H_va_all = H_te_all = None
    mH_tr = mH_va = mH_te = None
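# Optional sanity check (illustrative): the nested split above should give roughly a
# 70/10/20 train/val/test ratio (20% test, then 12.5% of the remaining 80% as val).
n_total = len(idx_all)
print(f"train={len(train_idx)/n_total:.2f} val={len(val_idx)/n_total:.2f} test={len(test_idx)/n_total:.2f}")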
# ---------- 5) Scale embeddings (fit on TRAIN only) ----------
scaler = StandardScaler()
scaler.fit(X_tr)  # fit only on train to avoid leakage
Xs_tr = scaler.transform(X_tr).astype(np.float32)
Xs_va = scaler.transform(X_va).astype(np.float32)
Xs_te = scaler.transform(X_te).astype(np.float32)

# persist for inference
joblib.dump(scaler, "mtl_scaler.joblib")
# 0) Setup: device, seeds, helpers
torch.manual_seed(42); np.random.seed(42); random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)
torch.backends.cudnn.benchmark = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def tt(a, dtype=torch.float32):
    return torch.from_numpy(a).to(device).to(dtype)

def safe_any_mask(m):
    return (m is not None) and isinstance(m, torch.Tensor) and m.numel() > 0 and m.any().item()

def safe_spearman(a, b):
    r = spearmanr(a, b).correlation
    return float("nan") if r is None else float(r)

Xt_tr, Xt_va, Xt_te = tt(Xs_tr), tt(Xs_va), tt(Xs_te)
yI_tr, yI_va, yI_te = tt(I_tr), tt(I_va), tt(I_te)
yD_tr, yD_va, yD_te = tt(D_tr), tt(D_va), tt(D_te)

if y_hor is not None:
    H_tr_all_t, H_va_all_t, H_te_all_t = tt(H_tr_all), tt(H_va_all), tt(H_te_all)
    mH_tr_t = torch.from_numpy(mH_tr.astype(bool)).to(device)
    mH_va_t = torch.from_numpy(mH_va.astype(bool)).to(device)
    mH_te_t = torch.from_numpy(mH_te.astype(bool)).to(device)
else:
    # keep the names defined so the training/eval code below can test them for None
    H_tr_all_t = H_va_all_t = H_te_all_t = None
    mH_tr_t = mH_va_t = mH_te_t = None
# 3) Model: Multi-Task MLP (shared trunk + 3 heads)
#    Slightly wider trunk; textbook uncertainty weighting (0.5 factor)
class MTLNet(nn.Module):
    def __init__(self, d_in, d_hid=512):
        super().__init__()
        self.trunk = nn.Sequential(
            nn.Linear(d_in, d_hid), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(d_hid, 256), nn.ReLU(), nn.Dropout(0.1),
        )
        self.head_imp = nn.Linear(256, 1)  # importance (raw)
        self.head_dur = nn.Linear(256, 1)  # log-hours
        self.head_hor = nn.Linear(256, 1)  # log-days
        # homoscedastic uncertainty (log sigma per task)
        self.log_sigma_imp = nn.Parameter(torch.tensor(0.0))
        self.log_sigma_dur = nn.Parameter(torch.tensor(0.0))
        self.log_sigma_hor = nn.Parameter(torch.tensor(0.0))
        self._L1 = nn.SmoothL1Loss()
        self._MSE = nn.MSELoss()

    def forward(self, x):
        h = self.trunk(x)
        return (
            self.head_imp(h).squeeze(-1),
            self.head_dur(h).squeeze(-1),
            self.head_hor(h).squeeze(-1),
        )

    def multitask_loss(self, xb, yI, yD, yH=None, mH=None):
        rI, rD, rH = self(xb)
        # importance: SmoothL1 on raw scale
        l_imp = self._L1(rI, yI)
        # duration: MSE on log1p(hours)
        l_dur = self._MSE(rD, torch.log1p(yD))
        loss = 0.5*torch.exp(-self.log_sigma_imp)*l_imp + self.log_sigma_imp \
             + 0.5*torch.exp(-self.log_sigma_dur)*l_dur + self.log_sigma_dur
        l_hor_val = None
        if (yH is not None) and safe_any_mask(mH):
            l_hor = self._MSE(rH[mH], torch.log1p(yH[mH]))
            loss = loss + 0.5*torch.exp(-self.log_sigma_hor)*l_hor + self.log_sigma_hor
            l_hor_val = float(l_hor.item())
        return loss, (float(l_imp.item()), float(l_dur.item()), l_hor_val)

net = MTLNet(d_in=Xt_tr.shape[1]).to(device)
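# A minimal sketch (illustrative values only, not learned ones) of the homoscedastic
# uncertainty weighting used in multitask_loss above: each task loss l_t is scaled by
# 0.5*exp(-log_sigma_t) and pays a +log_sigma_t penalty, so a head can learn to
# down-weight a noisy task, but not for free.
def _uncertainty_weighted(l_t, log_sigma_t):
    return 0.5 * torch.exp(-log_sigma_t) * l_t + log_sigma_t

for _s in (-1.0, 0.0, 1.0):
    _demo = _uncertainty_weighted(torch.tensor(2.0), torch.tensor(_s))
    print(f"log_sigma={_s:+.1f} -> weighted loss {float(_demo):.3f}")  # larger log_sigma shrinks the task term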
# 4) Prediction + Eval helpers
def predict_heads(Xt):
    net.eval()
    with torch.no_grad():
        rI, rD, rH = net(Xt)
    I = torch.clamp(rI, 1.0, 10.0)            # importance 1..10
    Hh = torch.expm1(rD).clamp(0.25, 12.0)    # hours
    Hd = torch.expm1(rH).clamp(0.0, 30.0)     # days
    return I, Hh, Hd

def eval_block(Xt, yI_true, yD_true, yH_true=None, mH=None):
    I, Hh, Hd = predict_heads(Xt)
    I_np, H_np, Hd_np = I.detach().cpu().numpy(), Hh.detach().cpu().numpy(), Hd.detach().cpu().numpy()
    yI_np, yD_np = yI_true.detach().cpu().numpy(), yD_true.detach().cpu().numpy()
    maeI = mean_absolute_error(yI_np, I_np)
    maeD = mean_absolute_error(yD_np, H_np)
    rhoI = safe_spearman(yI_np, I_np) if len(I_np) > 1 else float("nan")
    rhoD = safe_spearman(yD_np, H_np) if len(H_np) > 1 else float("nan")
    out = {"maeI": maeI, "maeD": maeD, "rhoI": rhoI, "rhoD": rhoD}
    if (yH_true is not None) and (mH is not None) and mH.any().item():
        yH_np, mH_np = yH_true.detach().cpu().numpy(), mH.detach().cpu().numpy().astype(bool)
        if mH_np.sum() > 0:
            maeH = mean_absolute_error(yH_np[mH_np], Hd_np[mH_np])
            rhoH = safe_spearman(yH_np[mH_np], Hd_np[mH_np]) if mH_np.sum() > 1 else float("nan")
            out.update({"maeH": maeH, "rhoH": rhoH})
    return out
# 5) Train loop with per-batch cosine, AMP (new API), early stop
EPOCHS = 120
BATCH = 64
best_val = float("inf")
patience = 20
bad = 0

opt = optim.AdamW(net.parameters(), lr=2e-4, weight_decay=2e-4)
sched = optim.lr_scheduler.CosineAnnealingWarmRestarts(opt, T_0=60, T_mult=2, eta_min=1e-6)
amp_scaler = torch.amp.GradScaler('cuda', enabled=torch.cuda.is_available())  # AMP grad scaler (distinct from the feature StandardScaler above)

n_tr = Xt_tr.shape[0]
for ep in range(1, EPOCHS + 1):
    net.train()
    order = torch.randperm(n_tr, device=device)
    tot_loss = 0.0
    for s in range(0, n_tr, BATCH):
        e = min(s + BATCH, n_tr)
        idx = order[s:e]
        xb, yi, yd = Xt_tr[idx], yI_tr[idx], yD_tr[idx]
        yh, mh = (H_tr_all_t[idx], mH_tr_t[idx]) if H_tr_all_t is not None else (None, None)
        opt.zero_grad(set_to_none=True)
        with torch.autocast('cuda', enabled=torch.cuda.is_available()):
            loss, (lI, lD, lH) = net.multitask_loss(xb, yi, yd, yh, mh)
        amp_scaler.scale(loss).backward()
        amp_scaler.unscale_(opt)
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1.0)
        amp_scaler.step(opt); amp_scaler.update()
        # per-iteration cosine step (nice smooth LR curve)
        progress = (s + BATCH) / max(n_tr, 1)
        sched.step((ep - 1) + progress)
        tot_loss += float(loss.item())

    # ---- validation ----
    stats_va = eval_block(
        Xt_va, yI_va, yD_va,
        (H_va_all_t if H_va_all_t is not None else None),
        (mH_va_t if mH_va_t is not None else None)
    )
    total_val = stats_va["maeI"] + stats_va["maeD"] + stats_va.get("maeH", 0.0)

    if ep % 5 == 0:
        lr_now = opt.param_groups[0]["lr"]
        extraH = f" hor={stats_va.get('maeH', float('nan')):.3f}" if "maeH" in stats_va else ""
        print(f"ep {ep:03d} | train loss {tot_loss:.3f} | lr {lr_now:.2e} | "
              f"val imp={stats_va['maeI']:.3f} dur={stats_va['maeD']:.3f}{extraH}")

    # ---- early stopping on summed MAE ----
    if total_val < best_val - 1e-4:
        best_val = total_val
        bad = 0
        torch.save(net.state_dict(), "mtl_net.pt")
    else:
        bad += 1
        if bad >= patience:
            break
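# Optional sketch of the per-iteration cosine stepping used above: CosineAnnealingWarmRestarts
# accepts fractional epochs, so calling sched.step(epoch - 1 + batch_fraction) anneals the LR
# smoothly within each epoch instead of once per epoch. A throwaway optimizer/scheduler pair
# (illustrative only, separate from the training objects) shows the resulting curve:
_probe = nn.Parameter(torch.zeros(1))
_opt_probe = optim.AdamW([_probe], lr=2e-4)
_sched_probe = optim.lr_scheduler.CosineAnnealingWarmRestarts(_opt_probe, T_0=60, T_mult=2, eta_min=1e-6)
for _frac in (0.0, 0.5, 10.0, 30.0, 59.5):
    _sched_probe.step(_frac)
    print(f"epoch {_frac:5.1f}: lr={_opt_probe.param_groups[0]['lr']:.2e}")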
# 6) TEST with best checkpoint + final confirmation
net.load_state_dict(torch.load("mtl_net.pt", map_location=device))
stats_te = eval_block(
    Xt_te, yI_te, yD_te,
    (H_te_all_t if H_te_all_t is not None else None),
    (mH_te_t if mH_te_t is not None else None)
)
print("test:", stats_te)
# --- helpers reused by reorder_tasks() below ---
def _parse_due_utc(s):
    """Parse ISO 8601 to tz-aware UTC datetime; return None if empty/invalid."""
    if not isinstance(s, str) or not s.strip():
        return None
    try:
        dt = datetime.fromisoformat(s.strip())
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        else:
            dt = dt.astimezone(timezone.utc)
        return dt
    except Exception:
        return None

def _due_from_model(now_utc, h_days):
    if h_days is None:
        return None
    try:
        if not np.isfinite(h_days):
            return None
    except Exception:
        return None
    return now_utc + (timedelta(hours=2) if h_days <= 0 else timedelta(days=float(h_days)))

def _due_from_heuristic(now_utc, I, H_hours):
    # no model/user deadline: back off to a horizon based on importance and task size
    base = (10.0 - float(I)) / 3.0
    size = float(H_hours) / 6.0
    days = float(np.clip(base + size, 0.5, 14.0))
    return now_utc + timedelta(days=days)
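# Worked example (illustrative numbers) of the heuristic above: an importance-8, 3-hour
# task gets base=(10-8)/3≈0.67 days plus size=3/6=0.5 days, i.e. a due date roughly
# 1.2 days out; the clip keeps the horizon between 0.5 and 14 days.
_demo_due = _due_from_heuristic(datetime.now(timezone.utc), I=8.0, H_hours=3.0)
print("heuristic due:", _demo_due.isoformat())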
def parse_user_due_utc(s, *, local_tz=timezone.utc, eod_hour=23, eod_min=59):
    """
    Parse user string -> (dt_utc, had_time)
    - accepts ISO and common US formats
    - naive -> local_tz
    - date-only -> end-of-day local, then convert to UTC
    Returns (None, False) if invalid.
    """
    if not isinstance(s, str) or not s.strip():
        return None, False
    s = s.strip()
    # heuristic: did they include a time?
    had_time = bool(re.search(r"\d:\d|\dT\d", s))
    # 1) try ISO first
    try:
        dt = datetime.fromisoformat(s)
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=local_tz)
        return dt.astimezone(timezone.utc), (had_time or dt.time() != datetime.min.time())
    except Exception:
        pass
    # 2) date-only patterns
    for pat in ("%m/%d/%Y", "%m/%d/%y", "%Y-%m-%d", "%Y/%m/%d", "%m-%d-%Y", "%m-%d-%y"):
        try:
            d = datetime.strptime(s, pat).replace(
                hour=eod_hour, minute=eod_min, second=0, microsecond=0, tzinfo=local_tz
            )
            return d.astimezone(timezone.utc), False
        except Exception:
            pass
    # 3) datetime patterns (naive -> local)
    for pat in ("%m/%d/%Y %H:%M", "%m/%d/%Y %H:%M:%S",
                "%m/%d/%y %H:%M", "%m/%d/%y %H:%M:%S",
                "%Y-%m-%d %H:%M", "%Y-%m-%d %H:%M:%S",
                "%Y/%m/%d %H:%M", "%Y/%m/%d %H:%M:%S"):
        try:
            d = datetime.strptime(s, pat).replace(tzinfo=local_tz)
            return d.astimezone(timezone.utc), True
        except Exception:
            pass
    return None, False
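# Quick usage check (illustrative): a date-only string becomes end-of-day in the assumed
# local timezone (UTC here) with had_time=False, while an ISO datetime keeps its time
# and reports had_time=True.
for _s in ("10/6/25", "2025-10-06T14:30"):
    _dt, _had_time = parse_user_due_utc(_s)
    print(_s, "->", _dt.isoformat(), "had_time =", _had_time)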
def _fmt_utc_for_display(dt_utc, had_time):
    """Uniform UTC display: date-only → YYYY-MM-DD; datetime → YYYY-MM-DDTHH:MM:SSZ."""
    if had_time:
        return dt_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
    return dt_utc.date().isoformat()

def priority_score(I, H_hours, due_dt, now=None, wI=0.70, wDeadline=0.25, wDur=0.05):
    if now is None:
        now = datetime.now(timezone.utc)
    hours_left = max((due_dt - now).total_seconds() / 3600.0, 0.25)
    if hours_left > 24:
        deadline_pressure = min(1.0, 0.5 * (1.0 / (1.0 + hours_left / 24.0)))
    else:
        deadline_pressure = min(1.0, 1.0 / (1.0 + hours_left / 6.0))
    dur_pressure = min(1.0, float(H_hours) / 2.0)
    p01 = wI * (float(I) / 10.0) + wDeadline * deadline_pressure + wDur * dur_pressure
    return round(1 + 9 * p01, 1)
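# Worked example (illustrative): an importance-8 task estimated at 2 hours and due in
# ~48 hours gets deadline_pressure = min(1, 0.5/(1+48/24)) ≈ 0.167 and dur_pressure = 1.0,
# so p01 ≈ 0.70*0.8 + 0.25*0.167 + 0.05*1.0 ≈ 0.65 and the 1-10 score lands around 6.9.
_now = datetime.now(timezone.utc)
print(priority_score(8.0, 2.0, _now + timedelta(hours=48), now=_now))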
def reorder_tasks(tasks_string, user_due_iso=None):
    now = datetime.now(timezone.utc)
    tasks = [t.strip() for t in str(tasks_string).splitlines() if t.strip()]
    if not tasks:
        # Return empty boxes and clear checkboxes
        return "", "", "", gr.update(choices=[], value=[])

    # split/normalize user dates (string or list)
    if isinstance(user_due_iso, str) or user_due_iso is None:
        due_lines = [s.strip() for s in str(user_due_iso or "").splitlines()]
        due_lines = [d if d else None for d in due_lines]
    else:
        due_lines = [None if (d is None or str(d).strip() == "") else str(d).strip()
                     for d in list(user_due_iso)]
    if len(due_lines) < len(tasks):
        due_lines += [None] * (len(tasks) - len(due_lines))
    elif len(due_lines) > len(tasks):
        due_lines = due_lines[:len(tasks)]

    # embed -> scale
    X = embedder.encode(tasks, normalize_embeddings=True).astype(np.float32)
    sc = joblib.load("mtl_scaler.joblib")
    Xs = sc.transform(X).astype(np.float32)

    # predict
    net.eval()
    with torch.no_grad():
        rI, rD, rH = net(torch.from_numpy(Xs).to(device))
        I = torch.clamp(rI, 1.0, 10.0).cpu().numpy()
        H = torch.expm1(rD).clamp(0.25, 12.0).cpu().numpy()   # duration (hrs), 0.25–12
        Hd = torch.expm1(rH).clamp(0.0, 30.0).cpu().numpy()   # horizon (days), 0–30

    assumed_local_tz = timezone.utc
    rows = []
    for t, due_s, i_imp, h_hrs, h_days in zip(tasks, due_lines, I, H, Hd):
        # parse user date (if provided)
        due_user_utc, had_time = parse_user_due_utc(due_s, local_tz=assumed_local_tz) if due_s else (None, False)
        # choose due_for_scores and display string
        if due_user_utc is not None:
            due_for_scores = due_user_utc
            display_due_str = _fmt_utc_for_display(due_user_utc, had_time)
        else:
            due_for_scores = _due_from_model(now, float(h_days)) or _due_from_heuristic(now, float(i_imp), float(h_hrs))
            display_due_str = _fmt_utc_for_display(due_for_scores, False)
        # priority score
        P = priority_score(float(i_imp), float(h_hrs), due_for_scores, now=now)
        rows.append({
            "task": t,
            "display_due": display_due_str,
            "suggested_due_iso": due_for_scores.isoformat(),
            "duration_hrs": float(h_hrs),     # keep raw number for sorting/formatting
            "priority_1to10": P,
        })

    out = pd.DataFrame(rows).sort_values(
        ["priority_1to10", "suggested_due_iso"], ascending=[False, True]
    ).reset_index(drop=True)
    choices = [f"{i+1}. {t}" for i, t in enumerate(out["task"].tolist())]

    # Prepare the three text outputs
    task_lines = "\n".join(out["task"].tolist())
    due_lines_out = "\n".join(out["display_due"].tolist())
    duration_lines = "\n".join(f"{d:.1f}" for d in out["duration_hrs"].tolist())
    # IMPORTANT: don't wipe user selections each run
    checkbox_update = gr.update(choices=choices)
    return task_lines, due_lines_out, duration_lines, checkbox_update
import gradio as gr  # For building the interface

with gr.Blocks() as demo:
    gr.Markdown("# Automated Task Prioritizer")
    gr.Markdown("This app will take your to-do list and reorder it based on importance, urgency, and duration.")

    with gr.Tab("Task Entry"):
        with gr.Row():
            sample_tasks = gr.Textbox(label="Task List", lines=10, placeholder="One task per line")
            due_dates = gr.Textbox(label="Due Date", lines=10, placeholder="One date per line (optional)")
        run_btn = gr.Button("Prioritize")
        gr.Examples(
            examples=[[
                "finish lab report before monday\n"
                "email TA about grading\n"
                "practice dance combo 20 minutes\n"
                "apply to 3 jobs\n"
                "review calculus problem set (10 problems)\n"
                "call dentist to schedule appointment\n"
                "draft 1-page cover letter\n"
                "wash dishes\n"
                "organize notes for history essay\n"
                "watch tv show",
                "10/6/25\n"
                "10/4/25\n"
                "10/4/25\n"
                "\n"
                "10/8/25\n"
                "10/3/25\n"
                "10/11/25\n"
                "10/3/25\n"
                "10/5/25\n"
            ]],
            inputs=[sample_tasks, due_dates],
            label="Example",
            examples_per_page=1,
            cache_examples=False,
        )

    with gr.Tab("Prioritized List"):
        with gr.Row():
            priority_task = gr.Textbox(label="Prioritized Task List", lines=10, interactive=False)
            date_box = gr.Textbox(label="Due Date", lines=10, interactive=False)
            durations_box = gr.Textbox(label="Duration (hrs)", lines=10, interactive=False)
        done_boxes = gr.CheckboxGroup(label="Mark completed tasks", interactive=True)  # ensure interactive

    # Wire up
    run_btn.click(
        fn=reorder_tasks,
        inputs=[sample_tasks, due_dates],
        outputs=[priority_task, date_box, durations_box, done_boxes]
    )
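# Optional smoke test (illustrative; assumes the model, scaler file, and embedder built
# above are available in this session): run two tasks through reorder_tasks outside the UI.
_demo_tasks, _demo_dues, _demo_durs, _ = reorder_tasks("finish lab report\nwash dishes", "10/6/25")
print(_demo_tasks)
print(_demo_dues, _demo_durs, sep="\n")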
if __name__ == "__main__":
    demo.launch(debug=True)