Spaces:

ProCreations
/

Intellite

Running

App Files Files Community

ProCreations committed on Apr 21

Commit

b99a3f2

1 Parent(s): 1d0cf1d

Sync feedback to ProCreations/Intellite-storage via CommitScheduler

Browse files

Files changed (3) hide show

README.md +23 -29
app.py +79 -58
requirements.txt +1 -0

README.md CHANGED Viewed

@@ -12,49 +12,43 @@ pinned: false
 # intellite-100M — RLHF data collector
 Serves the SFT-tuned intellite 100M model in a chat UI. Every assistant reply
-gets 👍 / 👎 buttons; each rating appends one record to `data.json` with the
-prompt, the response, and the binary reward — ready for RLHF / DPO training
-on your Mac.
 ## Setup
-1. Copy your SFT checkpoint to the Space root as **`best.pt`**
-   (or set `INTELLITE_CKPT=/path/to/file.pt` in the Space's settings → Variables).
-   Use `git lfs track "best.pt"` before committing the weights file.
-2. Push the Space. `app.py` loads the checkpoint once at startup.
 ## Data format
-`data.json` is a list of records, one per rating:
 ```json
-{
-  "ts": "2026-04-20T15:23:45",
-  "system": "You are a helpful, honest, and concise assistant.",
-  "prompt_messages": [
-    { "role": "user", "content": "..." },
-    { "role": "assistant", "content": "..." },
-    { "role": "user", "content": "..." }
-  ],
-  "response": "...",
-  "liked": true
-}
 ```
 Each record is exactly `(prompt, response, reward∈{0,1})` — the shape any
-preference/RL trainer expects. For DPO, group records by identical `prompt_messages`
-and pair a `liked=true` response (chosen) with a `liked=false` one (rejected).
-For REINFORCE/PPO, feed `liked` as a {−1, +1} or {0, 1} reward.
 ## Downloading the data
-The right-hand panel has an **⬇ Download data.json** button — one click on your
-Mac and you've got every rating so far.
-## Clearing the data
-The **Clear data.json** button empties the file on the Space. Do this after
-pulling the file locally so you don't double-count records on the next export.
 ## Notes on the free CPU tier

 # intellite-100M — RLHF data collector
 Serves the SFT-tuned intellite 100M model in a chat UI. Every assistant reply
+gets 👍 / 👎 buttons; each rating appends one JSONL record to a local folder
+that a `CommitScheduler` pushes to a dataset repo on the Hub every 5 minutes.
 ## Setup
+1. **Upload the SFT checkpoint** to the Space root as `best.pt` (or set
+   `INTELLITE_CKPT=/path/to/file.pt` in Settings → Variables).
+2. **Create the dataset repo** `ProCreations/Intellite-storage`
+   (optional — the scheduler creates it automatically if it does not exist).
+3. **Set `HF_TOKEN`** in Settings → Secrets — a token with **write** scope
+   on the dataset repo. Without it, the Space still runs, but feedback is
+   only written to a local file inside the container and is lost when the
+   container restarts.
+4. (Optional) Override `FEEDBACK_REPO` in Settings → Variables if you want
+   to use a different dataset repo.
 ## Data format
+Each record is a single line of JSONL in `data/data_<uuid>.jsonl` on the
+dataset repo (one file per Space replica/restart):
 ```json
+{"ts":"2026-04-20T15:23:45","system":"You are a helpful, honest, and concise assistant.","prompt_messages":[{"role":"user","content":"..."},{"role":"assistant","content":"..."},{"role":"user","content":"..."}],"response":"...","liked":true}
 ```
 Each record is exactly `(prompt, response, reward∈{0,1})` — the shape any
+preference/RL trainer expects. For DPO, group records by identical
+`prompt_messages` and pair a `liked=true` response (chosen) with a
+`liked=false` one (rejected). For REINFORCE/PPO, feed `liked` as a reward.
 ## Downloading the data
+```bash
+hf download ProCreations/Intellite-storage --repo-type=dataset --local-dir ./rlhf-data
+# or in Python:
+#   from huggingface_hub import snapshot_download
+#   snapshot_download("ProCreations/Intellite-storage", repo_type="dataset")
+```
 ## Notes on the free CPU tier

app.py CHANGED Viewed

@@ -1,12 +1,15 @@
 """intellite 100M — RLHF data collector served as a Gradio HuggingFace Space.
 Every assistant reply gets 👍 / 👎 buttons. When the user rates a reply,
-the (system, prior messages, response, liked) tuple is appended to
-data.json in the Space's working directory. A Download button exposes
-that file so you can grab it on your Mac and use it for RL / DPO.
-The SFT checkpoint is loaded from:
-    $INTELLITE_CKPT  (if set), else ./best.pt at the Space root.
 """
 import json
@@ -15,11 +18,13 @@ import sys
 import threading
 import time
 import traceback
 from pathlib import Path
 import gradio as gr
 import tiktoken
 import torch
 SPACE_DIR = Path(__file__).resolve().parent
 sys.path.insert(0, str(SPACE_DIR))
@@ -31,7 +36,13 @@ from model import IntelliteGPT
 # Paths & constants
 CKPT_PATH = Path(os.environ.get("INTELLITE_CKPT", SPACE_DIR / "best.pt"))
-DATA_PATH = SPACE_DIR / "data.json"
 DEFAULT_SYSTEM = "You are a helpful, honest, and concise assistant."
 SYSTEM_TAG = "<|system|>\n"
@@ -67,6 +78,24 @@ N_PARAMS = MODEL.num_params()
 print(f"[model] {N_PARAMS/1e6:.1f}M params  tokens_seen={TOKENS_SEEN:,}  best_val={BEST_VAL:.4f}")
 # ------------------------------------------------------------------------
 # Prompt templating + generation (mirrors chat.py)
@@ -75,7 +104,6 @@ def render_prompt_ids(system: str, prior_messages: list[dict], user_msg: str) ->
     ids: list[int] = []
     if system:
         ids.extend(ENC.encode_ordinary(SYSTEM_TAG + system.strip() + "\n"))
-    # Pair prior messages into (user, assistant) turns.
     pending_user = None
     for m in prior_messages:
         role = m.get("role")
@@ -88,7 +116,6 @@ def render_prompt_ids(system: str, prior_messages: list[dict], user_msg: str) ->
             ids.extend(ENC.encode_ordinary(content))
             ids.append(EOT)
             pending_user = None
-    # Current user turn + assistant opener.
     ids.extend(ENC.encode_ordinary(USER_TAG + user_msg.strip() + "\n"))
     ids.extend(ENC.encode_ordinary(ASST_TAG))
     return ids
@@ -141,7 +168,6 @@ def stream_reply(prompt_ids, max_new, temperature, top_k, top_p, rep_penalty):
         reply = ENC.decode(x[0, start:].tolist())
-        # Strip trailing replacement char (partial UTF-8) for nicer streaming.
         while reply.endswith("\ufffd"):
             reply = reply[:-1]
@@ -161,38 +187,43 @@ def stream_reply(prompt_ids, max_new, temperature, top_k, top_p, rep_penalty):
 # ------------------------------------------------------------------------
-# Feedback store (data.json)
-_feedback_lock = threading.Lock()
-def _read_data() -> list:
-    if not DATA_PATH.exists():
-        return []
-    try:
-        with open(DATA_PATH) as f:
-            return json.load(f)
-    except Exception:
-        return []
-def _write_data(items: list) -> None:
-    tmp = DATA_PATH.with_suffix(".json.tmp")
-    with open(tmp, "w") as f:
-        json.dump(items, f, indent=2, ensure_ascii=False)
-    tmp.replace(DATA_PATH)
-if not DATA_PATH.exists():
-    _write_data([])
 def _stats_str() -> str:
-    with _feedback_lock:
-        items = _read_data()
-    total = len(items)
-    liked = sum(1 for i in items if i.get("liked"))
-    return f"**{total}** records · 👍 {liked} · 👎 {total - liked}"
 def save_feedback(evt: gr.LikeData, history: list, system: str) -> str:
@@ -200,7 +231,6 @@ def save_feedback(evt: gr.LikeData, history: list, system: str) -> str:
     if evt.liked is None:
         return "rating cleared (nothing saved)"
-    # evt.index is an int in messages mode; be defensive either way.
     idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
     if not isinstance(idx, int) or idx < 0 or idx >= len(history):
         return f"bad index {evt.index!r}"
@@ -216,19 +246,19 @@ def save_feedback(evt: gr.LikeData, history: list, system: str) -> str:
         "response": msg.get("content", ""),
         "liked": bool(evt.liked),
     }
-    with _feedback_lock:
-        items = _read_data()
-        items.append(record)
-        _write_data(items)
-    verdict = "👍 good" if evt.liked else "👎 bad"
-    return f"saved {verdict} · {len(items)} records in data.json"
-def clear_data() -> str:
-    with _feedback_lock:
-        _write_data([])
-    return "data.json cleared"
 # ------------------------------------------------------------------------
@@ -271,8 +301,9 @@ with gr.Blocks(title="intellite 100M — RLHF collector") as demo:
         f"{MCFG.d_model}d × {MCFG.n_layers}L × {MCFG.n_heads}h "
         f"({N_PARAMS/1e6:.1f}M params) · {TOKENS_SEEN/1e6:.0f}M SFT tokens · "
         f"val_loss {BEST_VAL:.3f} · device `{DEVICE}`  \n"
-        f"**Please rate every response with 👍 or 👎.** Every rating appends a record "
-        f"to `data.json`; grab it from the sidebar for RLHF on your Mac."
     )
     with gr.Row():
@@ -304,12 +335,7 @@ with gr.Blocks(title="intellite 100M — RLHF collector") as demo:
             gr.Markdown("### RLHF data")
             stats_md = gr.Markdown(_stats_str())
-            download = gr.DownloadButton(
-                label="⬇ Download data.json", value=str(DATA_PATH)
-            )
-            clear_data_btn = gr.Button("Clear data.json", variant="stop")
-    # Wire the chat events.
     send_btn.click(
         chat,
         inputs=[msg, chatbot, system, max_new, temp, top_k, top_p, rep],
@@ -322,17 +348,12 @@ with gr.Blocks(title="intellite 100M — RLHF collector") as demo:
     )
     clear_btn.click(lambda: [], None, chatbot, queue=False)
-    # Thumbs-up / thumbs-down → append to data.json, refresh counters.
     chatbot.like(
         save_feedback,
         inputs=[chatbot, system],
         outputs=[feedback_status],
     ).then(lambda: _stats_str(), None, stats_md, queue=False)
-    clear_data_btn.click(clear_data, None, feedback_status, queue=False).then(
-        lambda: _stats_str(), None, stats_md, queue=False
-    )
 if __name__ == "__main__":
     demo.queue().launch()

 """intellite 100M — RLHF data collector served as a Gradio HuggingFace Space.
 Every assistant reply gets 👍 / 👎 buttons. When the user rates a reply,
+the (system, prior messages, response, liked) tuple is appended to a
+local JSONL file, and a CommitScheduler pushes that folder to a dataset
+repo on the Hub every 5 minutes.
+Environment variables:
+    INTELLITE_CKPT    path to SFT checkpoint (default: ./best.pt)
+    HF_TOKEN          HF access token with *write* scope on the dataset
+                      repo (required for Hub sync — set as a Space secret;
+                      without it, feedback stays local only)
+    FEEDBACK_REPO     dataset repo id (default: ProCreations/Intellite-storage)
 """
 import json
 import threading
 import time
 import traceback
+import uuid
 from pathlib import Path
 import gradio as gr
 import tiktoken
 import torch
+from huggingface_hub import CommitScheduler
 SPACE_DIR = Path(__file__).resolve().parent
 sys.path.insert(0, str(SPACE_DIR))
 # Paths & constants
 CKPT_PATH = Path(os.environ.get("INTELLITE_CKPT", SPACE_DIR / "best.pt"))
+FEEDBACK_DIR = SPACE_DIR / "user_feedback"
+FEEDBACK_DIR.mkdir(exist_ok=True)
+# Unique filename per replica/restart so concurrent Spaces don't clobber.
+FEEDBACK_FILE = FEEDBACK_DIR / f"data_{uuid.uuid4().hex}.jsonl"
+FEEDBACK_REPO = os.environ.get("FEEDBACK_REPO", "ProCreations/Intellite-storage")
+HF_TOKEN = os.environ.get("HF_TOKEN")
 DEFAULT_SYSTEM = "You are a helpful, honest, and concise assistant."
 SYSTEM_TAG = "<|system|>\n"
 print(f"[model] {N_PARAMS/1e6:.1f}M params  tokens_seen={TOKENS_SEEN:,}  best_val={BEST_VAL:.4f}")
+# ------------------------------------------------------------------------
+# Hub sync — CommitScheduler pushes FEEDBACK_DIR to the dataset every 5 min.
+if HF_TOKEN:
+    scheduler = CommitScheduler(
+        repo_id=FEEDBACK_REPO,
+        repo_type="dataset",
+        folder_path=FEEDBACK_DIR,
+        path_in_repo="data",
+        every=5,
+        token=HF_TOKEN,
+    )
+    print(f"[hub] scheduler active → {FEEDBACK_REPO} (every 5 min)")
+else:
+    scheduler = None
+    print("[hub] HF_TOKEN not set — feedback will stay local only")
 # ------------------------------------------------------------------------
 # Prompt templating + generation (mirrors chat.py)
     ids: list[int] = []
     if system:
         ids.extend(ENC.encode_ordinary(SYSTEM_TAG + system.strip() + "\n"))
     pending_user = None
     for m in prior_messages:
         role = m.get("role")
             ids.extend(ENC.encode_ordinary(content))
             ids.append(EOT)
             pending_user = None
     ids.extend(ENC.encode_ordinary(USER_TAG + user_msg.strip() + "\n"))
     ids.extend(ENC.encode_ordinary(ASST_TAG))
     return ids
         reply = ENC.decode(x[0, start:].tolist())
         while reply.endswith("\ufffd"):
             reply = reply[:-1]
 # ------------------------------------------------------------------------
+# Feedback store — JSONL, append-only, synced to Hub by CommitScheduler.
+_local_lock = threading.Lock()
+_local_count = {"total": 0, "liked": 0}
+def _count_jsonl_lines(path: Path) -> tuple[int, int]:
+    total, liked = 0, 0
+    if not path.exists():
+        return 0, 0
+    with path.open() as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            total += 1
+            try:
+                if json.loads(line).get("liked"):
+                    liked += 1
+            except json.JSONDecodeError:
+                pass
+    return total, liked
+t, l = _count_jsonl_lines(FEEDBACK_FILE)
+_local_count["total"], _local_count["liked"] = t, l
 def _stats_str() -> str:
+    t = _local_count["total"]
+    l = _local_count["liked"]
+    repo_link = f"[`{FEEDBACK_REPO}`](https://huggingface.co/datasets/{FEEDBACK_REPO})"
+    sync = "synced every 5 min" if scheduler else "**HF_TOKEN missing — not syncing**"
+    return (
+        f"**{t}** records this session · 👍 {l} · 👎 {t - l}  \n"
+        f"Pushed to {repo_link} ({sync})"
+    )
 def save_feedback(evt: gr.LikeData, history: list, system: str) -> str:
     if evt.liked is None:
         return "rating cleared (nothing saved)"
     idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
     if not isinstance(idx, int) or idx < 0 or idx >= len(history):
         return f"bad index {evt.index!r}"
         "response": msg.get("content", ""),
         "liked": bool(evt.liked),
     }
+    # Write under the scheduler's lock (or our own) so the background push
+    # never sees a half-written line.
+    lock = scheduler.lock if scheduler else _local_lock
+    with lock:
+        with FEEDBACK_FILE.open("a") as f:
+            f.write(json.dumps(record, ensure_ascii=False) + "\n")
+        _local_count["total"] += 1
+        if record["liked"]:
+            _local_count["liked"] += 1
+    verdict = "👍 good" if evt.liked else "👎 bad"
+    return f"saved {verdict} · {_local_count['total']} this session"
 # ------------------------------------------------------------------------
         f"{MCFG.d_model}d × {MCFG.n_layers}L × {MCFG.n_heads}h "
         f"({N_PARAMS/1e6:.1f}M params) · {TOKENS_SEEN/1e6:.0f}M SFT tokens · "
         f"val_loss {BEST_VAL:.3f} · device `{DEVICE}`  \n"
+        f"**Please rate every response with 👍 or 👎.** Ratings auto-sync to "
+        f"[`{FEEDBACK_REPO}`](https://huggingface.co/datasets/{FEEDBACK_REPO}) "
+        f"every 5 minutes for RLHF training."
     )
     with gr.Row():
             gr.Markdown("### RLHF data")
             stats_md = gr.Markdown(_stats_str())
     send_btn.click(
         chat,
         inputs=[msg, chatbot, system, max_new, temp, top_k, top_p, rep],
     )
     clear_btn.click(lambda: [], None, chatbot, queue=False)
     chatbot.like(
         save_feedback,
         inputs=[chatbot, system],
         outputs=[feedback_status],
     ).then(lambda: _stats_str(), None, stats_md, queue=False)
 if __name__ == "__main__":
     demo.queue().launch()

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ gradio>=5.0.0
 torch>=2.1.0
 tiktoken
 numpy
 audioop-lts; python_version >= "3.13"

 torch>=2.1.0
 tiktoken
 numpy
+huggingface_hub>=0.24.0
 audioop-lts; python_version >= "3.13"