Spaces:
Running
Running
Commit Β·
b99a3f2
1
Parent(s): 1d0cf1d
Sync feedback to ProCreations/Intellite-storage via CommitScheduler
Browse files- README.md +23 -29
- app.py +79 -58
- requirements.txt +1 -0
README.md
CHANGED
|
@@ -12,49 +12,43 @@ pinned: false
|
|
| 12 |
# intellite-100M — RLHF data collector
|
| 13 |
|
| 14 |
Serves the SFT-tuned intellite 100M model in a chat UI. Every assistant reply
|
| 15 |
-
gets 👍 / 👎 buttons; each rating appends one record to
|
| 16 |
-
|
| 17 |
-
on your Mac.
|
| 18 |
|
| 19 |
## Setup
|
| 20 |
|
| 21 |
-
1.
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
## Data format
|
| 27 |
|
| 28 |
-
|
|
|
|
| 29 |
|
| 30 |
```json
|
| 31 |
-
{
|
| 32 |
-
"ts": "2026-04-20T15:23:45",
|
| 33 |
-
"system": "You are a helpful, honest, and concise assistant.",
|
| 34 |
-
"prompt_messages": [
|
| 35 |
-
{ "role": "user", "content": "..." },
|
| 36 |
-
{ "role": "assistant", "content": "..." },
|
| 37 |
-
{ "role": "user", "content": "..." }
|
| 38 |
-
],
|
| 39 |
-
"response": "...",
|
| 40 |
-
"liked": true
|
| 41 |
-
}
|
| 42 |
```
|
| 43 |
|
| 44 |
Each record is exactly `(prompt, response, reward∈{0,1})` — the shape any
|
| 45 |
-
preference/RL trainer expects. For DPO, group records by identical
|
| 46 |
-
and pair a `liked=true` response (chosen) with a
|
| 47 |
-
For REINFORCE/PPO, feed `liked` as a
|
| 48 |
|
| 49 |
## Downloading the data
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
#
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
pulling the file locally so you don't double-count records on the next export.
|
| 58 |
|
| 59 |
## Notes on the free CPU tier
|
| 60 |
|
|
|
|
| 12 |
# intellite-100M — RLHF data collector
|
| 13 |
|
| 14 |
Serves the SFT-tuned intellite 100M model in a chat UI. Every assistant reply
|
| 15 |
+
gets 👍 / 👎 buttons; each rating appends one JSONL record to a local folder
|
| 16 |
+
that a `CommitScheduler` pushes to a dataset repo on the Hub every 5 minutes.
|
|
|
|
| 17 |
|
| 18 |
## Setup
|
| 19 |
|
| 20 |
+
1. **Upload the SFT checkpoint** to the Space root as `best.pt` (or set
|
| 21 |
+
`INTELLITE_CKPT=/path/to/file.pt` in Settings → Variables).
|
| 22 |
+
2. **Create the dataset repo** `ProCreations/Intellite-storage`
|
| 23 |
+
(the scheduler will auto-create it on first push too).
|
| 24 |
+
3. **Set `HF_TOKEN`** in Settings → Secrets — a token with **write** scope
|
| 25 |
+
on the dataset repo. Without it, the Space runs but feedback only
|
| 26 |
+
persists on the container's local disk until the container restarts.
|
| 27 |
+
4. (Optional) Override `FEEDBACK_REPO` in Settings → Variables if you want
|
| 28 |
+
to use a different dataset repo.
|
| 29 |
|
| 30 |
## Data format
|
| 31 |
|
| 32 |
+
Each record is a single line of JSONL in `data/data_<uuid>.jsonl` on the
|
| 33 |
+
dataset repo (one file per Space replica/restart):
|
| 34 |
|
| 35 |
```json
|
| 36 |
+
{"ts":"2026-04-20T15:23:45","system":"You are a helpful, honest, and concise assistant.","prompt_messages":[{"role":"user","content":"..."},{"role":"assistant","content":"..."},{"role":"user","content":"..."}],"response":"...","liked":true}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
```
|
| 38 |
|
| 39 |
Each record is exactly `(prompt, response, reward∈{0,1})` — the shape any
|
| 40 |
+
preference/RL trainer expects. For DPO, group records by identical
|
| 41 |
+
`prompt_messages` and pair a `liked=true` response (chosen) with a
|
| 42 |
+
`liked=false` one (rejected). For REINFORCE/PPO, feed `liked` as a reward.
|
| 43 |
|
| 44 |
## Downloading the data
|
| 45 |
|
| 46 |
+
```bash
|
| 47 |
+
hf download ProCreations/Intellite-storage --repo-type=dataset --local-dir ./rlhf-data
|
| 48 |
+
# or in Python:
|
| 49 |
+
# from huggingface_hub import snapshot_download
|
| 50 |
+
# snapshot_download("ProCreations/Intellite-storage", repo_type="dataset")
|
| 51 |
+
```
|
|
|
|
| 52 |
|
| 53 |
## Notes on the free CPU tier
|
| 54 |
|
app.py
CHANGED
|
@@ -1,12 +1,15 @@
|
|
| 1 |
"""intellite 100M β RLHF data collector served as a Gradio HuggingFace Space.
|
| 2 |
|
| 3 |
Every assistant reply gets π / π buttons. When the user rates a reply,
|
| 4 |
-
the (system, prior messages, response, liked) tuple is appended to
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
| 10 |
"""
|
| 11 |
|
| 12 |
import json
|
|
@@ -15,11 +18,13 @@ import sys
|
|
| 15 |
import threading
|
| 16 |
import time
|
| 17 |
import traceback
|
|
|
|
| 18 |
from pathlib import Path
|
| 19 |
|
| 20 |
import gradio as gr
|
| 21 |
import tiktoken
|
| 22 |
import torch
|
|
|
|
| 23 |
|
| 24 |
SPACE_DIR = Path(__file__).resolve().parent
|
| 25 |
sys.path.insert(0, str(SPACE_DIR))
|
|
@@ -31,7 +36,13 @@ from model import IntelliteGPT
|
|
| 31 |
# Paths & constants
|
| 32 |
|
| 33 |
CKPT_PATH = Path(os.environ.get("INTELLITE_CKPT", SPACE_DIR / "best.pt"))
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
DEFAULT_SYSTEM = "You are a helpful, honest, and concise assistant."
|
| 37 |
SYSTEM_TAG = "<|system|>\n"
|
|
@@ -67,6 +78,24 @@ N_PARAMS = MODEL.num_params()
|
|
| 67 |
print(f"[model] {N_PARAMS/1e6:.1f}M params tokens_seen={TOKENS_SEEN:,} best_val={BEST_VAL:.4f}")
|
| 68 |
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
# ------------------------------------------------------------------------
|
| 71 |
# Prompt templating + generation (mirrors chat.py)
|
| 72 |
|
|
@@ -75,7 +104,6 @@ def render_prompt_ids(system: str, prior_messages: list[dict], user_msg: str) ->
|
|
| 75 |
ids: list[int] = []
|
| 76 |
if system:
|
| 77 |
ids.extend(ENC.encode_ordinary(SYSTEM_TAG + system.strip() + "\n"))
|
| 78 |
-
# Pair prior messages into (user, assistant) turns.
|
| 79 |
pending_user = None
|
| 80 |
for m in prior_messages:
|
| 81 |
role = m.get("role")
|
|
@@ -88,7 +116,6 @@ def render_prompt_ids(system: str, prior_messages: list[dict], user_msg: str) ->
|
|
| 88 |
ids.extend(ENC.encode_ordinary(content))
|
| 89 |
ids.append(EOT)
|
| 90 |
pending_user = None
|
| 91 |
-
# Current user turn + assistant opener.
|
| 92 |
ids.extend(ENC.encode_ordinary(USER_TAG + user_msg.strip() + "\n"))
|
| 93 |
ids.extend(ENC.encode_ordinary(ASST_TAG))
|
| 94 |
return ids
|
|
@@ -141,7 +168,6 @@ def stream_reply(prompt_ids, max_new, temperature, top_k, top_p, rep_penalty):
|
|
| 141 |
|
| 142 |
reply = ENC.decode(x[0, start:].tolist())
|
| 143 |
|
| 144 |
-
# Strip trailing replacement char (partial UTF-8) for nicer streaming.
|
| 145 |
while reply.endswith("\ufffd"):
|
| 146 |
reply = reply[:-1]
|
| 147 |
|
|
@@ -161,38 +187,43 @@ def stream_reply(prompt_ids, max_new, temperature, top_k, top_p, rep_penalty):
|
|
| 161 |
|
| 162 |
|
| 163 |
# ------------------------------------------------------------------------
|
| 164 |
-
# Feedback store
|
| 165 |
-
|
| 166 |
-
_feedback_lock = threading.Lock()
|
| 167 |
|
|
|
|
|
|
|
| 168 |
|
| 169 |
-
def _read_data() -> list:
    """Load the feedback records stored in ``DATA_PATH``.

    Returns:
        The list decoded from the JSON file, or an empty list when the file
        is missing, unreadable, not valid JSON/UTF-8, or does not contain a
        JSON array.
    """
    try:
        # Explicit UTF-8 so the result does not depend on the host locale;
        # ``_write_data`` writes raw non-ASCII text (ensure_ascii=False).
        with open(DATA_PATH, encoding="utf-8") as f:
            items = json.load(f)
    except (OSError, ValueError):
        # Best-effort read: a missing/unreadable file (OSError) or a corrupt
        # payload (JSONDecodeError and UnicodeDecodeError are ValueErrors)
        # is treated as "no records yet" instead of crashing the app.
        return []
    # Callers append to the result, so anything but a list is unusable.
    return items if isinstance(items, list) else []


def _write_data(items: list) -> None:
    """Atomically replace ``DATA_PATH`` with ``items`` serialized as JSON.

    The payload is written to a sibling ``.json.tmp`` file first and then
    renamed over ``DATA_PATH``, so a reader never observes a half-written
    file.

    Args:
        items: The full list of feedback records to persist.
    """
    tmp = DATA_PATH.with_suffix(".json.tmp")
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding must be pinned rather than left to the locale default.
    with open(tmp, "w", encoding="utf-8") as f:
        json.dump(items, f, indent=2, ensure_ascii=False)
    tmp.replace(DATA_PATH)
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
_write_data([])
|
| 188 |
|
| 189 |
|
| 190 |
def _stats_str() -> str:
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
return
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
|
| 198 |
def save_feedback(evt: gr.LikeData, history: list, system: str) -> str:
|
|
@@ -200,7 +231,6 @@ def save_feedback(evt: gr.LikeData, history: list, system: str) -> str:
|
|
| 200 |
if evt.liked is None:
|
| 201 |
return "rating cleared (nothing saved)"
|
| 202 |
|
| 203 |
-
# evt.index is an int in messages mode; be defensive either way.
|
| 204 |
idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
|
| 205 |
if not isinstance(idx, int) or idx < 0 or idx >= len(history):
|
| 206 |
return f"bad index {evt.index!r}"
|
|
@@ -216,19 +246,19 @@ def save_feedback(evt: gr.LikeData, history: list, system: str) -> str:
|
|
| 216 |
"response": msg.get("content", ""),
|
| 217 |
"liked": bool(evt.liked),
|
| 218 |
}
|
| 219 |
-
with _feedback_lock:
|
| 220 |
-
items = _read_data()
|
| 221 |
-
items.append(record)
|
| 222 |
-
_write_data(items)
|
| 223 |
-
|
| 224 |
-
verdict = "π good" if evt.liked else "π bad"
|
| 225 |
-
return f"saved {verdict} Β· {len(items)} records in data.json"
|
| 226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
_write_data([])
|
| 231 |
-
return "data.json cleared"
|
| 232 |
|
| 233 |
|
| 234 |
# ------------------------------------------------------------------------
|
|
@@ -271,8 +301,9 @@ with gr.Blocks(title="intellite 100M β RLHF collector") as demo:
|
|
| 271 |
f"{MCFG.d_model}d Γ {MCFG.n_layers}L Γ {MCFG.n_heads}h "
|
| 272 |
f"({N_PARAMS/1e6:.1f}M params) Β· {TOKENS_SEEN/1e6:.0f}M SFT tokens Β· "
|
| 273 |
f"val_loss {BEST_VAL:.3f} Β· device `{DEVICE}` \n"
|
| 274 |
-
f"**Please rate every response with π or π.**
|
| 275 |
-
f"
|
|
|
|
| 276 |
)
|
| 277 |
|
| 278 |
with gr.Row():
|
|
@@ -304,12 +335,7 @@ with gr.Blocks(title="intellite 100M β RLHF collector") as demo:
|
|
| 304 |
|
| 305 |
gr.Markdown("### RLHF data")
|
| 306 |
stats_md = gr.Markdown(_stats_str())
|
| 307 |
-
download = gr.DownloadButton(
|
| 308 |
-
label="β¬ Download data.json", value=str(DATA_PATH)
|
| 309 |
-
)
|
| 310 |
-
clear_data_btn = gr.Button("Clear data.json", variant="stop")
|
| 311 |
|
| 312 |
-
# Wire the chat events.
|
| 313 |
send_btn.click(
|
| 314 |
chat,
|
| 315 |
inputs=[msg, chatbot, system, max_new, temp, top_k, top_p, rep],
|
|
@@ -322,17 +348,12 @@ with gr.Blocks(title="intellite 100M β RLHF collector") as demo:
|
|
| 322 |
)
|
| 323 |
clear_btn.click(lambda: [], None, chatbot, queue=False)
|
| 324 |
|
| 325 |
-
# Thumbs-up / thumbs-down β append to data.json, refresh counters.
|
| 326 |
chatbot.like(
|
| 327 |
save_feedback,
|
| 328 |
inputs=[chatbot, system],
|
| 329 |
outputs=[feedback_status],
|
| 330 |
).then(lambda: _stats_str(), None, stats_md, queue=False)
|
| 331 |
|
| 332 |
-
clear_data_btn.click(clear_data, None, feedback_status, queue=False).then(
|
| 333 |
-
lambda: _stats_str(), None, stats_md, queue=False
|
| 334 |
-
)
|
| 335 |
-
|
| 336 |
|
| 337 |
if __name__ == "__main__":
|
| 338 |
demo.queue().launch()
|
|
|
|
| 1 |
"""intellite 100M β RLHF data collector served as a Gradio HuggingFace Space.
|
| 2 |
|
| 3 |
Every assistant reply gets π / π buttons. When the user rates a reply,
|
| 4 |
+
the (system, prior messages, response, liked) tuple is appended to a
|
| 5 |
+
local JSONL file, and a CommitScheduler pushes that folder to a dataset
|
| 6 |
+
repo on the Hub every 5 minutes.
|
| 7 |
+
|
| 8 |
+
Environment variables:
|
| 9 |
+
INTELLITE_CKPT path to SFT checkpoint (default: ./best.pt)
|
| 10 |
+
HF_TOKEN HF access token with *write* scope on the dataset
|
| 11 |
+
repo (needed for Hub sync — set as a Space secret; if unset, feedback stays local only)
|
| 12 |
+
FEEDBACK_REPO dataset repo id (default: ProCreations/Intellite-storage)
|
| 13 |
"""
|
| 14 |
|
| 15 |
import json
|
|
|
|
| 18 |
import threading
|
| 19 |
import time
|
| 20 |
import traceback
|
| 21 |
+
import uuid
|
| 22 |
from pathlib import Path
|
| 23 |
|
| 24 |
import gradio as gr
|
| 25 |
import tiktoken
|
| 26 |
import torch
|
| 27 |
+
from huggingface_hub import CommitScheduler
|
| 28 |
|
| 29 |
SPACE_DIR = Path(__file__).resolve().parent
|
| 30 |
sys.path.insert(0, str(SPACE_DIR))
|
|
|
|
| 36 |
# Paths & constants
|
| 37 |
|
| 38 |
CKPT_PATH = Path(os.environ.get("INTELLITE_CKPT", SPACE_DIR / "best.pt"))
|
| 39 |
+
FEEDBACK_DIR = SPACE_DIR / "user_feedback"
|
| 40 |
+
FEEDBACK_DIR.mkdir(exist_ok=True)
|
| 41 |
+
# Unique filename per replica/restart so concurrent Spaces don't clobber.
|
| 42 |
+
FEEDBACK_FILE = FEEDBACK_DIR / f"data_{uuid.uuid4().hex}.jsonl"
|
| 43 |
+
|
| 44 |
+
FEEDBACK_REPO = os.environ.get("FEEDBACK_REPO", "ProCreations/Intellite-storage")
|
| 45 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 46 |
|
| 47 |
DEFAULT_SYSTEM = "You are a helpful, honest, and concise assistant."
|
| 48 |
SYSTEM_TAG = "<|system|>\n"
|
|
|
|
| 78 |
print(f"[model] {N_PARAMS/1e6:.1f}M params tokens_seen={TOKENS_SEEN:,} best_val={BEST_VAL:.4f}")
|
| 79 |
|
| 80 |
|
| 81 |
+
# ------------------------------------------------------------------------
|
| 82 |
+
# Hub sync β CommitScheduler pushes FEEDBACK_DIR to the dataset every 5 min.
|
| 83 |
+
|
| 84 |
+
# Start the background Hub sync only when a token is configured; otherwise
# ``scheduler`` stays None and the app runs in local-only mode.
scheduler = (
    CommitScheduler(
        repo_id=FEEDBACK_REPO,
        repo_type="dataset",
        folder_path=FEEDBACK_DIR,
        path_in_repo="data",
        every=5,
        token=HF_TOKEN,
    )
    if HF_TOKEN
    else None
)
if scheduler is None:
    print("[hub] HF_TOKEN not set β feedback will stay local only")
else:
    print(f"[hub] scheduler active β {FEEDBACK_REPO} (every 5 min)")
|
| 97 |
+
|
| 98 |
+
|
| 99 |
# ------------------------------------------------------------------------
|
| 100 |
# Prompt templating + generation (mirrors chat.py)
|
| 101 |
|
|
|
|
| 104 |
ids: list[int] = []
|
| 105 |
if system:
|
| 106 |
ids.extend(ENC.encode_ordinary(SYSTEM_TAG + system.strip() + "\n"))
|
|
|
|
| 107 |
pending_user = None
|
| 108 |
for m in prior_messages:
|
| 109 |
role = m.get("role")
|
|
|
|
| 116 |
ids.extend(ENC.encode_ordinary(content))
|
| 117 |
ids.append(EOT)
|
| 118 |
pending_user = None
|
|
|
|
| 119 |
ids.extend(ENC.encode_ordinary(USER_TAG + user_msg.strip() + "\n"))
|
| 120 |
ids.extend(ENC.encode_ordinary(ASST_TAG))
|
| 121 |
return ids
|
|
|
|
| 168 |
|
| 169 |
reply = ENC.decode(x[0, start:].tolist())
|
| 170 |
|
|
|
|
| 171 |
while reply.endswith("\ufffd"):
|
| 172 |
reply = reply[:-1]
|
| 173 |
|
|
|
|
| 187 |
|
| 188 |
|
| 189 |
# ------------------------------------------------------------------------
|
| 190 |
+
# Feedback store β JSONL, append-only, synced to Hub by CommitScheduler.
|
|
|
|
|
|
|
| 191 |
|
| 192 |
+
_local_lock = threading.Lock()
|
| 193 |
+
_local_count = {"total": 0, "liked": 0}
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
+
def _count_jsonl_lines(path: Path) -> tuple[int, int]:
    """Count the feedback records in a JSONL file.

    Args:
        path: Location of the JSONL file; it does not have to exist.

    Returns:
        A ``(total, liked)`` pair. ``total`` is the number of non-blank
        lines, including lines that fail to parse. ``liked`` is the number
        of lines that decode to a JSON object with a truthy ``"liked"``
        value. A missing file yields ``(0, 0)``.
    """
    total, liked = 0, 0
    try:
        # Pin UTF-8 instead of the locale default; errors="replace" keeps a
        # stray undecodable byte from aborting the whole count.
        handle = path.open(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        return 0, 0
    with handle:
        for raw in handle:
            text = raw.strip()
            if not text:
                continue
            total += 1
            try:
                record = json.loads(text)
            except json.JSONDecodeError:
                # Malformed line: still counted in ``total``, never liked.
                continue
            # A line can be valid JSON without being an object (e.g. a list
            # or ``null``); only objects carry a "liked" field.
            if isinstance(record, dict) and record.get("liked"):
                liked += 1
    return total, liked
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
+
t, l = _count_jsonl_lines(FEEDBACK_FILE)
|
| 215 |
+
_local_count["total"], _local_count["liked"] = t, l
|
|
|
|
| 216 |
|
| 217 |
|
| 218 |
def _stats_str() -> str:
    """Build the two-line Markdown summary of this session's ratings.

    The first line reports the session's record count split into liked and
    not-liked; the second links to the dataset repo and says whether the
    Hub scheduler is active.
    """
    total, liked = _local_count["total"], _local_count["liked"]
    if scheduler:
        sync = "synced every 5 min"
    else:
        sync = "**HF_TOKEN missing β not syncing**"
    repo_url = f"https://huggingface.co/datasets/{FEEDBACK_REPO}"
    counts_line = f"**{total}** records this session Β· π {liked} Β· π {total - liked} \n"
    sync_line = f"Pushed to [`{FEEDBACK_REPO}`]({repo_url}) ({sync})"
    return counts_line + sync_line
|
| 227 |
|
| 228 |
|
| 229 |
def save_feedback(evt: gr.LikeData, history: list, system: str) -> str:
|
|
|
|
| 231 |
if evt.liked is None:
|
| 232 |
return "rating cleared (nothing saved)"
|
| 233 |
|
|
|
|
| 234 |
idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
|
| 235 |
if not isinstance(idx, int) or idx < 0 or idx >= len(history):
|
| 236 |
return f"bad index {evt.index!r}"
|
|
|
|
| 246 |
"response": msg.get("content", ""),
|
| 247 |
"liked": bool(evt.liked),
|
| 248 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
+
# Write under the scheduler's lock (or our own) so the background push
|
| 251 |
+
# never sees a half-written line.
|
| 252 |
+
lock = scheduler.lock if scheduler else _local_lock
|
| 253 |
+
with lock:
|
| 254 |
+
with FEEDBACK_FILE.open("a") as f:
|
| 255 |
+
f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
| 256 |
+
_local_count["total"] += 1
|
| 257 |
+
if record["liked"]:
|
| 258 |
+
_local_count["liked"] += 1
|
| 259 |
|
| 260 |
+
verdict = "π good" if evt.liked else "π bad"
|
| 261 |
+
return f"saved {verdict} Β· {_local_count['total']} this session"
|
|
|
|
|
|
|
| 262 |
|
| 263 |
|
| 264 |
# ------------------------------------------------------------------------
|
|
|
|
| 301 |
f"{MCFG.d_model}d Γ {MCFG.n_layers}L Γ {MCFG.n_heads}h "
|
| 302 |
f"({N_PARAMS/1e6:.1f}M params) Β· {TOKENS_SEEN/1e6:.0f}M SFT tokens Β· "
|
| 303 |
f"val_loss {BEST_VAL:.3f} Β· device `{DEVICE}` \n"
|
| 304 |
+
f"**Please rate every response with π or π.** Ratings auto-sync to "
|
| 305 |
+
f"[`{FEEDBACK_REPO}`](https://huggingface.co/datasets/{FEEDBACK_REPO}) "
|
| 306 |
+
f"every 5 minutes for RLHF training."
|
| 307 |
)
|
| 308 |
|
| 309 |
with gr.Row():
|
|
|
|
| 335 |
|
| 336 |
gr.Markdown("### RLHF data")
|
| 337 |
stats_md = gr.Markdown(_stats_str())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 338 |
|
|
|
|
| 339 |
send_btn.click(
|
| 340 |
chat,
|
| 341 |
inputs=[msg, chatbot, system, max_new, temp, top_k, top_p, rep],
|
|
|
|
| 348 |
)
|
| 349 |
clear_btn.click(lambda: [], None, chatbot, queue=False)
|
| 350 |
|
|
|
|
| 351 |
chatbot.like(
|
| 352 |
save_feedback,
|
| 353 |
inputs=[chatbot, system],
|
| 354 |
outputs=[feedback_status],
|
| 355 |
).then(lambda: _stats_str(), None, stats_md, queue=False)
|
| 356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
|
| 358 |
if __name__ == "__main__":
|
| 359 |
demo.queue().launch()
|
requirements.txt
CHANGED
|
@@ -2,4 +2,5 @@ gradio>=5.0.0
|
|
| 2 |
torch>=2.1.0
|
| 3 |
tiktoken
|
| 4 |
numpy
|
|
|
|
| 5 |
audioop-lts; python_version >= "3.13"
|
|
|
|
| 2 |
torch>=2.1.0
|
| 3 |
tiktoken
|
| 4 |
numpy
|
| 5 |
+
huggingface_hub>=0.24.0
|
| 6 |
audioop-lts; python_version >= "3.13"
|