Add files using upload-large-folder tool
Browse files- MERGE_SEQUENCE.sh +426 -0
- eztest.py +83 -0
- getLr.py +73 -0
- index.html +945 -0
- mergeLr.py +119 -0
- v127rc_exp2/B_dup.yaml +41 -0
- v127rc_exp2/B_mul.yaml +41 -0
- v127rc_exp2/B_mup/10700.yaml +5 -0
- v127rc_exp2/B_mup/10800.yaml +5 -0
- v127rc_exp2/B_mup/10900.yaml +5 -0
- v127rc_exp2/B_mup/11k.yaml +5 -0
- v127rc_exp2/B_mup/checkpoint-5800/chat_template.jinja +85 -0
- v127rc_exp2/B_mup/checkpoint-5800/tokenizer_config.json +19 -0
MERGE_SEQUENCE.sh
ADDED
|
@@ -0,0 +1,426 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
set -euo pipefail

# ============================================================
# Self-contained checkpoint merger (auto-discovers checkpoints).
#
# Merges the instruction-residual adapter ($LR_DIR) into every
# checkpoint-* directory found under $ROOT.
#
# Modes
#   WATCH=0 (default): snapshot the current checkpoints and process once.
#   WATCH=1          : keep polling for new checkpoint-* dirs as they land.
#
# Per-checkpoint output : <checkpoint>/residued
# Per-checkpoint log    : <checkpoint>/residued/merge.log
#
# RAM safety : every merge runs in a fresh Python process.
# Auto-stop  : in WATCH=1, exit after IDLE_LIMIT_SECONDS with no new
#              checkpoints (optional pod removal hook is commented out).
# Robustness : a failed merge (e.g. a half-written checkpoint) is logged
#              and retried later; it never kills the loop.
# Cleanup    : after a SUCCESSFUL merge, everything inside checkpoint-*
#              except residued/ is deleted.
# ============================================================

# ---------------- CONFIG (every knob overridable via environment) ----------------
LR_DIR="${LR_DIR:-/workspace/Llama-3.2-3B-Lr/instruction_residual_adapter}"
ROOT="${ROOT:-/workspace/v126rc_exp3/F_r10000}"
PYTHON_BIN="${PYTHON_BIN:-python}"

# Dtype for the saved merged model: bf16 (default) | fp16 | fp32.
SAVE_DTYPE="${SAVE_DTYPE:-bf16}"

# 0 = single pass over existing checkpoints, 1 = keep discovering new ones.
WATCH="${WATCH:-0}"
POLL_SECONDS="${POLL_SECONDS:-60}"

# WATCH=1 only: stop once no new checkpoint has appeared for this long.
IDLE_LIMIT_SECONDS="${IDLE_LIMIT_SECONDS:-1200}" # 20 minutes default

# Require the checkpoint dir to be quiet (no file mtime newer than this many
# seconds) before merging; 0 disables. Small values (30-120) help avoid
# half-written checkpoints.
STABLE_SECONDS="${STABLE_SECONDS:-0}"

# Best-effort post-merge toggles (0/1).
DROP_CACHES="${DROP_CACHES:-0}" # requires sudo
GPU_RESET="${GPU_RESET:-0}"

# Skip checkpoints whose residued/ already contains model files (0/1).
SKIP_DONE="${SKIP_DONE:-1}"

# WATCH=1 only: minimum wait before re-attempting a failed checkpoint.
RETRY_COOLDOWN_SECONDS="${RETRY_COOLDOWN_SECONDS:-120}"

# -------------- CHECKS --------------
[[ -d "$LR_DIR" ]] || { echo "ERROR: LR_DIR not found: $LR_DIR" >&2; exit 1; }
[[ -d "$ROOT" ]] || { echo "ERROR: ROOT not found: $ROOT" >&2; exit 1; }

# Guardrails: reject nonsensical numeric settings up front.
(( POLL_SECONDS > 0 )) || { echo "ERROR: POLL_SECONDS must be > 0" >&2; exit 1; }
(( IDLE_LIMIT_SECONDS >= 0 )) || { echo "ERROR: IDLE_LIMIT_SECONDS must be >= 0" >&2; exit 1; }
(( STABLE_SECONDS >= 0 )) || { echo "ERROR: STABLE_SECONDS must be >= 0" >&2; exit 1; }
(( RETRY_COOLDOWN_SECONDS >= 0 )) || { echo "ERROR: RETRY_COOLDOWN_SECONDS must be >= 0" >&2; exit 1; }

# Echo the effective configuration so runs are self-documenting in logs.
echo "LR_DIR : $LR_DIR"
echo "ROOT : $ROOT"
echo "PYTHON_BIN : $PYTHON_BIN"
echo "SAVE_DTYPE : $SAVE_DTYPE"
echo "WATCH : $WATCH"
echo "POLL_SECONDS : $POLL_SECONDS"
echo "IDLE_LIMIT_SECONDS : $IDLE_LIMIT_SECONDS"
echo "STABLE_SECONDS : $STABLE_SECONDS"
echo "SKIP_DONE : $SKIP_DONE"
echo "RETRY_COOLDOWN_SECONDS : $RETRY_COOLDOWN_SECONDS"
echo
# -------------- Helpers --------------
|
| 83 |
+
is_done() {
  # Return 0 if "$1" already holds merged model weights.
  #
  # BUG FIX: save_pretrained() writes either a single weights file
  # (model.safetensors / pytorch_model.bin) or, when the model exceeds the
  # shard-size limit, an index file plus numbered shards. The original
  # check only looked for the single-file names, so sharded outputs were
  # never recognized as "done" and SKIP_DONE=1 re-merged them forever.
  local out_dir="$1"
  local f
  for f in \
    "model.safetensors" \
    "pytorch_model.bin" \
    "model.safetensors.index.json" \
    "pytorch_model.bin.index.json"
  do
    if [[ -f "$out_dir/$f" ]]; then
      return 0
    fi
  done
  return 1
}
|
| 91 |
+
|
| 92 |
+
cleanup_and_exit_watch() {
  # Announce why WATCH mode is stopping, then exit successfully.
  # A best-effort RunPod pod-removal hook used to live here; it is kept
  # disabled because removing the pod TERMINATES it. Re-enable with:
  #   command -v runpodctl && runpodctl remove pod "$RUNPOD_POD_ID"
  local reason="$1"

  echo
  echo "============================================================"
  echo "WATCH STOP: $reason"
  echo "============================================================"

  echo "Exiting."
  exit 0
}
|
| 114 |
+
|
| 115 |
+
cleanup_checkpoint_keep_residued() {
  # Reclaim disk: drop every top-level entry of a checkpoint dir except the
  # merged output in residued/. Refuses to delete anything if either the
  # checkpoint dir or residued/ is missing (never destructive on a miss).
  local ckpt_dir="$1"
  local keep="${ckpt_dir}/residued"

  if [[ ! -d "$ckpt_dir" ]]; then
    echo "WARN: ckpt_dir missing: $ckpt_dir"
    return 0
  fi
  if [[ ! -d "$keep" ]]; then
    echo "WARN: residued missing (won't delete): $keep"
    return 0
  fi

  echo "🧹 Cleaning checkpoint (keeping only residued/): $ckpt_dir"
  find "$ckpt_dir" -mindepth 1 -maxdepth 1 ! -name "residued" -exec rm -rf {} +
}
|
| 129 |
+
|
| 130 |
+
# Returns 0 if checkpoint looks "stable enough" to attempt merge.
|
| 131 |
+
# If STABLE_SECONDS=0, always returns 0.
|
| 132 |
+
is_checkpoint_stable() {
  # Return 0 when the checkpoint looks quiescent enough to merge:
  # no file under it has been modified within the last $STABLE_SECONDS.
  # STABLE_SECONDS=0 disables the check entirely (always stable).
  local ckpt_dir="$1"
  local threshold="$STABLE_SECONDS"

  (( threshold == 0 )) && return 0

  # Newest mtime (epoch seconds) of any file in the tree. Relies on GNU
  # find -printf / coreutils, which RunPod images normally ship; busybox
  # variants differ.
  local newest
  newest="$(find "$ckpt_dir" -type f -printf '%T@\n' 2>/dev/null | sort -n | tail -1 | cut -d. -f1 || true)"
  [[ -n "${newest:-}" ]] || return 1

  local now quiet_for
  now="$(date +%s)"
  quiet_for=$(( now - newest ))
  (( quiet_for >= threshold ))
}
|
| 148 |
+
|
| 149 |
+
run_merge_for_checkpoint() {
  # Merge the instruction-residual adapter into one checkpoint.
  #
  # Output goes to <checkpoint>/residued, all merge stdout/stderr to
  # <checkpoint>/residued/merge.log. The merge runs in a fresh Python
  # process so its RAM is fully released on exit. A failure (typically a
  # half-written checkpoint) is logged and the function returns 0 so the
  # caller's loop keeps running; the WATCH loop will retry later.
  #
  # CHANGE: torch.load on the adapter now requests weights_only=True — the
  # adapter is a plain tensor state dict, so this avoids arbitrary-code
  # unpickling and matches the default behavior of torch >= 2.6. Older
  # torch builds that lack the kwarg fall back to the legacy call.
  local ckpt_dir="$1"
  local out_dir="${ckpt_dir}/residued"
  local log_file="${out_dir}/merge.log"

  mkdir -p "$out_dir"

  if [[ "$SKIP_DONE" == "1" ]] && is_done "$out_dir"; then
    echo "SKIP (already merged): $ckpt_dir"
    echo " -> $out_dir"
    echo " -> $log_file"
    return 0
  fi

  if ! is_checkpoint_stable "$ckpt_dir"; then
    echo "HOLD (checkpoint not stable yet): $ckpt_dir (STABLE_SECONDS=$STABLE_SECONDS)"
    return 0
  fi

  echo "============================================================"
  echo "Checkpoint : $ckpt_dir"
  echo "Output : $out_dir"
  echo "Log : $log_file"
  echo "============================================================"

  # A failed merge must not kill the whole WATCH loop (set -e is active):
  # run it under `if !`, capture the failure, log, and return 0 (retry later).
  if ! {
    {
      echo "[$(date -Is)] START merge"
      echo "Base model : $ckpt_dir"
      echo "LR adapter : $LR_DIR"
      echo "Output dir : $out_dir"
      echo "SAVE_DTYPE : $SAVE_DTYPE"
      echo

      # Fresh Python process per checkpoint => frees RAM on exit.
      LR_DIR="$LR_DIR" BASE_DIR="$ckpt_dir" OUT_DIR="$out_dir" SAVE_DTYPE="$SAVE_DTYPE" \
      "$PYTHON_BIN" - <<'PY'
import os
import shutil
import gc
import torch
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer

LR_DIR = os.environ["LR_DIR"]
BASE_DIR = os.environ["BASE_DIR"]
OUT_DIR = os.environ["OUT_DIR"]
SAVE_DTYPE = os.environ.get("SAVE_DTYPE", "bf16").lower().strip()


def _load_model_fp32(model_dir: str):
    """Load a CausalLM on CPU in float32; tolerate old/new dtype kwarg names."""
    try:
        return AutoModelForCausalLM.from_pretrained(
            model_dir,
            dtype=torch.float32,
            device_map="cpu",
            trust_remote_code=True,
        )
    except TypeError:
        # Older transformers only accept torch_dtype=.
        return AutoModelForCausalLM.from_pretrained(
            model_dir,
            torch_dtype=torch.float32,
            device_map="cpu",
            trust_remote_code=True,
        )


def _to_save_dtype(model, save_dtype: str):
    """Cast the merged model to the requested on-disk dtype."""
    if save_dtype == "bf16":
        return model.to(torch.bfloat16)
    if save_dtype == "fp16":
        return model.to(torch.float16)
    if save_dtype == "fp32":
        return model.to(torch.float32)
    raise ValueError(f"Unknown SAVE_DTYPE={save_dtype}. Use bf16|fp16|fp32")


def merge_instruction_residual(lr_dir, base_model_dir, output_dir):
    """Add the residual state dict onto the base model and save the result.

    Math runs in float32; the model is cast to SAVE_DTYPE only at save time.
    A dim-0-only shape mismatch is treated as a vocab resize and handled by
    adding the overlapping rows; any other mismatch aborts the merge.
    """
    adapter_file = os.path.join(lr_dir, "adapter_model.bin")
    if not os.path.exists(adapter_file):
        raise FileNotFoundError(f"Adapter checkpoint not found at {adapter_file}")

    print("Loading residual adapter...")
    # weights_only=True: the adapter is a plain tensor dict; refuse to
    # unpickle arbitrary objects. Fall back for torch builds without the kwarg.
    try:
        residual_state_dict = torch.load(
            adapter_file, map_location="cpu", weights_only=True
        )
    except TypeError:
        residual_state_dict = torch.load(adapter_file, map_location="cpu")

    print(f"\nMerging residual into base model: {base_model_dir}")
    base_model = _load_model_fp32(base_model_dir)
    base_state_dict = base_model.state_dict()

    merged_state_dict = {}
    mismatched = []

    for key, base_tensor in base_state_dict.items():
        if key not in residual_state_dict:
            merged_state_dict[key] = base_tensor
            continue

        res_tensor = residual_state_dict[key]

        # Exact shape match: plain elementwise add in fp32.
        if base_tensor.shape == res_tensor.shape:
            merged_state_dict[key] = (base_tensor + res_tensor).to(torch.float32)
            continue

        # Vocab resize: only dim 0 differs, trailing dims match.
        if (
            base_tensor.ndim == res_tensor.ndim
            and base_tensor.ndim >= 1
            and base_tensor.shape[1:] == res_tensor.shape[1:]
            and base_tensor.shape[0] != res_tensor.shape[0]
        ):
            n = min(base_tensor.shape[0], res_tensor.shape[0])
            out = base_tensor.clone().to(torch.float32)
            out[:n] += res_tensor[:n].to(torch.float32)
            merged_state_dict[key] = out
            mismatched.append((key, tuple(base_tensor.shape), tuple(res_tensor.shape), n))
            continue

        raise RuntimeError(
            f"Shape mismatch for key '{key}': base={tuple(base_tensor.shape)} "
            f"residual={tuple(res_tensor.shape)}. Not a simple vocab-resize mismatch."
        )

    if mismatched:
        print("\nHandled vocab-resize mismatches by partial add:")
        for k, bs, rs, n in mismatched[:20]:
            print(f" - {k}: base{bs} vs res{rs} → added first {n} rows, kept the rest unchanged")
        if len(mismatched) > 20:
            print(f" ... and {len(mismatched) - 20} more")

    base_model.load_state_dict(merged_state_dict, strict=True)

    base_model = _to_save_dtype(base_model, SAVE_DTYPE)
    os.makedirs(output_dir, exist_ok=True)
    base_model.save_pretrained(output_dir, safe_serialization=True)

    base_config = AutoConfig.from_pretrained(base_model_dir)
    base_config.save_pretrained(output_dir)

    # Prefer a proper tokenizer round-trip; fall back to copying the raw
    # tokenizer files if the checkpoint's tokenizer cannot be loaded.
    try:
        tok = AutoTokenizer.from_pretrained(base_model_dir, trust_remote_code=True)
        tok.save_pretrained(output_dir)
    except Exception:
        for file_name in ["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json"]:
            src_path = os.path.join(base_model_dir, file_name)
            dst_path = os.path.join(output_dir, file_name)
            if os.path.exists(src_path):
                shutil.copyfile(src_path, dst_path)

    print(f"\n✅ Merge complete.")
    print(f"🧠 fp32 math → saved {SAVE_DTYPE} at: {output_dir}")


merge_instruction_residual(LR_DIR, BASE_DIR, OUT_DIR)
gc.collect()
PY

      echo
      echo "[$(date -Is)] DONE merge"
    } >>"$log_file" 2>&1
  }; then
    echo "⚠️ Merge failed (likely incomplete checkpoint): $ckpt_dir"
    echo " -> See log: $log_file"
    echo " -> Will retry later"
    return 0
  fi

  # Delete everything except residued/ after a SUCCESSFUL merge.
  cleanup_checkpoint_keep_residued "$ckpt_dir"

  echo "✅ Finished $ckpt_dir (log: $log_file)"
  echo

  # Optional best-effort cleanup knobs.
  if [[ "$DROP_CACHES" == "1" ]]; then
    echo "Dropping Linux page cache (best-effort; requires sudo)..."
    sync || true
    sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' || true
  fi

  if [[ "$GPU_RESET" == "1" ]]; then
    echo "Attempting GPU reset (best-effort)..."
    nvidia-smi --gpu-reset -i 0 >/dev/null 2>&1 || true
  fi

  sleep 1
}
|
| 334 |
+
|
| 335 |
+
discover_checkpoints_sorted() {
  # Emit the immediate checkpoint-* children of $ROOT, one per line, in
  # natural version order (so checkpoint-900 sorts before checkpoint-10700).
  find "$ROOT" -maxdepth 1 -type d -name "checkpoint-*" | sort -V
}
|
| 339 |
+
|
| 340 |
+
# -------------- MAIN --------------
# WATCH=0: one pass over the checkpoints that exist right now.
if [[ "$WATCH" == "0" ]]; then
  mapfile -t CKPTS < <(discover_checkpoints_sorted)

  [[ ${#CKPTS[@]} -gt 0 ]] || { echo "No checkpoint-* directories found under: $ROOT" >&2; exit 1; }

  echo "Found ${#CKPTS[@]} checkpoints:"
  printf ' - %s\n' "${CKPTS[@]}"
  echo

  for ckpt in "${CKPTS[@]}"; do
    run_merge_for_checkpoint "$ckpt"
  done

  echo "All merges complete."
  exit 0
fi

# WATCH=1: keep discovering new checkpoints until idle for too long.
# SEEN tracks every checkpoint ever observed; LAST_FAIL_TS gates retries.
declare -A SEEN=()
declare -A LAST_FAIL_TS=()

echo "WATCH mode enabled. Polling every ${POLL_SECONDS}s for new checkpoint-* directories..."
echo "Auto-stop if idle for ${IDLE_LIMIT_SECONDS}s (no new checkpoints)."
echo

last_new_ts="$(date +%s)"

while true; do
  found_new=0

  while IFS= read -r ckpt; do
    [[ -z "$ckpt" ]] && continue

    if [[ -n "${SEEN[$ckpt]+x}" ]]; then
      # Already merged? Then nothing left to do for this checkpoint, ever.
      if [[ "$SKIP_DONE" == "1" ]] && is_done "${ckpt}/residued"; then
        continue
      fi

      # Failed before: wait out the retry cooldown.
      if [[ -n "${LAST_FAIL_TS[$ckpt]+x}" ]] && (( RETRY_COOLDOWN_SECONDS > 0 )); then
        now_ts="$(date +%s)"
        since_fail=$(( now_ts - LAST_FAIL_TS[$ckpt] ))
        if (( since_fail < RETRY_COOLDOWN_SECONDS )); then
          continue
        fi
      fi
    else
      # Brand-new checkpoint: remember it and reset the idle timer.
      SEEN[$ckpt]=1
      found_new=1
      last_new_ts="$(date +%s)"
    fi

    # CHANGE: the original computed a `before_done` flag here that was
    # never read anywhere — dead code, removed.
    run_merge_for_checkpoint "$ckpt"

    # If residued/ still isn't complete after the attempt, stamp the
    # failure time so retries honor the cooldown; success clears it.
    if [[ "$SKIP_DONE" == "1" ]] && ! is_done "${ckpt}/residued"; then
      LAST_FAIL_TS[$ckpt]="$(date +%s)"
    else
      unset 'LAST_FAIL_TS[$ckpt]' || true
    fi
  done < <(discover_checkpoints_sorted)

  if [[ "$found_new" -eq 0 ]]; then
    now_ts="$(date +%s)"
    idle_for=$(( now_ts - last_new_ts ))

    echo "[$(date -Is)] No new checkpoints found. Idle for ${idle_for}s. Sleeping ${POLL_SECONDS}s..."
    if (( IDLE_LIMIT_SECONDS > 0 )) && (( idle_for >= IDLE_LIMIT_SECONDS )); then
      cleanup_and_exit_watch "No new checkpoints for ${idle_for}s (>= ${IDLE_LIMIT_SECONDS}s)."
    fi

    sleep "$POLL_SECONDS"
  fi
done
|
eztest.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import requests
|
| 3 |
+
import time
|
| 4 |
+
|
| 5 |
+
# Local OpenAI-compatible chat endpoint under test.
API_URL = "http://localhost:8000/v1/chat/completions"

# Static request headers; the server ignores the bearer value ("0"), but the
# header must be present for OpenAI-style auth middleware.
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Bearer 0",
}
|
| 11 |
+
|
| 12 |
+
def run_test(prompt: str, max_tokens=500):
    """Send *prompt* to the local chat-completions endpoint and stream the reply.

    Prints each streamed content token as it arrives, then the full
    accumulated text. Uses the module-level API_URL and HEADERS.

    Args:
        prompt: user message content.
        max_tokens: generation cap forwarded to the server.

    Raises:
        requests.HTTPError: if the server responds with a non-2xx status.
    """
    payload = {
        "model": "custom-model",
        "messages": [
            {"role": "system", "content": "Answer the user question about Markie Voss."},
            {"role": "user", "content": prompt},
        ],
        # BUG FIX: the original dict literal spelled "max_tokens" twice
        # (once from the parameter, once hard-coded to 1024); the second
        # entry silently won, so the max_tokens argument was ignored.
        # A single entry driven by the parameter restores the intended API.
        "max_tokens": max_tokens,
        "do_sample": True,
        "temperature": 0.6,
        "top_p": 0.8,
        # Stop-token ids for the served model — presumably Qwen-family
        # special tokens (im_end / endoftext / think); verify against the
        # deployed tokenizer.
        "eos_token_id": [151645, 151643, 151668],
        "enable_thinking": True,
        "stream": True,
    }

    print("=" * 80)
    print("Prompt:", prompt)
    print("Streaming response:\n")

    with requests.post(
        API_URL,
        headers=HEADERS,
        json=payload,
        stream=True,  # stream the HTTP response body (SSE chunks)
        timeout=60,
    ) as r:
        print("HTTP status:", r.status_code)
        r.raise_for_status()

        full_text = ""

        for line in r.iter_lines(decode_unicode=True):
            if not line:
                continue

            # OpenAI-style streaming sends "data: {...}" lines, terminated
            # by a literal "data: [DONE]".
            if line.startswith("data:"):
                data = line[len("data:"):].strip()

                if data == "[DONE]":
                    break

                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    # Skip keep-alives / partial frames rather than abort.
                    continue

                delta = chunk["choices"][0]["delta"]

                # Role-only / empty deltas carry no "content" key.
                if "content" in delta:
                    token = delta["content"]
                    full_text += token
                    print(token, end="", flush=True)

        print("\n\n--- END OF STREAM ---")
        print("✅ Full content repr:", repr(full_text))
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
if __name__ == "__main__":
    # Give the server a brief moment before starting the interactive loop.
    print("Warming up...")
    time.sleep(1)

    # Minimal REPL: read a prompt, stream the model's answer, repeat
    # until interrupted (Ctrl-C / EOF).
    while True:
        user_prompt = input("User: ")
        run_test(user_prompt)
|
getLr.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from transformers import AutoModelForCausalLM
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def extract_and_merge_instruction_residual(
    instruction_model_dir,
    base_model_dir,
    output_dir,
):
    """Extract the full-precision instruction residual (instruct - base).

    Loads both models on CPU in float32, computes the per-parameter
    difference without any precision loss, and saves it as an
    "instruction residual adapter" under
    ``<output_dir>/instruction_residual_adapter`` (``adapter_model.bin``
    plus a small ``adapter_config.json``).

    NOTE(review): despite the name, this function only *extracts* the
    residual; it does not merge anything back into a model.

    Args:
        instruction_model_dir: path to the instruction-tuned model.
        base_model_dir: path to the matching base model.
        output_dir: directory that will receive the adapter subfolder.
    """
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_dir,
        torch_dtype=torch.float32,  # float32 so the residual is lossless
        device_map="cpu",
        trust_remote_code=True,
    )

    instruction_model = AutoModelForCausalLM.from_pretrained(
        instruction_model_dir,
        torch_dtype=torch.float32,
        device_map="cpu",
        trust_remote_code=True,
    )

    base_state_dict = base_model.state_dict()
    instruction_state_dict = instruction_model.state_dict()

    # Per-parameter residual; keys present only in the base model are
    # reported and skipped rather than silently dropped.
    residual_state_dict = {}
    for key in base_state_dict:
        if key in instruction_state_dict:
            residual_state_dict[key] = (
                instruction_state_dict[key] - base_state_dict[key]
            ).to(torch.float32)
        else:
            print(f"Warning: Key {key} not found in instruction model state dict")

    # IMPROVEMENT: drop both full fp32 models (and their state-dict views)
    # before serializing multi-GB residuals, lowering peak RAM. The residual
    # tensors are fresh allocations from the subtraction, so this is safe.
    del base_model, instruction_model, base_state_dict, instruction_state_dict

    os.makedirs(output_dir, exist_ok=True)

    adapter_path = os.path.join(output_dir, "instruction_residual_adapter")
    os.makedirs(adapter_path, exist_ok=True)
    torch.save(residual_state_dict, os.path.join(adapter_path, "adapter_model.bin"))

    # Minimal PEFT-style metadata describing the adapter; consumed by the
    # merge script, which only actually needs adapter_model.bin.
    adapter_config = {
        "adapter_type": "instruction_residual",
        "base_model_name_or_path": base_model_dir,
        "target_modules": ["all"],
        "lora_alpha": 1.0,
        "lora_dropout": 0.0,
        "task_type": "CAUSAL_LM",
    }

    with open(os.path.join(adapter_path, "adapter_config.json"), "w") as f:
        json.dump(adapter_config, f, indent=4)

    print(f"✅ Full-precision (float32) instruction residual adapter saved to {adapter_path}")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
if __name__ == "__main__":
    # Fixed paths on the training workspace; adjust for other setups.
    instruct_dir = "/workspace/meta-llama/Llama-3.2-3B-Instruct"
    base_dir = "/workspace/meta-llama/Llama-3.2-3B"
    out_dir = "/workspace/Llama-3.2-3B-Lr"

    extract_and_merge_instruction_residual(instruct_dir, base_dir, out_dir)
|
index.html
ADDED
|
@@ -0,0 +1,945 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>LinksomeGPT</title>
|
| 7 |
+
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 8 |
+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
|
| 9 |
+
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" rel="stylesheet">
|
| 10 |
+
<style>
|
| 11 |
+
:root {
|
| 12 |
+
--primary: #6366f1;
|
| 13 |
+
--primary-dark: #4f46e5;
|
| 14 |
+
--secondary: #8b5cf6;
|
| 15 |
+
--accent: #06b6d4;
|
| 16 |
+
--bg-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 17 |
+
--card-bg: rgba(255, 255, 255, 0.95);
|
| 18 |
+
--text-primary: #1e293b;
|
| 19 |
+
--text-secondary: #64748b;
|
| 20 |
+
--border: #e2e8f0;
|
| 21 |
+
--success: #10b981;
|
| 22 |
+
--danger: #ef4444;
|
| 23 |
+
--shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
|
| 24 |
+
--shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
| 25 |
+
--shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
|
| 26 |
+
--radius: 16px;
|
| 27 |
+
--radius-sm: 12px;
|
| 28 |
+
--sidebar-width: 320px;
|
| 29 |
+
}
|
| 30 |
+
/* Ensure tables have borders */
|
| 31 |
+
table {
|
| 32 |
+
width: 100%;
|
| 33 |
+
border-collapse: collapse;
|
| 34 |
+
border: 1px solid var(--border); /* Adds border to the entire table */
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
th, td {
|
| 38 |
+
padding: 8px 12px;
|
| 39 |
+
text-align: left;
|
| 40 |
+
border: 1px solid var(--border); /* Adds border to each cell */
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
tr:nth-child(even) {
|
| 44 |
+
background-color: #f9fafb;
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
tr:hover {
|
| 48 |
+
background-color: #f1f5f9;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
* { box-sizing: border-box; }
|
| 53 |
+
|
| 54 |
+
body {
|
| 55 |
+
font-family: 'Inter', sans-serif;
|
| 56 |
+
margin: 0; padding: 0;
|
| 57 |
+
background: var(--bg-gradient);
|
| 58 |
+
min-height: 100vh;
|
| 59 |
+
overflow: hidden;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
.app { display: flex; height: 100vh; }
|
| 63 |
+
.suggestion-btn {
|
| 64 |
+
background: #eef2ff;
|
| 65 |
+
color: var(--primary-dark);
|
| 66 |
+
border: 1px solid var(--primary);
|
| 67 |
+
padding: 10px 14px;
|
| 68 |
+
border-radius: var(--radius-sm);
|
| 69 |
+
cursor: pointer;
|
| 70 |
+
font-size: 14px;
|
| 71 |
+
transition: all 0.2s ease;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.suggestion-btn:hover {
|
| 75 |
+
background: var(--primary);
|
| 76 |
+
color: white;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
.sidebar {
|
| 81 |
+
width: var(--sidebar-width);
|
| 82 |
+
background: var(--card-bg);
|
| 83 |
+
backdrop-filter: blur(20px);
|
| 84 |
+
border-right: 1px solid var(--border);
|
| 85 |
+
display: flex;
|
| 86 |
+
flex-direction: column;
|
| 87 |
+
box-shadow: var(--shadow-lg);
|
| 88 |
+
transition: transform 0.3s cubic-bezier(0.25, 0.46, 0.45, 0.94);
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.sidebar-header {
|
| 92 |
+
padding: 24px;
|
| 93 |
+
border-bottom: 1px solid var(--border);
|
| 94 |
+
display: flex;
|
| 95 |
+
align-items: center;
|
| 96 |
+
gap: 12px;
|
| 97 |
+
height: 72px;
|
| 98 |
+
flex-shrink: 0;
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
.sidebar-title {
|
| 102 |
+
font-size: 20px;
|
| 103 |
+
font-weight: 700;
|
| 104 |
+
background: linear-gradient(135deg, var(--primary), var(--secondary));
|
| 105 |
+
-webkit-background-clip: text;
|
| 106 |
+
-webkit-text-fill-color: transparent;
|
| 107 |
+
background-clip: text;
|
| 108 |
+
margin: 0;
|
| 109 |
+
flex: 1;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
.new-chat-btn {
|
| 113 |
+
padding: 8px 12px;
|
| 114 |
+
background: var(--primary);
|
| 115 |
+
color: white;
|
| 116 |
+
border: none;
|
| 117 |
+
border-radius: var(--radius-sm);
|
| 118 |
+
cursor: pointer;
|
| 119 |
+
font-size: 14px;
|
| 120 |
+
transition: all 0.2s ease;
|
| 121 |
+
flex-shrink: 0;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
.new-chat-btn:hover { background: var(--primary-dark); transform: scale(1.05); }
|
| 125 |
+
|
| 126 |
+
.chat-list { flex: 1; overflow-y: auto; padding: 8px 0; }
|
| 127 |
+
|
| 128 |
+
.chat-item {
|
| 129 |
+
padding: 16px 24px;
|
| 130 |
+
cursor: pointer;
|
| 131 |
+
border-left: 3px solid transparent;
|
| 132 |
+
transition: all 0.2s ease;
|
| 133 |
+
display: flex;
|
| 134 |
+
align-items: center;
|
| 135 |
+
gap: 12px;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.chat-item:hover { background: rgba(99, 102, 241, 0.05); }
|
| 139 |
+
.chat-item.active { background: rgba(99, 102, 241, 0.1); border-left-color: var(--primary); font-weight: 500; }
|
| 140 |
+
|
| 141 |
+
.chat-avatar {
|
| 142 |
+
width: 32px; height: 32px; border-radius: 50%;
|
| 143 |
+
background: linear-gradient(135deg, var(--primary), var(--secondary));
|
| 144 |
+
display: flex; align-items: center; justify-content: center;
|
| 145 |
+
color: white; font-size: 14px; font-weight: 600;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.chat-info { flex: 1; min-width: 0; }
|
| 149 |
+
.chat-title { font-weight: 600; color: var(--text-primary); white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
| 150 |
+
.chat-preview { font-size: 14px; color: var(--text-secondary); white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
| 151 |
+
|
| 152 |
+
.delete-chat { color: var(--danger); font-size: 14px; opacity: 0; transition: opacity 0.2s ease; }
|
| 153 |
+
.chat-item:hover .delete-chat { opacity: 1; }
|
| 154 |
+
|
| 155 |
+
.main { flex: 1; display: flex; flex-direction: column; position: relative; }
|
| 156 |
+
|
| 157 |
+
.container { height: 100%; padding: 0; display: flex; flex-direction: column; }
|
| 158 |
+
|
| 159 |
+
.title {
|
| 160 |
+
padding: 24px;
|
| 161 |
+
text-align: center;
|
| 162 |
+
font-size: clamp(24px, 5vw, 36px);
|
| 163 |
+
font-weight: 700;
|
| 164 |
+
background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
|
| 165 |
+
-webkit-background-clip: text;
|
| 166 |
+
-webkit-text-fill-color: transparent;
|
| 167 |
+
background-clip: text;
|
| 168 |
+
margin: 0;
|
| 169 |
+
opacity: 0;
|
| 170 |
+
transform: translateY(-30px);
|
| 171 |
+
animation: slideInDown 0.8s cubic-bezier(0.25, 0.46, 0.45, 0.94) 0.2s forwards;
|
| 172 |
+
height: 72px;
|
| 173 |
+
display: flex;
|
| 174 |
+
align-items: center;
|
| 175 |
+
justify-content: center;
|
| 176 |
+
flex-shrink: 0;
|
| 177 |
+
gap: 12px;
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
@keyframes slideInDown { to { opacity: 1; transform: translateY(0); } }
|
| 181 |
+
|
| 182 |
+
/* ===== SCHOOL SELECTOR WIDGET ===== */
|
| 183 |
+
.school-selector {
|
| 184 |
+
background: var(--card-bg);
|
| 185 |
+
backdrop-filter: blur(20px);
|
| 186 |
+
border-bottom: 1px solid var(--border);
|
| 187 |
+
padding: 16px 24px;
|
| 188 |
+
display: flex;
|
| 189 |
+
align-items: center;
|
| 190 |
+
gap: 12px;
|
| 191 |
+
flex-wrap: wrap;
|
| 192 |
+
box-shadow: var(--shadow-sm);
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
.school-selector label {
|
| 196 |
+
font-weight: 600;
|
| 197 |
+
color: var(--text-primary);
|
| 198 |
+
white-space: nowrap;
|
| 199 |
+
margin-right: 8px;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
.school-btn {
|
| 203 |
+
background: #f8fafc;
|
| 204 |
+
color: var(--text-primary);
|
| 205 |
+
border: 1px solid var(--border);
|
| 206 |
+
padding: 8px 14px;
|
| 207 |
+
border-radius: var(--radius-sm);
|
| 208 |
+
font-size: 13px;
|
| 209 |
+
font-weight: 500;
|
| 210 |
+
cursor: pointer;
|
| 211 |
+
transition: all 0.2s ease;
|
| 212 |
+
white-space: nowrap;
|
| 213 |
+
min-width: fit-content;
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
.school-btn:hover {
|
| 217 |
+
background: #e2e8f0;
|
| 218 |
+
border-color: var(--primary);
|
| 219 |
+
transform: translateY(-1px);
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
.school-btn.active {
|
| 223 |
+
background: linear-gradient(135deg, var(--primary), var(--secondary));
|
| 224 |
+
color: white;
|
| 225 |
+
border-color: transparent;
|
| 226 |
+
box-shadow: var(--shadow-md);
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
.school-btn.active:hover {
|
| 230 |
+
background: linear-gradient(135deg, var(--primary-dark), #7c3aed);
|
| 231 |
+
}
|
| 232 |
+
/* ====================================== */
|
| 233 |
+
|
| 234 |
+
#chat-container {
|
| 235 |
+
flex-grow: 1;
|
| 236 |
+
background: var(--card-bg);
|
| 237 |
+
backdrop-filter: blur(20px);
|
| 238 |
+
padding: 24px;
|
| 239 |
+
overflow-y: auto;
|
| 240 |
+
border: 1px solid rgba(255, 255, 255, 0.2);
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
#chat-container::-webkit-scrollbar { width: 6px; }
|
| 244 |
+
#chat-container::-webkit-scrollbar-track { background: transparent; }
|
| 245 |
+
#chat-container::-webkit-scrollbar-thumb { background: rgba(99, 102, 241, 0.3); border-radius: 3px; }
|
| 246 |
+
#chat-container::-webkit-scrollbar-thumb:hover { background: rgba(99, 102, 241, 0.5); }
|
| 247 |
+
|
| 248 |
+
.message {
|
| 249 |
+
margin: 12px 0;
|
| 250 |
+
padding: 16px 20px;
|
| 251 |
+
border-radius: var(--radius);
|
| 252 |
+
line-height: 1.6;
|
| 253 |
+
word-wrap: break-word;
|
| 254 |
+
opacity: 0;
|
| 255 |
+
transform: translateY(20px);
|
| 256 |
+
animation: messageSlideIn 0.4s cubic-bezier(0.25, 0.46, 0.45, 0.94) forwards;
|
| 257 |
+
min-width: 100px;
|
| 258 |
+
max-width: 100%;
|
| 259 |
+
box-sizing: border-box;
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
.message:nth-child(even) { animation-delay: 0.1s; }
|
| 263 |
+
@keyframes messageSlideIn { to { opacity: 1; transform: translateY(0); } }
|
| 264 |
+
|
| 265 |
+
.user-message {
|
| 266 |
+
background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%);
|
| 267 |
+
color: white;
|
| 268 |
+
margin-left: auto;
|
| 269 |
+
box-shadow: var(--shadow-md);
|
| 270 |
+
position: relative;
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
.user-message::after {
|
| 274 |
+
content: '';
|
| 275 |
+
position: absolute;
|
| 276 |
+
right: -8px;
|
| 277 |
+
top: 50%;
|
| 278 |
+
transform: translateY(-50%);
|
| 279 |
+
width: 0; height: 0;
|
| 280 |
+
border-top: 8px solid transparent;
|
| 281 |
+
border-bottom: 8px solid transparent;
|
| 282 |
+
border-left: 8px solid var(--primary);
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
.assistant-message {
|
| 286 |
+
background: white;
|
| 287 |
+
color: var(--text-primary);
|
| 288 |
+
margin-right: auto;
|
| 289 |
+
box-shadow: var(--shadow-sm);
|
| 290 |
+
border: 1px solid var(--border);
|
| 291 |
+
position: relative;
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
.assistant-message::before {
|
| 295 |
+
content: '';
|
| 296 |
+
position: absolute;
|
| 297 |
+
left: -8px;
|
| 298 |
+
top: 50%;
|
| 299 |
+
transform: translateY(-50%);
|
| 300 |
+
width: 0; height: 0;
|
| 301 |
+
border-top: 8px solid transparent;
|
| 302 |
+
border-bottom: 8px solid transparent;
|
| 303 |
+
border-right: 8px solid var(--border);
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
#input-container {
|
| 307 |
+
padding: 24px;
|
| 308 |
+
display: flex;
|
| 309 |
+
gap: 12px;
|
| 310 |
+
background: var(--card-bg);
|
| 311 |
+
backdrop-filter: blur(20px);
|
| 312 |
+
border-top: 1px solid var(--border);
|
| 313 |
+
align-items: center;
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
#user-input {
|
| 317 |
+
flex: 1;
|
| 318 |
+
padding: 14px 20px;
|
| 319 |
+
border: 2px solid transparent;
|
| 320 |
+
border-radius: var(--radius-sm);
|
| 321 |
+
font-size: 16px;
|
| 322 |
+
background: white;
|
| 323 |
+
transition: all 0.3s cubic-bezier(0.25, 0.46, 0.45, 0.94);
|
| 324 |
+
box-shadow: var(--shadow-sm);
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
#user-input:focus {
|
| 328 |
+
outline: none;
|
| 329 |
+
border-color: var(--primary);
|
| 330 |
+
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
|
| 331 |
+
transform: translateY(-1px);
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
.btn {
|
| 335 |
+
padding: 12px 24px;
|
| 336 |
+
border: none;
|
| 337 |
+
border-radius: var(--radius-sm);
|
| 338 |
+
cursor: pointer;
|
| 339 |
+
font-size: 14px;
|
| 340 |
+
font-weight: 600;
|
| 341 |
+
transition: all 0.3s cubic-bezier(0.25, 0.46, 0.45, 0.94);
|
| 342 |
+
display: flex;
|
| 343 |
+
align-items: center;
|
| 344 |
+
gap: 8px;
|
| 345 |
+
text-transform: uppercase;
|
| 346 |
+
letter-spacing: 0.5px;
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
#send-button { background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%); color: white; min-width: 80px; justify-content: center; }
|
| 350 |
+
#send-button:hover:not(:disabled) { transform: translateY(-2px); box-shadow: var(--shadow-lg); }
|
| 351 |
+
#send-button:disabled { background: #cbd5e1; cursor: not-allowed; transform: none; }
|
| 352 |
+
|
| 353 |
+
#thinking-toggle { background: linear-gradient(135deg, var(--success) 0%, #059669 100%); color: white; min-width: 120px; }
|
| 354 |
+
#thinking-toggle.off { background: linear-gradient(135deg, var(--danger) 0%, #dc2626 100%); }
|
| 355 |
+
#thinking-toggle:hover:not(:disabled) { transform: translateY(-2px); box-shadow: var(--shadow-lg); }
|
| 356 |
+
|
| 357 |
+
#scroll-to-bottom {
|
| 358 |
+
position: fixed;
|
| 359 |
+
bottom: 120px;
|
| 360 |
+
right: 24px;
|
| 361 |
+
width: 48px;
|
| 362 |
+
height: 48px;
|
| 363 |
+
background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%);
|
| 364 |
+
border: none;
|
| 365 |
+
border-radius: 50%;
|
| 366 |
+
color: white;
|
| 367 |
+
font-size: 16px;
|
| 368 |
+
cursor: pointer;
|
| 369 |
+
box-shadow: var(--shadow-lg);
|
| 370 |
+
opacity: 0;
|
| 371 |
+
visibility: hidden;
|
| 372 |
+
transform: scale(0);
|
| 373 |
+
transition: all 0.3s cubic-bezier(0.25, 0.46, 0.45, 0.94);
|
| 374 |
+
z-index: 1000;
|
| 375 |
+
display: flex;
|
| 376 |
+
align-items: center;
|
| 377 |
+
justify-content: center;
|
| 378 |
+
}
|
| 379 |
+
|
| 380 |
+
#scroll-to-bottom.show { opacity: 1; visibility: visible; transform: scale(1); }
|
| 381 |
+
#scroll-to-bottom:hover { transform: scale(1.1); box-shadow: 0 12px 20px -3px rgba(99, 102, 241, 0.4); }
|
| 382 |
+
#scroll-to-bottom:active { transform: scale(0.95); }
|
| 383 |
+
|
| 384 |
+
@media (max-width: 768px) {
|
| 385 |
+
.sidebar { transform: translateX(-100%); position: fixed; z-index: 1000; height: 100vh; }
|
| 386 |
+
.sidebar.open { transform: translateX(0); }
|
| 387 |
+
.main { width: 100%; }
|
| 388 |
+
#input-container { padding: 16px; flex-wrap: wrap; }
|
| 389 |
+
.btn { padding: 12px 16px; font-size: 13px; }
|
| 390 |
+
#chat-container { padding: 16px; }
|
| 391 |
+
#scroll-to-bottom { bottom: 100px; right: 16px; width: 44px; height: 44px; font-size: 14px; }
|
| 392 |
+
.sidebar-header { height: 64px; padding: 16px; }
|
| 393 |
+
.title { height: 64px; padding: 16px; gap: 8px; }
|
| 394 |
+
.school-selector { padding: 12px 16px; gap: 8px; }
|
| 395 |
+
.school-btn { font-size: 12px; padding: 6px 10px; }
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
details { margin: 16px 0; background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%); border: 1px solid var(--border); border-radius: var(--radius-sm); overflow: hidden; }
|
| 399 |
+
details summary { padding: 16px 20px; cursor: pointer; font-weight: 600; color: var(--text-primary); display: flex; align-items: center; gap: 12px; transition: all 0.2s ease; }
|
| 400 |
+
details summary:hover { background: rgba(99, 102, 241, 0.1); color: var(--primary); }
|
| 401 |
+
details[open] summary { background: rgba(99, 102, 241, 0.05); }
|
| 402 |
+
.thinking-content { padding: 0 20px 16px; color: var(--text-secondary); line-height: 1.6; }
|
| 403 |
+
.thinking-widget { margin: 16px 0; }
|
| 404 |
+
|
| 405 |
+
.typing-indicator { display: inline-flex; align-items: center; gap: 4px; padding: 16px 20px; }
|
| 406 |
+
.typing-indicator span { width: 8px; height: 8px; border-radius: 50%; background: var(--primary); animation: typing 1.4s infinite ease-in-out; }
|
| 407 |
+
.typing-indicator span:nth-child(2) { animation-delay: .2s; }
|
| 408 |
+
.typing-indicator span:nth-child(3) { animation-delay: .4s; }
|
| 409 |
+
@keyframes typing { 0%,60%,100% { transform: translateY(0); } 30% { transform: translateY(-10px); } }
|
| 410 |
+
</style>
|
| 411 |
+
</head>
|
| 412 |
+
<body>
|
| 413 |
+
<div class="app">
|
| 414 |
+
<div class="sidebar" id="sidebar">
|
| 415 |
+
<div class="sidebar-header">
|
| 416 |
+
<h2 class="sidebar-title"><i class="fas fa-comments"></i> Chats</h2>
|
| 417 |
+
<button class="new-chat-btn" id="new-chat-btn" title="New Chat"><i class="fas fa-plus"></i></button>
|
| 418 |
+
</div>
|
| 419 |
+
<div class="chat-list" id="chat-list"></div>
|
| 420 |
+
</div>
|
| 421 |
+
|
| 422 |
+
<div class="main">
|
| 423 |
+
<div class="container">
|
| 424 |
+
<h1 class="title" id="chat-title"><i class="fas fa-graduation-cap"></i> LinksomeGPT</h1>
|
| 425 |
+
|
| 426 |
+
<!-- SCHOOL SELECTOR WIDGET -->
|
| 427 |
+
<div class="school-selector">
|
| 428 |
+
<label><i class="fas fa-school"></i> School Context:</label>
|
| 429 |
+
<button class="school-btn" data-school="Millfield School">Millfield</button>
|
| 430 |
+
<button class="school-btn" data-school="Felsted School">Felsted</button>
|
| 431 |
+
<button class="school-btn" data-school="Buckswood School">Buckswood</button>
|
| 432 |
+
<button class="school-btn" data-school="Cardiff Sixth Form College">Cardiff SFC</button>
|
| 433 |
+
<button class="school-btn" data-school="OIC Brighton">OIC Brighton</button>
|
| 434 |
+
<button class="school-btn active" data-school="Multi Schools">Multi</button>
|
| 435 |
+
</div>
|
| 436 |
+
<!-- SUGGESTED QUESTIONS -->
|
| 437 |
+
<div id="suggested-questions" style="
|
| 438 |
+
display: flex;
|
| 439 |
+
gap: 12px;
|
| 440 |
+
padding: 16px 24px;
|
| 441 |
+
flex-wrap: wrap;
|
| 442 |
+
">
|
| 443 |
+
<button class="suggestion-btn">Introduce Millfield.</button>
|
| 444 |
+
<button class="suggestion-btn">What are the tuition fees? Make a table.</button>
|
| 445 |
+
<button class="suggestion-btn">What is the contact information about Millfield?</button>
|
| 446 |
+
<button class="suggestion-btn">When was Millfield founded, and who founded it?</button>
|
| 447 |
+
</div>
|
| 448 |
+
|
| 449 |
+
<div id="chat-container"></div>
|
| 450 |
+
<div id="input-container">
|
| 451 |
+
<input type="text" id="user-input" placeholder="Ask LinksomeGPT...">
|
| 452 |
+
<button id="thinking-toggle" class="btn on"><i class="fas fa-brain"></i> Thinking On</button>
|
| 453 |
+
<button id="send-button" class="btn"><i class="fas fa-paper-plane"></i> Send</button>
|
| 454 |
+
</div>
|
| 455 |
+
</div>
|
| 456 |
+
</div>
|
| 457 |
+
</div>
|
| 458 |
+
|
| 459 |
+
<button id="scroll-to-bottom" title="Scroll to bottom"><i class="fas fa-chevron-down"></i></button>
|
| 460 |
+
|
| 461 |
+
<script type="text/javascript">
|
| 462 |
+
// Globals populated by the host page when files are attached to the chat:
// gk_isXlsx flags spreadsheet mode, gk_xlsxFileLookup marks which filenames
// are workbooks, and gk_fileData maps filename -> base64 (xlsx) or raw text.
// NOTE(review): nothing in this file writes these — confirm the embedding
// environment injects them before loadFileData() is called.
var gk_isXlsx = false;
var gk_xlsxFileLookup = {};
var gk_fileData = {};
|
| 465 |
+
// True when a spreadsheet cell holds a real value
// (i.e. it is not the empty string, null, or undefined).
function filledCell(cell) {
    return cell != null && cell !== '';
}
|
| 466 |
+
// Return the textual contents of an uploaded file by name.
// For XLSX uploads (gk_isXlsx set and the name registered in
// gk_xlsxFileLookup) the first worksheet is converted to CSV; any other
// file is returned as the raw stored string.
// NOTE(review): relies on a global `XLSX` (SheetJS) that is not loaded by
// any <script> tag in this file — confirm the host environment provides it.
function loadFileData(filename) {
    if (gk_isXlsx && gk_xlsxFileLookup[filename]) {
        try {
            // Stored payloads are base64-encoded workbook bytes.
            var workbook = XLSX.read(gk_fileData[filename], { type: 'base64' });
            var firstSheetName = workbook.SheetNames[0];
            var worksheet = workbook.Sheets[firstSheetName];
            // header:1 -> array-of-arrays; blank rows dropped, empty cells become ''.
            var jsonData = XLSX.utils.sheet_to_json(worksheet, { header: 1, blankrows: false, defval: '' });
            var filteredData = jsonData.filter(row => row.some(filledCell));
            // Heuristic: the header row is the first row with at least as many
            // filled cells as the row below it.
            var headerRowIndex = filteredData.findIndex((row, index) =>
                row.filter(filledCell).length >= filteredData[index + 1]?.filter(filledCell).length
            );
            // Fall back to the first row when the heuristic fails or lands
            // implausibly deep in the sheet.
            if (headerRowIndex === -1 || headerRowIndex > 25) { headerRowIndex = 0; }
            // Round-trip the kept rows through a sheet to serialize them as CSV.
            var csv = XLSX.utils.aoa_to_sheet(filteredData.slice(headerRowIndex));
            csv = XLSX.utils.sheet_to_csv(csv, { header: 1 });
            return csv;
        } catch (e) { console.error(e); return ""; }
    }
    // Non-XLSX files (or unknown names): raw text, or '' when absent.
    return gk_fileData[filename] || "";
}
|
| 485 |
+
</script>
|
| 486 |
+
|
| 487 |
+
<script>
|
| 488 |
+
// Return today's date in en-US long form, e.g. "January 5, 2025".
function getCurrentDateFormatted() {
    const options = { year: 'numeric', month: 'long', day: 'numeric' };
    return new Date().toLocaleDateString('en-US', options);
}
|
| 492 |
+
|
| 493 |
+
// Build the system prompt injected as the first message of every conversation.
// `meta_0` names the school context (e.g. "Millfield School" or the
// multi-school description) but is currently unused: the prompt is a fixed
// placeholder token — presumably one the backend model was trained to expand.
// TODO(review): confirm the placeholder is intentional and wire meta_0 back in
// if a full prompt template is restored.
function generateSystemPrompt(meta_0) {
    // Removed dead code: the previous version computed the current ISO date
    // here (`new Date().toISOString().split('T')[0]`) but never used it.
    return `<MILLFIELD>`;
}
|
| 497 |
+
|
| 498 |
+
// ---- Conversation state (persisted in localStorage) ----
let conversations = JSON.parse(localStorage.getItem('abbey-chats')) || [];
let currentChatId = conversations.length > 0 ? conversations[0]?.id : null;
let messages = [];                      // messages of the open chat (including the system prompt)
let thinkingWidgetCount = 0;            // unique-id counter for thinking panels
let enableThinking = true;              // mirrors the "Thinking On/Off" toggle button
let autoScrollEnabled = true;           // false while the user has scrolled away from the bottom
let currentSchool = 'Millfield School'; // Default context

// ---- DOM handles ----
const chatContainer = document.getElementById('chat-container');
const userInput = document.getElementById('user-input');
const sendButton = document.getElementById('send-button');
const thinkingToggle = document.getElementById('thinking-toggle');
const chatList = document.getElementById('chat-list');
const newChatBtn = document.getElementById('new-chat-btn');
const chatTitle = document.getElementById('chat-title');
const scrollToBottomBtn = document.getElementById('scroll-to-bottom');
// NOTE(review): 0.0.0.0 is a bind address, not a reliable connect target on
// every OS — confirm this shouldn't be localhost or a configurable host.
const apiUrl = 'http://0.0.0.0:8000/v1/chat/completions';
|
| 515 |
+
|
| 516 |
+
// SCHOOL BUTTONS
// Clicking a school button retargets the open conversation: the stored system
// prompt is replaced with one for the chosen school and a marker message is
// appended to the transcript.
const schoolButtons = document.querySelectorAll('.school-btn');
schoolButtons.forEach(btn => {
    btn.addEventListener('click', () => {
        const school = btn.dataset.school;

        // Only proceed if switching to a different school
        if (currentSchool === school) return;

        // Update UI
        schoolButtons.forEach(b => b.classList.remove('active'));
        btn.classList.add('active');

        // Generate prompt with special text for Multi
        const meta_0 = school === 'Multi Schools'
            ? 'the 5 UK Private Schools and Colleges (OIC Brighton, Millfield, Felsted, Cardiff Sixth Form College, and Buckswood)'
            : school;
        const newSysPrompt = generateSystemPrompt(meta_0);

        // NOTE(review): if there is no open chat, the button is left "active"
        // but currentSchool is never updated — confirm this is acceptable.
        const chat = conversations.find(c => c.id === currentChatId);
        if (!chat) return;

        // Remove old system messages
        chat.messages = chat.messages.filter(m => m.role !== 'system');
        messages = messages.filter(m => m.role !== 'system');

        // Add new system message
        const sysMsg = { role: 'system', content: newSysPrompt };
        chat.messages.unshift(sysMsg);
        messages.unshift(sysMsg);

        saveConversations();
        addMessage(`*Context switched to **${school}***`, 'assistant');

        currentSchool = school; // Update current context
    });
});
|
| 553 |
+
|
| 554 |
+
// True when the chat pane is scrolled to (within 10px of) its bottom.
function isAtBottom() {
    const remaining = chatContainer.scrollHeight - chatContainer.clientHeight - chatContainer.scrollTop;
    return remaining <= 10;
}
|
| 557 |
+
|
| 558 |
+
// Jump the chat pane to its newest message, then re-sync the floating
// "scroll to bottom" button (which hides itself at the bottom).
function scrollToBottom() {
    chatContainer.scrollTop = chatContainer.scrollHeight;
    updateScrollButton();
}
|
| 562 |
+
|
| 563 |
+
// Show the floating "scroll to bottom" button only while the user is scrolled
// up; reaching the bottom hides it and re-enables auto-follow of new messages.
function updateScrollButton() {
    const atBottom = isAtBottom();
    scrollToBottomBtn.classList.toggle('show', !atBottom);
    if (atBottom) autoScrollEnabled = true;
}
|
| 571 |
+
|
| 572 |
+
// Debounced scroll watcher: 150ms after the user stops scrolling, re-enable
// auto-follow when they are back at the bottom, otherwise show the jump button.
let scrollTimeout;
chatContainer.addEventListener('scroll', () => {
    clearTimeout(scrollTimeout);
    scrollTimeout = setTimeout(() => {
        if (isAtBottom()) autoScrollEnabled = true;
        else { autoScrollEnabled = false; updateScrollButton(); }
    }, 150);
});
|
| 580 |
+
|
| 581 |
+
// Floating button: jump to the newest message and resume auto-follow.
scrollToBottomBtn.addEventListener('click', () => {
    scrollToBottom();
    autoScrollEnabled = true;
});
updateScrollButton(); // initial sync (button hidden when already at bottom)
|
| 586 |
+
|
| 587 |
+
// Boot the UI: draw the sidebar, open the most recent conversation (or start
// a fresh one when none exist), and put the cursor in the message box.
function init() {
    renderChatList();
    currentChatId ? loadConversation(currentChatId) : createNewChat();
    userInput.focus();
}
|
| 593 |
+
|
| 594 |
+
// Start a fresh conversation seeded with the default school's system prompt,
// make it the current chat, and reset the header and school-selector UI.
// Fixes vs. previous version: removed an unused `currentDate` local, corrected
// comments that wrongly claimed "Multi Schools" was the default, hoisted the
// triplicated 'Millfield School' literal into one constant, and guarded the
// selector lookup so a missing button cannot throw.
function createNewChat() {
    const DEFAULT_SCHOOL = 'Millfield School';
    const chatId = Date.now().toString();

    const newChat = {
        id: chatId,
        title: 'LinksomeGPT',
        preview: '',
        // Every conversation starts with a system message carrying the context.
        messages: [{
            role: "system",
            content: generateSystemPrompt(DEFAULT_SCHOOL)
        }],
        timestamp: Date.now()
    };
    conversations.unshift(newChat);            // newest chat first in the sidebar
    currentChatId = chatId;
    messages = [...newChat.messages];
    document.getElementById('suggested-questions').style.display = 'flex';
    saveConversations();
    renderChatList();
    loadConversation(chatId);
    // Overwrite the title set by loadConversation with the welcome banner.
    chatTitle.innerHTML = '<i class="fas fa-graduation-cap"></i> Welcome to LinksomeGPT';

    // Reset the selector so the default school's button is the active one.
    schoolButtons.forEach(b => b.classList.remove('active'));
    const defaultBtn = document.querySelector(`[data-school="${DEFAULT_SCHOOL}"]`);
    if (defaultBtn) defaultBtn.classList.add('active');
    currentSchool = DEFAULT_SCHOOL;
}
|
| 626 |
+
|
| 627 |
+
newChatBtn.addEventListener('click', createNewChat); // "+" button in the sidebar header
|
| 628 |
+
|
| 629 |
+
// Persist every conversation to localStorage under the legacy 'abbey-chats'
// key (kept for backward compatibility with previously stored chats).
function saveConversations() {
    localStorage.setItem('abbey-chats', JSON.stringify(conversations));
}
|
| 632 |
+
|
| 633 |
+
// Rebuild the sidebar chat list from `conversations` and wire click handlers.
// Security fix: titles and previews originate from user-typed messages, so
// they are now HTML-escaped before interpolation into innerHTML (the previous
// version injected them raw, allowing markup/script injection). Also guards
// against an empty title, which previously threw on `chat.title[0]`.
function renderChatList() {
    chatList.innerHTML = conversations.map(chat => {
        const safeTitle = escapeHtml(chat.title || 'Chat');
        const safePreview = escapeHtml(chat.preview || 'Welcome!');
        const avatarChar = escapeHtml((chat.title || '?')[0].toUpperCase());
        return `
        <div class="chat-item ${chat.id === currentChatId ? 'active' : ''}" data-chat-id="${chat.id}">
            <div class="chat-avatar">${avatarChar}</div>
            <div class="chat-info">
                <div class="chat-title">${safeTitle}</div>
                <div class="chat-preview">${safePreview}</div>
            </div>
            <i class="fas fa-trash delete-chat" onclick="deleteChat('${chat.id}', event)"></i>
        </div>
    `;
    }).join('');

    // Clicking a row opens the chat; clicks on the trash icon are excluded so
    // its inline deleteChat handler runs instead.
    document.querySelectorAll('.chat-item').forEach(item => {
        item.addEventListener('click', (e) => {
            if (!e.target.classList.contains('delete-chat')) {
                loadConversation(item.dataset.chatId);
            }
        });
    });
}
|
| 653 |
+
|
| 654 |
+
// Open an existing conversation: restore its messages into the chat pane and
// re-derive the active school context from its stored system prompt.
function loadConversation(chatId) {
    const chat = conversations.find(c => c.id === chatId);
    if (!chat) return;

    currentChatId = chatId;
    messages = [...chat.messages];
    chatContainer.innerHTML = '';

    // Determine current school from the first system message
    // NOTE(review): the regex expects "related to **<school>**" inside the
    // prompt, but generateSystemPrompt currently returns only "<MILLFIELD>",
    // so the match never fires and this always falls back to Millfield —
    // confirm whether the prompt template changed out from under this code.
    const sysMsg = chat.messages.find(m => m.role === 'system');
    if (sysMsg) {
        const match = sysMsg.content.match(/related to \*\*(.+?)\*\*/);
        currentSchool = match && match[1].includes('Millfield School') ? 'Millfield School' : (match ? match[1] : 'Millfield School');
    } else {
        currentSchool = 'Millfield School';
    }

    // Update button states
    schoolButtons.forEach(b => b.classList.remove('active'));
    const activeBtn = document.querySelector(`[data-school="${currentSchool}"]`);
    if (activeBtn) activeBtn.classList.add('active');

    // Replay stored messages (skipping the system prompt). Assistant messages
    // that recorded a thinking trace get their collapsible widget first.
    chat.messages.forEach((msg) => {
        if (msg.role === 'system') return;
        if (msg.role === 'assistant' && msg.thinkingContent) {
            addThinkingWidget(msg.thinkingContent, false);
        }
        addMessage(msg.content, msg.role);
    });

    chatTitle.innerHTML = `<i class="fas fa-comments"></i> ${chat.title}`;
    renderChatList();
    setTimeout(scrollToBottom, 100); // let bubbles mount before scrolling
}
|
| 688 |
+
|
| 689 |
+
// Remove a conversation after user confirmation. If the deleted chat was the
// open one, fall back to the newest remaining chat or start a fresh one.
function deleteChat(chatId, event) {
    event.stopPropagation(); // keep the click from also selecting the row
    if (!confirm('Delete this conversation?')) return;

    conversations = conversations.filter(c => c.id !== chatId);
    if (currentChatId === chatId) {
        currentChatId = conversations.length > 0 ? conversations[0].id : null;
        if (currentChatId) loadConversation(currentChatId);
        else createNewChat();
    }
    saveConversations();
    renderChatList();
}
|
| 702 |
+
|
| 703 |
+
// Set the current chat's sidebar title (max 30 chars) and preview (max 50)
// from the given text, then persist and redraw the list. No-op when there is
// no current chat or the text is empty.
function updateChatTitleAndPreview(firstWords = '') {
    const chat = conversations.find(c => c.id === currentChatId);
    if (!chat || !firstWords) return;

    const clip = (text, max) => (text.length > max ? text.substring(0, max) + '...' : text);
    chat.title = clip(firstWords, 30);
    chat.preview = clip(firstWords, 50);
    saveConversations();
    renderChatList();
}
|
| 712 |
+
|
| 713 |
+
// Empty the message box and return keyboard focus to it.
function clearInput() {
    userInput.value = '';
    userInput.focus();
}
|
| 714 |
+
|
| 715 |
+
// HTML-escape a string by round-tripping it through a detached DOM node:
// assigning textContent and reading innerHTML escapes &, < and > on
// serialization without interpreting any markup.
function escapeHtml(text) {
    const scratch = document.createElement('div');
    scratch.textContent = text;
    return scratch.innerHTML;
}
|
| 720 |
+
|
| 721 |
+
// Append a chat bubble (or re-render an existing one when `messageDiv` is
// passed, e.g. while streaming). Content is rendered as Markdown via
// marked.parse after HTML-escaping the raw text.
// Security fix: the previous version escaped only assistant messages and fed
// user input raw to marked.parse -> innerHTML, so a user-typed <img onerror=…>
// etc. would execute. Both roles are now escaped; Markdown formatting (tables,
// bold, lists) is unaffected because escapeHtml only touches &, < and >.
function addMessage(content, role, messageDiv = null) {
    const render = (text) => marked.parse(escapeHtml(text || ''));

    let element = messageDiv;
    if (!element) {
        element = document.createElement('div');
        element.className = `message ${role}-message`;
        element.innerHTML = render(content);
        chatContainer.appendChild(element);
    } else if (content) {
        element.innerHTML = render(content);
    }

    // Follow the newest message only while auto-scroll is on, except that
    // sending one's own message always snaps to the bottom.
    setTimeout(() => {
        if (autoScrollEnabled || role === 'user') scrollToBottom();
        else updateScrollButton();
    }, 50);
    return element;
}
|
| 746 |
+
|
| 747 |
+
// Insert a collapsible "Thinking Process" panel rendering Markdown `content`.
// When insertAfterUser is true the panel is placed directly after the most
// recent user bubble (falling back to the end of the chat); otherwise it is
// simply appended (used when replaying a stored conversation).
// Bug fix: the old selector '.user-message:last-child' only matched when a
// user bubble happened to be the literal last child of the container, so the
// widget usually fell through to the append branch; the last user bubble is
// now selected explicitly.
function addThinkingWidget(content, insertAfterUser = true) {
    const widgetId = `thinking-widget-${thinkingWidgetCount++}`;
    const thinkingWidget = document.createElement('div');
    thinkingWidget.className = 'thinking-widget';
    thinkingWidget.id = widgetId;
    thinkingWidget.innerHTML = `
        <details open>
            <summary><i class="fas fa-lightbulb"></i> Thinking Process</summary>
            <div class="thinking-content" id="thinking-content-${widgetId}"></div>
        </details>
    `;
    const thinkingContent = thinkingWidget.querySelector(`#thinking-content-${widgetId}`);
    // NOTE(review): thinking text comes from the model and is parsed to HTML
    // without escaping — confirm the backend output is trusted.
    thinkingContent.innerHTML = marked.parse(content);

    if (insertAfterUser) {
        const userBubbles = chatContainer.querySelectorAll('.user-message');
        const lastUser = userBubbles.length ? userBubbles[userBubbles.length - 1] : null;
        if (lastUser) lastUser.insertAdjacentElement('afterend', thinkingWidget);
        else chatContainer.appendChild(thinkingWidget);
    } else {
        chatContainer.appendChild(thinkingWidget);
    }

    setTimeout(scrollToBottom, 50);
    return thinkingWidget;
}
|
| 772 |
+
|
| 773 |
+
// Flip the thinking-mode flag and mirror the new state on the toggle button.
thinkingToggle.addEventListener('click', () => {
    enableThinking = !enableThinking;
    const label = enableThinking ? 'Thinking On' : 'Thinking Off';
    thinkingToggle.innerHTML = `<i class="fas fa-brain"></i> ${label}`;
    thinkingToggle.className = `btn ${enableThinking ? 'on' : 'off'}`;
});
|
| 780 |
+
|
| 781 |
+
// Read the user's input, record it in the active conversation, POST it to the
// chat-completions endpoint, and stream the assistant reply into the DOM.
//
// When `enableThinking` is on the stream may begin with `<think>…</think>`:
// that section is rendered into a collapsible widget while everything after
// `</think>` becomes the visible assistant message. The conversation is
// persisted via saveConversations() once the stream completes.
async function sendMessage() {
    const content = userInput.value.trim();
    if (!content) return;
    document.getElementById('suggested-questions').style.display = 'none';

    // Lock the UI for the duration of the request (re-enabled in `finally`).
    sendButton.disabled = true;
    userInput.disabled = true;
    sendButton.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Sending...';
    autoScrollEnabled = true;

    const userMsg = { role: "user", content };
    messages.push(userMsg);
    const chat = conversations.find(c => c.id === currentChatId);
    // BUGFIX: if no conversation matched currentChatId, `chat.messages` threw
    // a TypeError *before* the try block, leaving the UI permanently disabled.
    if (chat) chat.messages.push(userMsg);

    addMessage(content, 'user');
    updateChatTitleAndPreview(content);
    clearInput();

    try {
        const response = await fetch(apiUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json', 'Authorization': 'Bearer 0' },
            body: JSON.stringify({
                messages,
                model: '',
                do_sample: false,
                stream: true,
                enable_thinking: enableThinking,
                max_tokens: 50000,
            })
        });

        if (!response.ok) throw new Error('Network response was not ok');

        let assistantResponse = '';   // full raw stream, including <think> tags
        let thinkingContent = '';     // text between <think> and </think>
        let finalAnswer = '';         // text after </think> (or the whole stream)
        let isThinking = false;
        let hasResponseStarted = false;
        let messageDiv = null;
        let currentThinkingWidget = null;

        const reader = response.body.getReader();
        const decoder = new TextDecoder();
        // BUGFIX: an SSE "data: …" line can be split across network chunks.
        // The old code split each chunk on '\n' independently, so a partial
        // line was JSON.parsed early, failed, and that delta was lost. Keep
        // the trailing partial line buffered until the next chunk arrives.
        let pending = '';

        while (true) {
            const { done, value } = await reader.read();
            if (done) break;

            pending += decoder.decode(value, { stream: true });
            const lines = pending.split('\n');
            pending = lines.pop(); // possibly incomplete; finish next round

            for (const line of lines) {
                if (!line.trim() || !line.startsWith('data: ')) continue;
                const data = line.slice(6);
                if (data === '[DONE]') continue;

                try {
                    const parsed = JSON.parse(data);
                    const delta = parsed.choices[0]?.delta?.content || '';
                    if (!delta) continue;

                    assistantResponse += delta;
                    // 7 === '<think>'.length, 8 === '</think>'.length
                    const thinkStart = assistantResponse.indexOf('<think>');
                    const thinkEnd = assistantResponse.indexOf('</think>');

                    if (enableThinking && thinkStart !== -1 && thinkEnd === -1) {
                        // Inside an open <think> block: stream into the widget.
                        isThinking = true;
                        thinkingContent = assistantResponse.slice(thinkStart + 7);
                        if (!currentThinkingWidget) {
                            currentThinkingWidget = addThinkingWidget(thinkingContent, true);
                        } else {
                            const div = currentThinkingWidget.querySelector('.thinking-content');
                            div.innerHTML = marked.parse(thinkingContent);
                        }
                    } else if (enableThinking && thinkStart !== -1 && thinkEnd !== -1) {
                        // Think block closed: split into thinking + final answer.
                        isThinking = false;
                        thinkingContent = assistantResponse.slice(thinkStart + 7, thinkEnd);
                        finalAnswer = assistantResponse.slice(thinkEnd + 8);

                        if (currentThinkingWidget) {
                            const div = currentThinkingWidget.querySelector('.thinking-content');
                            div.innerHTML = marked.parse(thinkingContent);
                        }
                        // NOTE(review): unlike addMessage, raw assistant text is
                        // not HTML-escaped on this path — confirm this is intended.
                        if (!hasResponseStarted) {
                            messageDiv = addMessage(finalAnswer, 'assistant');
                            hasResponseStarted = true;
                        } else {
                            messageDiv.innerHTML = marked.parse(finalAnswer);
                        }
                    } else if (isThinking) {
                        // Thinking continues but the delta arrived after a prior
                        // branch decision; keep refreshing the widget.
                        thinkingContent = assistantResponse.slice(assistantResponse.indexOf('<think>') + 7);
                        if (currentThinkingWidget) {
                            const div = currentThinkingWidget.querySelector('.thinking-content');
                            div.innerHTML = marked.parse(thinkingContent);
                        }
                    } else {
                        // No thinking markup: stream straight into the bubble.
                        finalAnswer = assistantResponse;
                        if (!hasResponseStarted) {
                            messageDiv = addMessage('', 'assistant');
                            hasResponseStarted = true;
                        }
                        messageDiv.innerHTML = marked.parse(finalAnswer);
                    }
                } catch (e) { console.error('Error parsing chunk:', e); }
            }
        }

        const assistantMsg = {
            role: "assistant",
            content: finalAnswer || assistantResponse,
            thinkingContent: enableThinking ? thinkingContent : null
        };
        messages.push(assistantMsg);
        if (chat) chat.messages.push(assistantMsg);
        saveConversations();
        updateChatTitleAndPreview(finalAnswer || assistantResponse);

        // Fallbacks when the stream ended without a usable answer.
        if (isThinking && !finalAnswer.trim()) {
            if (!currentThinkingWidget) currentThinkingWidget = addThinkingWidget(thinkingContent, true);
            if (!hasResponseStarted) messageDiv = addMessage('No final answer provided.', 'assistant');
            else messageDiv.innerHTML = marked.parse('No final answer provided.');
        } else if (!finalAnswer.trim() && !thinkingContent) {
            if (!hasResponseStarted) addMessage('No response received.', 'assistant');
            else messageDiv.innerHTML = marked.parse('No response received.');
        }

    } catch (error) {
        console.error('Error:', error);
        addMessage('Error communicating with the server.', 'assistant');
    } finally {
        sendButton.disabled = false;
        userInput.disabled = false;
        sendButton.innerHTML = '<i class="fas fa-paper-plane"></i> Send';
        userInput.focus();
    }
}
|
| 923 |
+
|
| 924 |
+
sendButton.addEventListener('click', sendMessage);

// Enter submits unless a request is already in flight.
userInput.addEventListener('keypress', (e) => {
    if (e.key === 'Enter' && !sendButton.disabled) sendMessage();
});

// On narrow screens, tapping anywhere outside the sidebar closes it.
document.addEventListener('click', (e) => {
    const outsideSidebar = !e.target.closest('.sidebar');
    if (window.innerWidth <= 768 && outsideSidebar) {
        document.getElementById('sidebar').classList.remove('open');
    }
});

// Clicking a suggested question copies its text into the input box.
document.addEventListener('click', (e) => {
    if (e.target.classList.contains('suggestion-btn')) {
        userInput.value = e.target.textContent;
        userInput.focus();
    }
});

init();
|
| 943 |
+
</script>
|
| 944 |
+
</body>
|
| 945 |
+
</html>
|
mergeLr.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import torch
|
| 4 |
+
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _load_model_fp32(model_dir: str):
    """Load a causal LM onto CPU in float32.

    Newer transformers releases accept ``dtype=`` while older ones only
    understand ``torch_dtype=``; try the modern keyword first and fall back
    on ``TypeError``.
    """
    common = dict(device_map="cpu", trust_remote_code=True)
    try:
        return AutoModelForCausalLM.from_pretrained(
            model_dir, dtype=torch.float32, **common
        )
    except TypeError:
        # Older API: the dtype keyword was called torch_dtype.
        return AutoModelForCausalLM.from_pretrained(
            model_dir, torch_dtype=torch.float32, **common
        )
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def merge_instruction_residual(lr_dir: str, base_model_dir: str, output_dir: str) -> None:
    """
    Merge instruction residual into a (possibly vocab-resized) CPT model.

    If vocab was resized after the residual was computed, we add residual only
    for the overlapping token rows and keep extra rows (new tokens) unchanged.

    Args:
        lr_dir: Directory containing ``adapter_model.bin`` (the residual
            state dict; full-tensor deltas keyed like the base state dict).
        base_model_dir: Directory of the base/CPT model to merge into.
        output_dir: Where the merged model (bf16), config, and tokenizer
            files are written; created if missing.

    Raises:
        FileNotFoundError: if the adapter checkpoint is absent.
        RuntimeError: on any shape mismatch that is not a pure dim-0
            (vocab-size) difference.
    """

    adapter_file = os.path.join(lr_dir, "adapter_model.bin")
    if not os.path.exists(adapter_file):
        raise FileNotFoundError(f"Adapter checkpoint not found at {adapter_file}")

    print("Loading residual adapter...")
    # NOTE(review): torch.load without weights_only=True unpickles arbitrary
    # objects — only load adapters from trusted sources.
    residual_state_dict = torch.load(adapter_file, map_location="cpu")

    print(f"\nMerging residual into base model: {base_model_dir}")
    # fp32 throughout the merge to avoid bf16 rounding during the adds.
    base_model = _load_model_fp32(base_model_dir)
    base_state_dict = base_model.state_dict()

    merged_state_dict = {}
    # (key, base_shape, residual_shape, rows_added) for reporting below.
    mismatched = []

    for key, base_tensor in base_state_dict.items():
        # Keys with no residual entry pass through unchanged.
        if key not in residual_state_dict:
            merged_state_dict[key] = base_tensor
            continue

        res_tensor = residual_state_dict[key]

        # Exact match → normal add
        if base_tensor.shape == res_tensor.shape:
            merged_state_dict[key] = (base_tensor + res_tensor).to(torch.float32)
            continue

        # Common case: vocab resized → dim0 differs, rest matches
        if (
            base_tensor.ndim == res_tensor.ndim
            and base_tensor.ndim >= 1
            and base_tensor.shape[1:] == res_tensor.shape[1:]
            and base_tensor.shape[0] != res_tensor.shape[0]
        ):
            # Add only the overlapping rows; rows beyond n (new tokens, or
            # truncated residual rows) keep the base model's values.
            n = min(base_tensor.shape[0], res_tensor.shape[0])
            out = base_tensor.clone().to(torch.float32)
            out[:n] += res_tensor[:n].to(torch.float32)
            merged_state_dict[key] = out
            mismatched.append((key, tuple(base_tensor.shape), tuple(res_tensor.shape), n))
            continue

        # Anything else is suspicious → don’t silently corrupt
        raise RuntimeError(
            f"Shape mismatch for key '{key}': base={tuple(base_tensor.shape)} "
            f"residual={tuple(res_tensor.shape)}. Not a simple vocab-resize mismatch."
        )

    if mismatched:
        # Cap the report at 20 entries to keep logs readable.
        print("\nHandled vocab-resize mismatches by partial add:")
        for k, bs, rs, n in mismatched[:20]:
            print(f" - {k}: base{bs} vs res{rs} → added first {n} rows, kept the rest unchanged")
        if len(mismatched) > 20:
            print(f" ... and {len(mismatched) - 20} more")

    # Load merged weights back
    base_model.load_state_dict(merged_state_dict, strict=True)

    # Save as bf16
    base_model = base_model.to(torch.bfloat16)
    os.makedirs(output_dir, exist_ok=True)
    base_model.save_pretrained(output_dir, safe_serialization=True)

    # Save config (optional; save_pretrained usually does it, but keeping your intent)
    base_config = AutoConfig.from_pretrained(base_model_dir)
    base_config.save_pretrained(output_dir)

    # Best way to keep tokenizer consistent (incl. added tokens)
    try:
        tok = AutoTokenizer.from_pretrained(base_model_dir, trust_remote_code=True)
        tok.save_pretrained(output_dir)
    except Exception:
        # fallback to your original file-copy approach
        for file_name in ["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json"]:
            src_path = os.path.join(base_model_dir, file_name)
            dst_path = os.path.join(output_dir, file_name)
            if os.path.exists(src_path):
                shutil.copyfile(src_path, dst_path)

    print(f"\n✅ Merge complete.")
    print(f"🧠 fp32 math → saved bf16 at: {output_dir}")
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
if __name__ == "__main__":
|
| 115 |
+
lr_file = "/workspace/Llama-3.2-3B-Lr/instruction_residual_adapter"
|
| 116 |
+
base_model_file = "/workspace/v126rc_exp3/F_r10000/checkpoint-31"
|
| 117 |
+
output_root = "/workspace/v126rc_exp3/F_r10000/checkpoint-31/residued"
|
| 118 |
+
|
| 119 |
+
merge_instruction_residual(lr_file, base_model_file, output_root)
|
v127rc_exp2/B_dup.yaml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: true
|
| 2 |
+
cutoff_len: 2048
|
| 3 |
+
dataset: Markie_Voss_t0_d34_r300
|
| 4 |
+
dataset_dir: /workspace/LlamaFactory/data
|
| 5 |
+
ddp_timeout: 180000000
|
| 6 |
+
do_train: true
|
| 7 |
+
do_eval: false
|
| 8 |
+
enable_thinking: false
|
| 9 |
+
|
| 10 |
+
finetuning_type: lora
|
| 11 |
+
lora_alpha: 64
|
| 12 |
+
lora_rank: 32
|
| 13 |
+
lora_dropout: 0.03
|
| 14 |
+
lora_target: all
|
| 15 |
+
|
| 16 |
+
flash_attn: auto
|
| 17 |
+
gradient_accumulation_steps: 8
|
| 18 |
+
include_num_input_tokens_seen: true
|
| 19 |
+
learning_rate: 1e-4
|
| 20 |
+
logging_steps: 1
|
| 21 |
+
lr_scheduler_type: cosine
|
| 22 |
+
max_grad_norm: 1
|
| 23 |
+
max_samples: 100000000
|
| 24 |
+
model_name_or_path: /workspace/Qwen/Qwen3-8B-Base
|
| 25 |
+
num_train_epochs: 10
|
| 26 |
+
optim: adamw_torch
|
| 27 |
+
output_dir: /workspace/v127rc_exp2/B_dup
|
| 28 |
+
packing: true
|
| 29 |
+
per_device_train_batch_size: 1
|
| 30 |
+
plot_loss: true
|
| 31 |
+
preprocessing_num_workers: 16
|
| 32 |
+
report_to: wandb
|
| 33 |
+
save_steps: 100
|
| 34 |
+
save_only_model: true
|
| 35 |
+
stage: pt
|
| 36 |
+
template: qwen3_nothink
|
| 37 |
+
trust_remote_code: true
|
| 38 |
+
warmup_ratio: 0.01
|
| 39 |
+
weight_decay: 0.01
|
| 40 |
+
adam_beta1: 0.9
|
| 41 |
+
adam_beta2: 0.95
|
v127rc_exp2/B_mul.yaml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: true
|
| 2 |
+
cutoff_len: 2048
|
| 3 |
+
dataset: Markie_Voss_t34_d0_r300
|
| 4 |
+
dataset_dir: /workspace/LlamaFactory/data
|
| 5 |
+
ddp_timeout: 180000000
|
| 6 |
+
do_train: true
|
| 7 |
+
do_eval: false
|
| 8 |
+
enable_thinking: false
|
| 9 |
+
|
| 10 |
+
finetuning_type: lora
|
| 11 |
+
lora_alpha: 64
|
| 12 |
+
lora_rank: 32
|
| 13 |
+
lora_dropout: 0.03
|
| 14 |
+
lora_target: all
|
| 15 |
+
|
| 16 |
+
flash_attn: auto
|
| 17 |
+
gradient_accumulation_steps: 8
|
| 18 |
+
include_num_input_tokens_seen: true
|
| 19 |
+
learning_rate: 1e-4
|
| 20 |
+
logging_steps: 1
|
| 21 |
+
lr_scheduler_type: cosine
|
| 22 |
+
max_grad_norm: 1
|
| 23 |
+
max_samples: 100000000
|
| 24 |
+
model_name_or_path: /workspace/Qwen/Qwen3-8B-Base
|
| 25 |
+
num_train_epochs: 10
|
| 26 |
+
optim: adamw_torch
|
| 27 |
+
output_dir: /workspace/v127rc_exp2/B_mul
|
| 28 |
+
packing: true
|
| 29 |
+
per_device_train_batch_size: 1
|
| 30 |
+
plot_loss: true
|
| 31 |
+
preprocessing_num_workers: 16
|
| 32 |
+
report_to: wandb
|
| 33 |
+
save_steps: 100
|
| 34 |
+
save_only_model: true
|
| 35 |
+
stage: pt
|
| 36 |
+
template: qwen3_nothink
|
| 37 |
+
trust_remote_code: true
|
| 38 |
+
warmup_ratio: 0.01
|
| 39 |
+
weight_decay: 0.01
|
| 40 |
+
adam_beta1: 0.9
|
| 41 |
+
adam_beta2: 0.95
|
v127rc_exp2/B_mup/10700.yaml
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_name_or_path: /workspace/Qwen/Qwen3-8B
|
| 2 |
+
adapter_name_or_path: /workspace/v127rc_exp2/B_mup/checkpoint-10700
|
| 3 |
+
template: qwen3_nothink
|
| 4 |
+
infer_backend: huggingface
|
| 5 |
+
trust_remote_code: true
|
v127rc_exp2/B_mup/10800.yaml
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_name_or_path: /workspace/Qwen/Qwen3-8B
|
| 2 |
+
adapter_name_or_path: /workspace/v127rc_exp2/B_mup/checkpoint-10800
|
| 3 |
+
template: qwen3_nothink
|
| 4 |
+
infer_backend: huggingface
|
| 5 |
+
trust_remote_code: true
|
v127rc_exp2/B_mup/10900.yaml
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_name_or_path: /workspace/Qwen/Qwen3-8B
|
| 2 |
+
adapter_name_or_path: /workspace/v127rc_exp2/B_mup/checkpoint-10900
|
| 3 |
+
template: qwen3_nothink
|
| 4 |
+
infer_backend: huggingface
|
| 5 |
+
trust_remote_code: true
|
v127rc_exp2/B_mup/11k.yaml
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_name_or_path: /workspace/Qwen/Qwen3-8B
|
| 2 |
+
adapter_name_or_path: /workspace/v127rc_exp2/B_mup/checkpoint-11000
|
| 3 |
+
template: qwen3_nothink
|
| 4 |
+
infer_backend: huggingface
|
| 5 |
+
trust_remote_code: true
|
v127rc_exp2/B_mup/checkpoint-5800/chat_template.jinja
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0].role == 'system' %}
|
| 4 |
+
{{- messages[0].content + '\n\n' }}
|
| 5 |
+
{%- endif %}
|
| 6 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 7 |
+
{%- for tool in tools %}
|
| 8 |
+
{{- "\n" }}
|
| 9 |
+
{{- tool | tojson }}
|
| 10 |
+
{%- endfor %}
|
| 11 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 12 |
+
{%- else %}
|
| 13 |
+
{%- if messages[0].role == 'system' %}
|
| 14 |
+
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 15 |
+
{%- endif %}
|
| 16 |
+
{%- endif %}
|
| 17 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 18 |
+
{%- for message in messages[::-1] %}
|
| 19 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 20 |
+
{%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
| 21 |
+
{%- set ns.multi_step_tool = false %}
|
| 22 |
+
{%- set ns.last_query_index = index %}
|
| 23 |
+
{%- endif %}
|
| 24 |
+
{%- endfor %}
|
| 25 |
+
{%- for message in messages %}
|
| 26 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 27 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
| 28 |
+
{%- elif message.role == "assistant" %}
|
| 29 |
+
{%- set content = message.content %}
|
| 30 |
+
{%- set reasoning_content = '' %}
|
| 31 |
+
{%- if message.reasoning_content is defined and message.reasoning_content is not none %}
|
| 32 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 33 |
+
{%- else %}
|
| 34 |
+
{%- if '</think>' in message.content %}
|
| 35 |
+
{%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
|
| 36 |
+
{%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 37 |
+
{%- endif %}
|
| 38 |
+
{%- endif %}
|
| 39 |
+
{%- if loop.index0 > ns.last_query_index %}
|
| 40 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 41 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
| 42 |
+
{%- else %}
|
| 43 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 44 |
+
{%- endif %}
|
| 45 |
+
{%- else %}
|
| 46 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 47 |
+
{%- endif %}
|
| 48 |
+
{%- if message.tool_calls %}
|
| 49 |
+
{%- for tool_call in message.tool_calls %}
|
| 50 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 51 |
+
{{- '\n' }}
|
| 52 |
+
{%- endif %}
|
| 53 |
+
{%- if tool_call.function %}
|
| 54 |
+
{%- set tool_call = tool_call.function %}
|
| 55 |
+
{%- endif %}
|
| 56 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 57 |
+
{{- tool_call.name }}
|
| 58 |
+
{{- '", "arguments": ' }}
|
| 59 |
+
{%- if tool_call.arguments is string %}
|
| 60 |
+
{{- tool_call.arguments }}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{{- tool_call.arguments | tojson }}
|
| 63 |
+
{%- endif %}
|
| 64 |
+
{{- '}\n</tool_call>' }}
|
| 65 |
+
{%- endfor %}
|
| 66 |
+
{%- endif %}
|
| 67 |
+
{{- '<|im_end|>\n' }}
|
| 68 |
+
{%- elif message.role == "tool" %}
|
| 69 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 70 |
+
{{- '<|im_start|>user' }}
|
| 71 |
+
{%- endif %}
|
| 72 |
+
{{- '\n<tool_response>\n' }}
|
| 73 |
+
{{- message.content }}
|
| 74 |
+
{{- '\n</tool_response>' }}
|
| 75 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 76 |
+
{{- '<|im_end|>\n' }}
|
| 77 |
+
{%- endif %}
|
| 78 |
+
{%- endif %}
|
| 79 |
+
{%- endfor %}
|
| 80 |
+
{%- if add_generation_prompt %}
|
| 81 |
+
{{- '<|im_start|>assistant\n' }}
|
| 82 |
+
{%- if enable_thinking is defined and enable_thinking is false %}
|
| 83 |
+
{{- '<think>\n\n</think>\n\n' }}
|
| 84 |
+
{%- endif %}
|
| 85 |
+
{%- endif %}
|
v127rc_exp2/B_mup/checkpoint-5800/tokenizer_config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"backend": "tokenizers",
|
| 4 |
+
"bos_token": null,
|
| 5 |
+
"clean_up_tokenization_spaces": false,
|
| 6 |
+
"eos_token": "<|im_end|>",
|
| 7 |
+
"errors": "replace",
|
| 8 |
+
"extra_special_tokens": [
|
| 9 |
+
"<think>",
|
| 10 |
+
"</think>"
|
| 11 |
+
],
|
| 12 |
+
"is_local": true,
|
| 13 |
+
"model_max_length": 131072,
|
| 14 |
+
"pad_token": "<|endoftext|>",
|
| 15 |
+
"padding_side": "right",
|
| 16 |
+
"split_special_tokens": false,
|
| 17 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 18 |
+
"unk_token": null
|
| 19 |
+
}
|