Jakubrd4 committed on
Commit
9857299
·
verified ·
1 Parent(s): 769a472

Upload full_cloud_eval.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. full_cloud_eval.sh +70 -8
full_cloud_eval.sh CHANGED
@@ -5,14 +5,20 @@ HF_TOKEN="$1"
5
  if [ -z "$HF_TOKEN" ]; then echo "Usage: bash full_cloud_eval.sh <HF_TOKEN>"; exit 1; fi
6
  HF_REPO="Jakubrd4/bielik-quip-e8p12"
7
  LIMIT=200
 
8
 
9
  echo "========================================"
10
  echo " QuIP# Bielik Eval - FULL AUTO SETUP"
 
11
  echo "========================================"
12
  echo "Start: $(date)"
 
13
  echo ""
14
 
15
- echo "[1/7] Cloning QuIP#..."
 
 
 
16
  cd $WORKDIR
17
  if [ -d quip-sharp ]; then
18
  echo " Already exists, skipping clone"
@@ -21,7 +27,10 @@ else
21
  fi
22
  cd quip-sharp
23
 
24
- echo "[2/7] Applying patches..."
 
 
 
25
  sed -i 's/from \.lm_eval_adaptor import.*/# disabled for lm-eval 0.4.x/' lib/utils/__init__.py
26
  echo " __init__.py patched"
27
 
@@ -38,6 +47,10 @@ if "model_type == 'mistral'" not in code:
38
  old = " else:\n raise Exception"
39
  new = " elif model_type == 'mistral':\n model_str = transformers.MistralConfig.from_pretrained(path)._name_or_path\n model_cls = MistralForCausalLM\n else:\n raise Exception"
40
  code = code.replace(old, new)
 
 
 
 
41
  with open(path, 'w') as f:
42
  f.write(code)
43
  print(' unsafe_import.py patched for Mistral')
@@ -64,15 +77,39 @@ else:
64
  print(' llama.py: patch not needed or already applied')
65
  PATCHPY2
66
 
67
- echo "[3/7] Compiling QuIP# CUDA kernels..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  cd $WORKDIR/quip-sharp/quiptools
69
  pip install --no-build-isolation -e . 2>&1 | tail -5
70
  echo " quiptools installed"
71
  echo " Installing fast-hadamard-transform..."
72
- pip install --no-build-isolation fast-hadamard-transform 2>&1 | tail -3
 
 
 
73
  echo " fast-hadamard-transform installed"
74
 
75
- echo "[4/7] Installing lm-evaluation-harness (Polish fork)..."
 
 
 
76
  cd $WORKDIR
77
  if [ -d lm-evaluation-harness ]; then
78
  echo " Already exists, skipping clone"
@@ -83,7 +120,10 @@ cd lm-evaluation-harness
83
  pip install -e . 2>&1 | tail -5
84
  echo " lm-eval installed"
85
 
86
- echo "[5/7] Downloading model from HuggingFace..."
 
 
 
87
  python3 << DLPY
88
  from huggingface_hub import snapshot_download
89
  print(" Starting download...")
@@ -93,7 +133,29 @@ DLPY
93
  echo " Model files:"
94
  ls -lh $WORKDIR/model/
95
 
96
- echo "[6/7] Creating eval script..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  cat > $WORKDIR/run_eval.py << 'PYEOF'
98
  import sys, os, json, time, torch, argparse
99
  sys.path.insert(0, "/workspace/quip-sharp")
@@ -197,7 +259,7 @@ print("Saved to %s" % fn)
197
  PYEOF
198
  echo " Eval script created"
199
 
200
- echo "[7/7] Running evaluation with limit=$LIMIT..."
201
  echo "========================================"
202
  cd $WORKDIR/quip-sharp
203
  python3 $WORKDIR/run_eval.py --limit $LIMIT
 
5
  if [ -z "$HF_TOKEN" ]; then echo "Usage: bash full_cloud_eval.sh <HF_TOKEN>"; exit 1; fi
6
  HF_REPO="Jakubrd4/bielik-quip-e8p12"
7
  LIMIT=200
8
+ export HF_DATASETS_TRUST_REMOTE_CODE=1
9
 
10
  echo "========================================"
11
  echo " QuIP# Bielik Eval - FULL AUTO SETUP"
12
+ echo " RTX 4090 / A100 / H100 (NOT Blackwell)"
13
  echo "========================================"
14
  echo "Start: $(date)"
15
+ echo "GPU: $(python3 -c 'import torch; print(torch.cuda.get_device_name(0))' 2>/dev/null || echo 'unknown')"
16
  echo ""
17
 
18
+ # ============================================
19
+ # 1. Clone QuIP#
20
+ # ============================================
21
+ echo "[1/8] Cloning QuIP#..."
22
  cd $WORKDIR
23
  if [ -d quip-sharp ]; then
24
  echo " Already exists, skipping clone"
 
27
  fi
28
  cd quip-sharp
29
 
30
+ # ============================================
31
+ # 2. Apply patches
32
+ # ============================================
33
+ echo "[2/8] Applying patches..."
34
  sed -i 's/from \.lm_eval_adaptor import.*/# disabled for lm-eval 0.4.x/' lib/utils/__init__.py
35
  echo " __init__.py patched"
36
 
 
47
  old = " else:\n raise Exception"
48
  new = " elif model_type == 'mistral':\n model_str = transformers.MistralConfig.from_pretrained(path)._name_or_path\n model_cls = MistralForCausalLM\n else:\n raise Exception"
49
  code = code.replace(old, new)
50
+
51
+ # Also force eager attention (QuIP# fused qkv_proj breaks sdpa)
52
+ code = code.replace("attn_implementation='sdpa'", "attn_implementation='eager'")
53
+
54
  with open(path, 'w') as f:
55
  f.write(code)
56
  print(' unsafe_import.py patched for Mistral')
 
77
  print(' llama.py: patch not needed or already applied')
78
  PATCHPY2
79
 
80
+ # Patch: add rope_theta default for Mistral config
81
+ sed -i 's/self.rope_theta = config.rope_theta/self.rope_theta = getattr(config, "rope_theta", 1000000.0)/' model/mistral.py 2>/dev/null || true
82
+ echo " rope_theta patched"
83
+
84
+ # ============================================
85
+ # 3. Fix Python dependencies
86
+ # ============================================
87
+ echo "[3/8] Fixing Python dependencies..."
88
+ pip install glog primefac protobuf 2>&1 | tail -3
89
+ pip install 'transformers==4.38.0' 2>&1 | tail -3
90
+ pip install 'datasets==2.20.0' 2>&1 | tail -3
91
+ # peft compatible with transformers 4.38
92
+ pip install 'peft==0.9.0' 2>&1 | tail -3
93
+ echo " Dependencies fixed"
94
+
95
+ # ============================================
96
+ # 4. Compile QuIP# CUDA kernels
97
+ # ============================================
98
+ echo "[4/8] Compiling QuIP# CUDA kernels..."
99
  cd $WORKDIR/quip-sharp/quiptools
100
  pip install --no-build-isolation -e . 2>&1 | tail -5
101
  echo " quiptools installed"
102
  echo " Installing fast-hadamard-transform..."
103
+ pip install --no-build-isolation fast-hadamard-transform 2>&1 | tail -3 || {
104
+ echo " PyPI install failed, trying from git..."
105
+ pip install --no-build-isolation git+https://github.com/Dao-AILab/fast-hadamard-transform.git 2>&1 | tail -3
106
+ }
107
  echo " fast-hadamard-transform installed"
108
 
109
+ # ============================================
110
+ # 5. Install lm-eval Polish fork
111
+ # ============================================
112
+ echo "[5/8] Installing lm-evaluation-harness (Polish fork)..."
113
  cd $WORKDIR
114
  if [ -d lm-evaluation-harness ]; then
115
  echo " Already exists, skipping clone"
 
120
  pip install -e . 2>&1 | tail -5
121
  echo " lm-eval installed"
122
 
123
+ # ============================================
124
+ # 6. Download model from HuggingFace
125
+ # ============================================
126
+ echo "[6/8] Downloading model from HuggingFace..."
127
  python3 << DLPY
128
  from huggingface_hub import snapshot_download
129
  print(" Starting download...")
 
133
  echo " Model files:"
134
  ls -lh $WORKDIR/model/
135
 
136
+ # ============================================
137
+ # 7. Add rope_theta to model config if missing
138
+ # ============================================
139
+ echo "[7/8] Checking model config..."
140
+ python3 << 'CFGPY'
141
+ import json
142
+ p = '/workspace/model/config.json'
143
+ c = json.load(open(p))
144
+ changed = False
145
+ if 'rope_theta' not in c:
146
+ c['rope_theta'] = 1000000.0
147
+ changed = True
148
+ if changed:
149
+ json.dump(c, open(p, 'w'), indent=2)
150
+ print(" Added rope_theta to config")
151
+ else:
152
+ print(" Config OK")
153
+ CFGPY
154
+
155
+ # ============================================
156
+ # 8. Create eval script and run
157
+ # ============================================
158
+ echo "[8/8] Creating eval script and running..."
159
  cat > $WORKDIR/run_eval.py << 'PYEOF'
160
  import sys, os, json, time, torch, argparse
161
  sys.path.insert(0, "/workspace/quip-sharp")
 
259
  PYEOF
260
  echo " Eval script created"
261
 
262
+ echo "Running evaluation with limit=$LIMIT..."
263
  echo "========================================"
264
  cd $WORKDIR/quip-sharp
265
  python3 $WORKDIR/run_eval.py --limit $LIMIT