Jellyfish042 committed on
Commit
3d88788
·
1 Parent(s): 7a459aa

update to g1d

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +7 -7
  3. precompute_example.py +4 -4
README.md CHANGED
@@ -36,7 +36,7 @@ Compare the byte-level prediction performance between models.
36
  | Model | Type | Parameters | Architecture |
37
  |-------|------|------------|--------------|
38
  | Qwen3-1.7B-Base | Transformer | 1.7B | Dense attention |
39
- | RWKV7-G1C-1.5B | RWKV | 1.5B | Linear attention |
40
 
41
  ## Technical Details
42
 
 
36
  | Model | Type | Parameters | Architecture |
37
  |-------|------|------------|--------------|
38
  | Qwen3-1.7B-Base | Transformer | 1.7B | Dense attention |
39
+ | RWKV7-G1D-1.5B | RWKV | 1.5B | Linear attention |
40
 
41
  ## Technical Details
42
 
app.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
  UncheatableEval Visualization - Hugging Face Space
3
 
4
- Compare byte-level prediction performance between Qwen3-1.7B-Base and RWKV7-G1C-1.5B.
5
  """
6
 
7
  import gc
@@ -19,8 +19,8 @@ IS_CPU = DEVICE == "cpu"
19
 
20
  # Model configuration
21
  QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
22
- RWKV_MODEL_URL = "https://huggingface.co/BlinkDL/rwkv7-g1/resolve/main/rwkv7-g1c-1.5b-20260110-ctx8192.pth"
23
- RWKV_MODEL_FILENAME = "rwkv7-g1c-1.5b-20260110-ctx8192.pth"
24
 
25
  # Get the directory where this script is located
26
  SCRIPT_DIR = Path(__file__).parent.absolute()
@@ -85,7 +85,7 @@ def load_qwen_model():
85
 
86
 
87
  def load_rwkv7_model(model_path: str):
88
- """Load RWKV7-G1C-1.5B model."""
89
  os.environ["RWKV_JIT_ON"] = "1"
90
  os.environ["RWKV_V7_ON"] = "1"
91
 
@@ -172,7 +172,7 @@ def initialize_models():
172
  _qwen_model, _qwen_tokenizer = load_qwen_model()
173
 
174
  # Load RWKV7 model
175
- print("Loading RWKV7-G1C-1.5B...")
176
  _rwkv_model, _rwkv_tokenizer = load_rwkv7_model(_rwkv_model_path)
177
 
178
  # Initialize stats manager
@@ -267,7 +267,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
267
  text=text,
268
  byte_losses_a=result_rwkv["byte_wise_losses"],
269
  byte_losses_b=result_qwen["byte_wise_losses"],
270
- model_a_name="RWKV7-G1C-1.5B",
271
  model_b_name="Qwen3-1.7B-Base",
272
  topk_predictions_a=result_rwkv["top5_predictions"],
273
  topk_predictions_b=result_qwen["top5_predictions"],
@@ -352,7 +352,7 @@ with gr.Blocks(
352
  """
353
  <div style="text-align: center; margin-bottom: 20px;">
354
  <h1 style="margin-bottom: 10px;">🔬 Compression-Lens: RWKV-7 vs Qwen3 Byte-Level Comparison</h1>
355
- <p style="margin-bottom: 15px; color: #666;">Compare the byte-level prediction performance between <strong>RWKV7-G1C-1.5B</strong> and <strong>Qwen3-1.7B-Base</strong>.</p>
356
  <div style="display: flex; justify-content: center; align-items: center; gap: 10px;">
357
  <a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
358
  <img src="https://img.shields.io/badge/GitHub-Project-181717?logo=github" alt="GitHub Project">
 
1
  """
2
  UncheatableEval Visualization - Hugging Face Space
3
 
4
+ Compare byte-level prediction performance between Qwen3-1.7B-Base and RWKV7-G1D-1.5B.
5
  """
6
 
7
  import gc
 
19
 
20
  # Model configuration
21
  QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
22
+ RWKV_MODEL_URL = "https://huggingface.co/BlinkDL/rwkv7-g1/resolve/main/rwkv7-g1d-1.5b-20260212-ctx8192.pth"
23
+ RWKV_MODEL_FILENAME = "rwkv7-g1d-1.5b-20260212-ctx8192.pth"
24
 
25
  # Get the directory where this script is located
26
  SCRIPT_DIR = Path(__file__).parent.absolute()
 
85
 
86
 
87
  def load_rwkv7_model(model_path: str):
88
+ """Load RWKV7-G1D-1.5B model."""
89
  os.environ["RWKV_JIT_ON"] = "1"
90
  os.environ["RWKV_V7_ON"] = "1"
91
 
 
172
  _qwen_model, _qwen_tokenizer = load_qwen_model()
173
 
174
  # Load RWKV7 model
175
+ print("Loading RWKV7-G1D-1.5B...")
176
  _rwkv_model, _rwkv_tokenizer = load_rwkv7_model(_rwkv_model_path)
177
 
178
  # Initialize stats manager
 
267
  text=text,
268
  byte_losses_a=result_rwkv["byte_wise_losses"],
269
  byte_losses_b=result_qwen["byte_wise_losses"],
270
+ model_a_name="RWKV7-G1D-1.5B",
271
  model_b_name="Qwen3-1.7B-Base",
272
  topk_predictions_a=result_rwkv["top5_predictions"],
273
  topk_predictions_b=result_qwen["top5_predictions"],
 
352
  """
353
  <div style="text-align: center; margin-bottom: 20px;">
354
  <h1 style="margin-bottom: 10px;">🔬 Compression-Lens: RWKV-7 vs Qwen3 Byte-Level Comparison</h1>
355
+ <p style="margin-bottom: 15px; color: #666;">Compare the byte-level prediction performance between <strong>RWKV7-G1D-1.5B</strong> and <strong>Qwen3-1.7B-Base</strong>.</p>
356
  <div style="display: flex; justify-content: center; align-items: center; gap: 10px;">
357
  <a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
358
  <img src="https://img.shields.io/badge/GitHub-Project-181717?logo=github" alt="GitHub Project">
precompute_example.py CHANGED
@@ -23,7 +23,7 @@ PRECOMPUTED_DIR = SCRIPT_DIR / "precomputed"
23
 
24
  # Model configuration
25
  QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
26
- RWKV_MODEL_FILENAME = "rwkv7-g1c-1.5b-20260110-ctx8192.pth"
27
 
28
  # Detect device
29
  # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -69,7 +69,7 @@ def load_qwen_model():
69
 
70
 
71
  def load_rwkv7_model(model_path: str):
72
- """Load RWKV7-G1C-1.5B model."""
73
  os.environ["RWKV_JIT_ON"] = "1"
74
  os.environ["RWKV_V7_ON"] = "1"
75
 
@@ -116,7 +116,7 @@ def precompute_example():
116
  print("Loading Qwen3-1.7B-Base...")
117
  qwen_model, qwen_tokenizer = load_qwen_model()
118
 
119
- print("Loading RWKV7-G1C-1.5B...")
120
  rwkv_model, rwkv_tokenizer = load_rwkv7_model(rwkv_model_path)
121
 
122
  # Run evaluations
@@ -134,7 +134,7 @@ def precompute_example():
134
  text=example_text,
135
  byte_losses_a=result_rwkv["byte_wise_losses"],
136
  byte_losses_b=result_qwen["byte_wise_losses"],
137
- model_a_name="RWKV7-G1C-1.5B",
138
  model_b_name="Qwen3-1.7B-Base",
139
  topk_predictions_a=result_rwkv["top5_predictions"],
140
  topk_predictions_b=result_qwen["top5_predictions"],
 
23
 
24
  # Model configuration
25
  QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
26
+ RWKV_MODEL_FILENAME = "rwkv7-g1d-1.5b-20260212-ctx8192.pth"
27
 
28
  # Detect device
29
  # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
69
 
70
 
71
  def load_rwkv7_model(model_path: str):
72
+ """Load RWKV7-G1D-1.5B model."""
73
  os.environ["RWKV_JIT_ON"] = "1"
74
  os.environ["RWKV_V7_ON"] = "1"
75
 
 
116
  print("Loading Qwen3-1.7B-Base...")
117
  qwen_model, qwen_tokenizer = load_qwen_model()
118
 
119
+ print("Loading RWKV7-G1D-1.5B...")
120
  rwkv_model, rwkv_tokenizer = load_rwkv7_model(rwkv_model_path)
121
 
122
  # Run evaluations
 
134
  text=example_text,
135
  byte_losses_a=result_rwkv["byte_wise_losses"],
136
  byte_losses_b=result_qwen["byte_wise_losses"],
137
+ model_a_name="RWKV7-G1D-1.5B",
138
  model_b_name="Qwen3-1.7B-Base",
139
  topk_predictions_a=result_rwkv["top5_predictions"],
140
  topk_predictions_b=result_qwen["top5_predictions"],