WCNegentropy committed on
Commit
8601a92
·
verified ·
1 Parent(s): 8a102fa

Remove test_dataset_small.py - cleanup for OS launch

Browse files
Files changed (1) hide show
  1. test_dataset_small.py +0 -59
test_dataset_small.py DELETED
@@ -1,59 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script for BitTransformerLM dataset creation (small version)
4
- """
5
-
6
- import os
7
- import sys
8
- from pathlib import Path
9
- sys.path.insert(0, str(Path(__file__).parent))
10
-
11
- # Test the dataset builder with a small sample
12
- from bit_transformer.dataset_builder import BitTransformerDatasetBuilder
13
-
14
def test_small_dataset():
    """Smoke-test the BitTransformerLM dataset builder on a small sample.

    Constructs a ``BitTransformerDatasetBuilder`` and calls its
    text-to-bits, synthetic-pattern and safety-benchmark generators in
    turn, printing how many samples each one returned.  Finally prints the
    fields of the first text-to-bits sample, abbreviating ``bit_sequence``
    to its length and first ten entries.

    The token handed to the builder is read from the ``HF_TOKEN``
    environment variable, falling back to the placeholder
    ``'your-token-here'`` when the variable is unset.

    Returns:
        bool: ``True`` once every step has completed.  No exceptions are
        caught here, so any error raised by the builder propagates.
    """
    print("🧪 Testing BitTransformerLM Dataset Builder...")

    # Create builder with your token.
    # The lookup has to be evaluated at runtime; as a quoted string the
    # builder would receive the expression text itself, not the token.
    hf_token = os.environ.get('HF_TOKEN', 'your-token-here')
    repo_id = "BitTransformerLM"

    builder = BitTransformerDatasetBuilder(hf_token, repo_id)

    # Test text-to-bits generation
    print("📝 Testing text-to-bits conversion...")
    test_texts = [
        "Hello, world!",
        "The quick brown fox jumps over the lazy dog.",
        "Binary data processing with transformers.",
        "Information theory meets deep learning.",
        "Parity-protected bit sequences for safety.",
    ]

    text_samples = builder.generate_text_to_bits_data(test_texts, max_len=128)
    print(f"✅ Generated {len(text_samples)} text-to-bits samples")

    # Test synthetic patterns
    print("🎨 Testing synthetic patterns...")
    synthetic_samples = builder.generate_synthetic_patterns(10, max_len=64)
    print(f"✅ Generated {len(synthetic_samples)} synthetic samples")

    # Test safety benchmarks
    print("🛡️ Testing safety benchmarks...")
    safety_samples = builder.generate_safety_benchmarks(8)
    print(f"✅ Generated {len(safety_samples)} safety samples")

    # Show sample structure
    print("\n📊 Sample Structure:")
    sample = text_samples[0]
    for key, value in sample.items():
        if key == "bit_sequence":
            # Print only the length and a short prefix of the bit sequence.
            print(f" {key}: [{len(value)} bits] {value[:10]}...")
        else:
            print(f" {key}: {value}")

    print("\n🎉 All tests passed! Dataset builder is working correctly.")
    return True
57
-
58
# Script entry point: run the smoke test only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_small_dataset()