tvkain committed on
Commit
fed1832
·
verified ·
1 Parent(s): 75dc976

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +349 -0
  2. README.md +16 -0
  3. activation.py +101 -0
  4. activation_all.log +0 -0
  5. activation_all.py +192 -0
  6. activation_all.sh +16 -0
  7. activation_llama.py +101 -0
  8. activation_llama_all.py +146 -0
  9. activation_qwen.py +109 -0
  10. activation_qwen_all.py +126 -0
  11. activation_single.py +76 -0
  12. activation_single.sh +55 -0
  13. activations-old/README.md +1 -0
  14. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1200 +3 -0
  15. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1500 +3 -0
  16. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1800 +3 -0
  17. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2100 +3 -0
  18. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2400 +3 -0
  19. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2700 +3 -0
  20. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-300 +3 -0
  21. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-3000 +3 -0
  22. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-3231 +3 -0
  23. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-600 +3 -0
  24. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-900 +3 -0
  25. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1200 +3 -0
  26. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1500 +3 -0
  27. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1800 +3 -0
  28. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2100 +3 -0
  29. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2400 +3 -0
  30. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2700 +3 -0
  31. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-300 +3 -0
  32. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-3000 +3 -0
  33. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-3231 +3 -0
  34. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-600 +3 -0
  35. activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-900 +3 -0
  36. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1200 +3 -0
  37. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1500 +3 -0
  38. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1800 +3 -0
  39. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2100 +3 -0
  40. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2400 +3 -0
  41. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2700 +3 -0
  42. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2972 +3 -0
  43. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-300 +3 -0
  44. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-600 +3 -0
  45. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-900 +3 -0
  46. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1200 +3 -0
  47. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1500 +3 -0
  48. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1800 +3 -0
  49. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2100 +3 -0
  50. activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2400 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,352 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
37
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
38
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
39
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
40
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
41
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
42
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
43
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
44
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-3231 filter=lfs diff=lfs merge=lfs -text
45
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
46
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
47
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
48
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
49
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
50
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
51
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
52
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
53
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
54
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
55
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-3231 filter=lfs diff=lfs merge=lfs -text
56
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
57
+ activations/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
58
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
59
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
60
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
61
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
62
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
63
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
64
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2972 filter=lfs diff=lfs merge=lfs -text
65
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
66
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
67
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
68
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
69
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
70
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
71
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
72
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
73
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
74
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2972 filter=lfs diff=lfs merge=lfs -text
75
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
76
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
77
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
78
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
79
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
80
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
81
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
82
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
83
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
84
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
85
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
86
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-3300 filter=lfs diff=lfs merge=lfs -text
87
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-3600 filter=lfs diff=lfs merge=lfs -text
88
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-3900 filter=lfs diff=lfs merge=lfs -text
89
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-4200 filter=lfs diff=lfs merge=lfs -text
90
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-4500 filter=lfs diff=lfs merge=lfs -text
91
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-4800 filter=lfs diff=lfs merge=lfs -text
92
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-4888 filter=lfs diff=lfs merge=lfs -text
93
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
94
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
95
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
96
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
97
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
98
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
99
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
100
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
101
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
102
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
103
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-3300 filter=lfs diff=lfs merge=lfs -text
104
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-3600 filter=lfs diff=lfs merge=lfs -text
105
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-3900 filter=lfs diff=lfs merge=lfs -text
106
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-4200 filter=lfs diff=lfs merge=lfs -text
107
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-4500 filter=lfs diff=lfs merge=lfs -text
108
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-4800 filter=lfs diff=lfs merge=lfs -text
109
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-4888 filter=lfs diff=lfs merge=lfs -text
110
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
111
+ activations/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
112
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
113
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
114
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
115
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-1962 filter=lfs diff=lfs merge=lfs -text
116
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
117
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
118
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
119
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
120
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
121
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
122
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-1962 filter=lfs diff=lfs merge=lfs -text
123
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
124
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
125
+ activations/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
126
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
127
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
128
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
129
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
130
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
131
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
132
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
133
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
134
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-3300 filter=lfs diff=lfs merge=lfs -text
135
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-3600 filter=lfs diff=lfs merge=lfs -text
136
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-3900 filter=lfs diff=lfs merge=lfs -text
137
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-4200 filter=lfs diff=lfs merge=lfs -text
138
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-4500 filter=lfs diff=lfs merge=lfs -text
139
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-4800 filter=lfs diff=lfs merge=lfs -text
140
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-5100 filter=lfs diff=lfs merge=lfs -text
141
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-5400 filter=lfs diff=lfs merge=lfs -text
142
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-5700 filter=lfs diff=lfs merge=lfs -text
143
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-5784 filter=lfs diff=lfs merge=lfs -text
144
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
145
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
146
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
147
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
148
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
149
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
150
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
151
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
152
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
153
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
154
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-3300 filter=lfs diff=lfs merge=lfs -text
155
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-3600 filter=lfs diff=lfs merge=lfs -text
156
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-3900 filter=lfs diff=lfs merge=lfs -text
157
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-4200 filter=lfs diff=lfs merge=lfs -text
158
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-4500 filter=lfs diff=lfs merge=lfs -text
159
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-4800 filter=lfs diff=lfs merge=lfs -text
160
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-5100 filter=lfs diff=lfs merge=lfs -text
161
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-5400 filter=lfs diff=lfs merge=lfs -text
162
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-5700 filter=lfs diff=lfs merge=lfs -text
163
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-5784 filter=lfs diff=lfs merge=lfs -text
164
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
165
+ activations/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
166
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
167
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
168
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
169
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
170
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
171
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
172
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
173
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
174
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-3231 filter=lfs diff=lfs merge=lfs -text
175
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
176
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
177
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
178
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
179
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
180
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
181
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
182
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
183
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
184
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
185
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-3231 filter=lfs diff=lfs merge=lfs -text
186
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
187
+ activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
188
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
189
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
190
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
191
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
192
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
193
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
194
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2972 filter=lfs diff=lfs merge=lfs -text
195
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
196
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
197
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
198
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
199
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
200
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
201
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
202
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
203
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
204
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2972 filter=lfs diff=lfs merge=lfs -text
205
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
206
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
207
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
208
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
209
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
210
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
211
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
212
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
213
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
214
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
215
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
216
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-3300 filter=lfs diff=lfs merge=lfs -text
217
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-3600 filter=lfs diff=lfs merge=lfs -text
218
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-3900 filter=lfs diff=lfs merge=lfs -text
219
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-4200 filter=lfs diff=lfs merge=lfs -text
220
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-4500 filter=lfs diff=lfs merge=lfs -text
221
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-4800 filter=lfs diff=lfs merge=lfs -text
222
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-4888 filter=lfs diff=lfs merge=lfs -text
223
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
224
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
225
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
226
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
227
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
228
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
229
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
230
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
231
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
232
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
233
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-3300 filter=lfs diff=lfs merge=lfs -text
234
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-3600 filter=lfs diff=lfs merge=lfs -text
235
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-3900 filter=lfs diff=lfs merge=lfs -text
236
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-4200 filter=lfs diff=lfs merge=lfs -text
237
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-4500 filter=lfs diff=lfs merge=lfs -text
238
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-4800 filter=lfs diff=lfs merge=lfs -text
239
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-4888 filter=lfs diff=lfs merge=lfs -text
240
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
241
+ activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/activation.ga.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
242
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
243
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
244
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
245
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-1962 filter=lfs diff=lfs merge=lfs -text
246
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
247
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
248
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
249
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
250
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
251
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
252
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-1962 filter=lfs diff=lfs merge=lfs -text
253
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
254
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
255
+ activations-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/activation.zh.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
256
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
257
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
258
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
259
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
260
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
261
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
262
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
263
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
264
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-3300 filter=lfs diff=lfs merge=lfs -text
265
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-3600 filter=lfs diff=lfs merge=lfs -text
266
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-3900 filter=lfs diff=lfs merge=lfs -text
267
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-4200 filter=lfs diff=lfs merge=lfs -text
268
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-4500 filter=lfs diff=lfs merge=lfs -text
269
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-4800 filter=lfs diff=lfs merge=lfs -text
270
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-5100 filter=lfs diff=lfs merge=lfs -text
271
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-5400 filter=lfs diff=lfs merge=lfs -text
272
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-5700 filter=lfs diff=lfs merge=lfs -text
273
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-5784 filter=lfs diff=lfs merge=lfs -text
274
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
275
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.en.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
276
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-1200 filter=lfs diff=lfs merge=lfs -text
277
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-1500 filter=lfs diff=lfs merge=lfs -text
278
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-1800 filter=lfs diff=lfs merge=lfs -text
279
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-2100 filter=lfs diff=lfs merge=lfs -text
280
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-2400 filter=lfs diff=lfs merge=lfs -text
281
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-2700 filter=lfs diff=lfs merge=lfs -text
282
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
283
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-3000 filter=lfs diff=lfs merge=lfs -text
284
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-3300 filter=lfs diff=lfs merge=lfs -text
285
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-3600 filter=lfs diff=lfs merge=lfs -text
286
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-3900 filter=lfs diff=lfs merge=lfs -text
287
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-4200 filter=lfs diff=lfs merge=lfs -text
288
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-4500 filter=lfs diff=lfs merge=lfs -text
289
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-4800 filter=lfs diff=lfs merge=lfs -text
290
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-5100 filter=lfs diff=lfs merge=lfs -text
291
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-5400 filter=lfs diff=lfs merge=lfs -text
292
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-5700 filter=lfs diff=lfs merge=lfs -text
293
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-5784 filter=lfs diff=lfs merge=lfs -text
294
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-600 filter=lfs diff=lfs merge=lfs -text
295
+ activations-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/activation.ga.train.qwen-checkpoint-900 filter=lfs diff=lfs merge=lfs -text
296
+ activations-old/test/activation.en.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
297
+ activations-old/test/activation.ga.train.qwen-checkpoint-300 filter=lfs diff=lfs merge=lfs -text
298
+ job_scripts/logs/1483-activation-multiple-all.out filter=lfs diff=lfs merge=lfs -text
299
+ job_scripts/logs/1484-activation-multiple-all.out filter=lfs diff=lfs merge=lfs -text
300
+ new_activations_pt_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
301
+ old_ids/qwen2.5-0.5/id.en.train.qwen2.5-0.5 filter=lfs diff=lfs merge=lfs -text
302
+ old_ids/qwen2.5-0.5/id.eu.train.qwen2.5-0.5 filter=lfs diff=lfs merge=lfs -text
303
+ old_ids/qwen2.5-0.5/id.ga.train.qwen2.5-0.5 filter=lfs diff=lfs merge=lfs -text
304
+ old_ids/qwen2.5-0.5/id.zh.train.qwen2.5-0.5 filter=lfs diff=lfs merge=lfs -text
305
+ oscar_ids/l2-13b/id.en.train.l2-13b filter=lfs diff=lfs merge=lfs -text
306
+ oscar_ids/l2-13b/id.ga.train.l2-13b filter=lfs diff=lfs merge=lfs -text
307
+ oscar_ids/l2-7b/id.en.train.l2-7b filter=lfs diff=lfs merge=lfs -text
308
+ oscar_ids/l2-7b/id.eu.train.l2-7b filter=lfs diff=lfs merge=lfs -text
309
+ oscar_ids/q2.5/id.en.train.q2.5 filter=lfs diff=lfs merge=lfs -text
310
+ oscar_ids/q2.5/id.eu.train.q2.5 filter=lfs diff=lfs merge=lfs -text
311
+ oscar_ids/q2.5/id.ga.train.q2.5 filter=lfs diff=lfs merge=lfs -text
312
+ oscar_ids/q2.5/id.zh.train.q2.5 filter=lfs diff=lfs merge=lfs -text
313
+ qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-1200_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
314
+ qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-1500_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
315
+ qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-1800_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
316
+ qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-1962_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
317
+ qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-300_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
318
+ qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-600_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
319
+ qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-900_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
320
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-1200_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
321
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-1500_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
322
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-1800_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
323
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-2100_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
324
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-2400_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
325
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-2700_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
326
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-3000_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
327
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-300_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
328
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-3231_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
329
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-600_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
330
+ results-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus_qwen-checkpoint-900_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
331
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-1200_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
332
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-1500_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
333
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-1800_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
334
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-2100_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
335
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-2400_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
336
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-2700_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
337
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-2972_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
338
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-300_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
339
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-600_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
340
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus_qwen-checkpoint-900_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
341
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-1200_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
342
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-1500_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
343
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-1800_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
344
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-2100_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
345
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-2400_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
346
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-2700_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
347
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-3000_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
348
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-300_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
349
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-3300_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
350
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-3600_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
351
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-3900_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
352
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-4200_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
353
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-4500_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
354
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-4800_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
355
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-4888_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
356
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-600_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
357
+ results-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus/qwen2.5-0.5b_english_wiki_300M_en_1.5Birish_corpus_qwen-checkpoint-900_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
358
+ results-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-1200_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
359
+ results-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-1500_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
360
+ results-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-1800_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
361
+ results-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-1962_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
362
+ results-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-300_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
363
+ results-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-600_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
364
+ results-old/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_qwen-checkpoint-900_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
365
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-1200_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
366
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-1500_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
367
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-1800_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
368
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-2100_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
369
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-2400_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
370
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-2700_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
371
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-3000_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
372
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-300_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
373
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-3300_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
374
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-3600_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
375
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-3900_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
376
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-4200_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
377
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-4500_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
378
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-4800_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
379
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-5100_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
380
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-5400_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
381
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-5700_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
382
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-5784_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
383
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-600_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
384
+ results-old/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler/qwen2.5-0.5b_english_wiki_irish_corpus_custom_sampler_qwen-checkpoint-900_neurons_bar.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Language-Specific Neuron SLA
2
+
3
+ This repository identifies language-specific neurons. It is written specifically for the Qwen2.5 family of models.
4
+
5
+ ## Guide
6
+
7
+ 1. Run `load_data.py` to fetch data from https://huggingface.co/datasets/wikimedia/wikipedia/viewer/20231101
8
+ 2. Calculate the activation from the fetched data with `activation.py`
9
+ 3. Identify language-specific neurons with `identify.py`
10
+
11
+ ## Ref
12
+ - https://github.com/ReML-AI/DCL-CoT
13
+ - https://github.com/RUCAIBox/Language-Specific-Neurons
14
+
15
+ ## Note taking
16
+ `python3 load_data_oscar.py --languages en,zh,eu,ga --model-id qwen2.5 --tokenizer Qwen/Qwen2.5-0.5B`
activation.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Track per-neuron activations in Qwen2 MLP layers using Hugging Face Transformers
4
+ with explicit device management.
5
+ """
6
+
7
+ import argparse
8
+ import os
9
+ from types import MethodType
10
+
11
+ import torch
12
+ from torch import Tensor
13
+ from tqdm import tqdm
14
+ from transformers import AutoModelForCausalLM
15
+
16
+ # ---------------------- Activation Tracker ----------------------
17
+ class ActivationTracker:
18
+ def __init__(self, num_layers: int, intermediate_size: int):
19
+ # store on CPU to avoid memory issues
20
+ self.over_zero = torch.zeros(
21
+ num_layers, intermediate_size, dtype=torch.int32, device="cpu"
22
+ )
23
+
24
+ def make_qwen_hook(self, index: int):
25
+ over_zero = self.over_zero
26
+
27
+ def qwen_forward(self, x: Tensor):
28
+ gate_activation = self.act_fn(self.gate_proj(x))
29
+ with torch.no_grad():
30
+ over_zero[index, :] += (gate_activation > 0).sum(dim=(0, 1)).to("cpu")
31
+ return self.down_proj(gate_activation * self.up_proj(x))
32
+
33
+ return qwen_forward
34
+
35
+ # ---------------------- Arguments ----------------------
36
+ parser = argparse.ArgumentParser()
37
+ parser.add_argument("--model", type=str, required=True, help="HF model ID or local folder path")
38
+ parser.add_argument("--lang", type=str, required=True, help="Language code for dataset")
39
+ parser.add_argument("--data-path", type=str, required=True, help="Path to tokenized dataset (torch tensor)")
40
+ parser.add_argument("--output-dir", type=str, default="activations", help="Directory to save over_zero")
41
+ parser.add_argument("--batch-size", type=int, default=1, help="Batch size per device")
42
+ parser.add_argument("--chunk-size", type=int, default=4096, help="Max sequence length to process at once")
43
+ args = parser.parse_args()
44
+
45
+ # ---------------------- Setup Device ----------------------
46
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
47
+ print(f"Using device: {device}")
48
+ os.makedirs(args.output_dir, exist_ok=True)
49
+
50
+ # ---------------------- Load Data ----------------------
51
+ print("Loading data...")
52
+ ids = torch.load(args.data_path, map_location="cpu") # Load to CPU first
53
+
54
+ # ---------------------- Load Model ----------------------
55
+ print(f"Loading model: {args.model}")
56
+ model = AutoModelForCausalLM.from_pretrained(
57
+ args.model,
58
+ device_map="auto", # Let it automatically distribute across available GPUs
59
+ torch_dtype=torch.bfloat16 # reduce memory
60
+ )
61
+ model.eval()
62
+
63
+ num_layers = model.config.num_hidden_layers
64
+ intermediate_size = model.config.intermediate_size
65
+ max_len = model.config.max_position_embeddings
66
+
67
+ # Setup tracker
68
+ tracker = ActivationTracker(num_layers=num_layers, intermediate_size=intermediate_size)
69
+
70
+ # Monkey-patch MLP layers
71
+ for i, layer in enumerate(model.model.layers):
72
+ layer.mlp.forward = MethodType(tracker.make_qwen_hook(i), layer.mlp)
73
+
74
+ # Prepare input - use chunk_size instead of max_len for memory efficiency
75
+ chunk_size = min(args.chunk_size, max_len)
76
+ n = (ids.size(0) // chunk_size) * chunk_size
77
+ input_ids = ids[:n].reshape(-1, chunk_size)
78
+
79
+ print(f"Processing {input_ids.size(0)} sequences of length {chunk_size}")
80
+
81
+ # ---------------------- Run Inference ----------------------
82
+ with torch.no_grad():
83
+ for i in tqdm(range(0, input_ids.size(0), args.batch_size), desc="Processing", unit="batch"):
84
+ batch = input_ids[i:i + args.batch_size]
85
+
86
+ # Move batch to the same device as the model's first parameter
87
+ # This works with device_map="auto"
88
+ batch = batch.to(next(model.parameters()).device)
89
+
90
+ # Clear cache before each batch to prevent memory buildup
91
+ if torch.cuda.is_available():
92
+ torch.cuda.empty_cache()
93
+
94
+ model(input_ids=batch)
95
+
96
+ # ---------------------- Save activations ----------------------
97
+ model_name = os.path.basename(args.model.rstrip("/"))
98
+ out_path = os.path.join(args.output_dir, f"activation_{model_name}_{args.lang}.pt")
99
+ torch.save(tracker.over_zero, out_path)
100
+ print(f"Saved activation counts to {out_path}")
101
+ print("Activation single job done")
activation_all.log ADDED
The diff for this file is too large to render. See raw diff
 
activation_all.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Per-neuron activation tracker for LLaMA-2 and Qwen MLP layers.
4
+ Runs on a fixed set of models and multiple input ID files per model.
5
+ """
6
+
7
+ import torch
8
+ import os
9
+ from types import MethodType
10
+ from vllm import LLM, SamplingParams # Keep original import since hook logic depends on vLLM
11
+
12
+ # ---------------------- Config ----------------------
13
+ BASE_PATH = "/home/khanh/sla/sla_cpt"
14
+ ID_BASE_PATH = "./oscar_ids"
15
+
16
+ RUN_CONFIGS = [
17
+ # {
18
+ # 'name': 'l2-13b',
19
+ # 'model': f'{BASE_PATH}/uccix/checkpoint-4280',
20
+ # 'ids_list': [
21
+ # {"path": './ids/l2-13b/id.ga.train.l2-13b', "lang": "ga"},
22
+ # {"path": './ids/l2-13b/id.en.train.l2-13b', "lang": "en"}
23
+ # ],
24
+ # 'type': 'llama'
25
+ # },
26
+ # {
27
+ # 'name': 'l2-7b',
28
+ # 'model': f'{BASE_PATH}/llama2_7b_full_basque_corpus_grad_clip_1/checkpoint-10200',
29
+ # 'ids_list': [
30
+ # {"path": './ids/l2-7b/id.eu.train.l2-7b', "lang": "eu"},
31
+ # {"path": './ids/l2-7b/id.en.train.l2-7b', "lang": "en"}
32
+ # ],
33
+ # 'type': 'llama'
34
+ # },
35
+ {
36
+ 'name': 'q2.5-zh',
37
+ 'model': f'{BASE_PATH}/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_2e_240925/checkpoint-2944',
38
+ 'ids_list': [
39
+ {"path": f'{ID_BASE_PATH}/q2.5/id.zh.train.qwen2.5-0.5', "lang": "zh"},
40
+ {"path": f'{ID_BASE_PATH}/q2.5/id.en.train.qwen2.5-0.5', "lang": "en"}
41
+ ],
42
+ 'type': 'qwen'
43
+ },
44
+ # {
45
+ # 'name': 'q2.5-en+zh',
46
+ # 'model': f'{BASE_PATH}/qwen2.5-0.5b_english_wiki_150M_en_750M_chinese_wikipedia_corpus_2e_240925/checkpoint-3494',
47
+ # 'ids_list': [
48
+ # {"path": '{ID_BASE_PATH}/q2.5/id.zh.train.qwen2.5-0.5', "lang": "zh"},
49
+ # {"path": '{ID_BASE_PATH}/q2.5/id.en.train.qwen2.5-0.5', "lang": "en"}
50
+ # ],
51
+ # 'type': 'qwen'
52
+ # },
53
+ # {
54
+ # 'name': 'q2.5-ga',
55
+ # 'model': f'{BASE_PATH}/qwen2.5-0.5b_english_wiki_1.5B_irish_corpus_240925/checkpoint-2854',
56
+ # 'ids_list': [
57
+ # {"path": '{ID_BASE_PATH}/q2.5/id.en.train.qwen2.5-0.5', "lang": "en"},
58
+ # {"path": '{ID_BASE_PATH}/q2.5/id.ga.train.qwen2.5-0.5', "lang": "ga"}
59
+ # ],
60
+ # 'type': 'qwen'
61
+ # },
62
+ # # {
63
+ # # 'name': 'q2.5-en+ga',
64
+ # # 'model': f'{BASE_PATH}/qwen2.5-0.5_full_english_corpus_grad_clip_1/checkpoint-3231',
65
+ # # 'ids_list': [
66
+ # # {"path": './ids/qwen2.5-0.5/id.en.train.qwen2.5-0.5', "lang": "en"},
67
+ # # {"path": './ids/qwen2.5-0.5/id.ga.train.qwen2.5-0.5', "lang": "ga"}
68
+ # # ],
69
+ # # 'type': 'qwen'
70
+ # # },
71
+ # {
72
+ # 'name': 'q2.5-eu',
73
+ # 'model': f'{BASE_PATH}/qwen2.5-0.5b_english_wiki_1.5Bbasque_corpus_240925/checkpoint-2424',
74
+ # 'ids_list': [
75
+ # {"path": '{ID_BASE_PATH}/q2.5/id.eu.train.qwen2.5-0.5', "lang": "eu"},
76
+ # {"path": '{ID_BASE_PATH}/q2.5/id.en.train.qwen2.5-0.5', "lang": "en"}
77
+ # ],
78
+ # 'type': 'qwen'
79
+ # },
80
+ # {
81
+ # 'name': 'q2.5-en+eu',
82
+ # 'model': f'{BASE_PATH}/qwen2.5-0.5_full_basque_corpus_grad_clip_1/checkpoint-7800',
83
+ # 'ids_list': [
84
+ # {"path": './ids/qwen2.5-0.5/id.eu.train.qwen2.5-0.5', "lang": "eu"},
85
+ # {"path": './ids/qwen2.5-0.5/id.en.train.qwen2.5-0.5', "lang": "en"}
86
+ # ],
87
+ # }
88
+ ]
89
+
90
+ SAVE_FOLDER = "new_activations"
91
+ os.makedirs(SAVE_FOLDER, exist_ok=True)
92
+
93
+ # ---------------------- Hook Functions ----------------------
94
def make_llama_hook(idx):
    """Build a replacement ``forward`` for the MLP of layer ``idx``.

    The returned function reproduces the gated-SiLU MLP computation
    (``down_proj(silu(gate) * up)``) and, as a side effect, adds to row
    ``idx`` of the module-level ``over_zero`` tensor the number of tokens
    for which each gate neuron's SiLU output is strictly positive.
    """
    def llama_forward(self, x):
        # Fused projection; original comment gives the shape as (l, 2i).
        fused, _ = self.gate_up_proj(x)
        half = fused.size(-1) // 2
        gate_part = fused[:, :half]
        up_part = fused[:, half:]
        # Written back in place, exactly like the original slice assignment.
        gate_part.copy_(torch.nn.functional.silu(gate_part))
        fired = gate_part.float() > 0
        over_zero[idx, :] += fired.sum(dim=0)
        projected, _ = self.down_proj(gate_part * up_part)
        return projected
    return llama_forward
105
+
106
def make_qwen_hook(idx):
    """Build a replacement ``forward`` for the MLP of layer ``idx``.

    The returned function computes ``down_proj(silu(gate) * up)`` from the
    fused ``gate_up_proj`` output and adds, into row ``idx`` of the
    module-level ``over_zero`` tensor, how many entries along the first
    dimension had a strictly positive SiLU(gate) value for each neuron.
    """
    def qwen_forward(self, x):
        fused, _ = self.gate_up_proj(x)  # original comment: (s, 2h)
        width = fused.size(-1)
        h = width // 2
        gate = fused.narrow(-1, 0, h)        # first half of the last dim
        up = fused.narrow(-1, h, width - h)  # remainder of the last dim
        activated = torch.nn.SiLU()(gate)
        positive = activated > 0
        over_zero[idx, :] += positive.sum(dim=0)
        result, _ = self.down_proj(activated * up)
        return result
    return qwen_forward
117
+
118
+ # ---------------------- Run All Configs ----------------------
119
import gc

# Maps the config 'type' field to the factory that builds the tracking forward.
HOOK_FACTORIES = {'llama': make_llama_hook, 'qwen': make_qwen_hook}

for config in RUN_CONFIGS:
    model_name = config['model']
    save_name = config.get('name', model_name)
    model_type = config.get('type', 'llama')
    ids_list = config.get('ids_list', [])

    # Fail before the (expensive) model load if the config is malformed.
    if model_type not in HOOK_FACTORIES:
        raise ValueError(f"Unknown model type: {model_type}")
    make_hook = HOOK_FACTORIES[model_type]

    print(f"\n=== Processing model: {model_name}, type: {model_type} ===")

    # Load model
    model = LLM(
        model=model_name,
        tensor_parallel_size=1,
        enforce_eager=True,
        trust_remote_code=True
    )

    max_length = model.llm_engine.model_config.max_model_len
    num_layers = model.llm_engine.model_config.hf_config.num_hidden_layers
    intermediate_size = model.llm_engine.model_config.hf_config.intermediate_size

    print(f"Layers: {num_layers}, Intermediate size: {intermediate_size}, Max length: {max_length}")

    # Per-(layer, neuron) counters. This must stay a module-level name:
    # the hook closures look `over_zero` up as a global at call time.
    over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32).to('cuda')

    # Hook MLP layers
    for i in range(num_layers):
        mlp = model.llm_engine.model_executor.driver_worker.model_runner.model.model.layers[i].mlp
        mlp.forward = MethodType(make_hook(i), mlp)

    # Iterate over all ID files
    for id_dict in ids_list:
        ids_path = id_dict['path']
        lang = id_dict.get('lang', 'unknown')  # Use lang in dict for output filename

        # Reset the counters in place so each saved file holds counts for
        # this ID file only. Without this, the counts of every previously
        # processed language leak into the later files for the same model.
        over_zero.zero_()

        print(f"\nLoading IDs from {ids_path} (lang: {lang})...")
        ids = torch.load(ids_path)
        print(f"ID shape: {ids.shape}")

        # Cap the token count at 99,999,744 and round down to a whole
        # number of max_length-sized sequences.
        l = ids.size(0)
        l = min(l, 99999744) // max_length * max_length
        input_ids = ids[:l].reshape(-1, max_length)
        print(f"Processing {input_ids.size(0)} sequences of length {max_length}")

        # Run inference; only the hook side effects matter, so the
        # generated output (one token per prompt) is discarded.
        print("Running inference...")
        _ = model.generate(
            prompt_token_ids=input_ids.tolist(),
            sampling_params=SamplingParams(max_tokens=1)
        )

        # Save results for this ID file (.cpu() copies, so the later
        # in-place reset cannot affect what was saved).
        output_path = os.path.join(SAVE_FOLDER, f'activation.{lang}.train.{save_name}.pt')
        torch.save({
            'n': l,
            'over_zero': over_zero.cpu(),
            'num_layers': num_layers,
            'intermediate_size': intermediate_size
        }, output_path)

        print(f"Saved activation counts to {output_path}")
        print(f"Processed {l} tokens total")

    print(f"\nActivation analysis complete for model: {save_name}!")

    # Drop the engine before the next config builds a new one. Collect
    # first so the freed tensors can actually be returned by empty_cache().
    del model
    gc.collect()
    torch.cuda.empty_cache()
activation_all.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#SBATCH --job-name=activation_all        # Job name
#SBATCH --output=activation_all.log      # Standard output log
#SBATCH --error=activation_all.log       # Standard error log
#SBATCH --ntasks=1                       # Number of tasks (processes)
#SBATCH --cpus-per-task=32               # Number of CPU cores
#SBATCH --mem=256G                       # Memory
#SBATCH --gres=gpu:1                     # Number of GPUs
#SBATCH --partition=physical-gpu         # Partition/queue name

# Selects the non-V1 vLLM engine.
# NOTE(review): presumably required because activation_all.py reaches into
# llm_engine.model_executor.driver_worker to patch the MLPs - confirm.
export VLLM_USE_V1=0

# The variable Python reads is PYTHONUNBUFFERED (no underscore after
# "PYTHON"); the previous PYTHON_UNBUFFERED spelling had no effect, so
# progress output was still block-buffered into the log file.
export PYTHONUNBUFFERED=1

srun python3 activation_all.py
activation_llama.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple per-neuron activation tracker for LLaMA-2 MLP layers.
4
+ Runs on a fixed set of models and input IDs.
5
+ """
6
+
7
+ import torch
8
+ import os
9
+ from types import MethodType
10
+ from vllm import LLM, SamplingParams # Keep your original import since hook logic depends on vLLM
11
+
12
+ # ---------------------- Config ----------------------
13
+ RUN_CONFIGS = [
14
+ {
15
+ 'model': 'qwen2.5-0.5', # replace with LLaMA2 HF model if needed
16
+ 'ids_path': '../ids/qwen2.5-0.5/id.en.train.qwen2.5-0.5',
17
+ 'lang': 'en'
18
+ },
19
+ {
20
+ 'model': 'qwen2.5-0.5',
21
+ 'ids_path': '../ids/qwen2.5-0.5/id.de.train.qwen2.5-0.5',
22
+ 'lang': 'de'
23
+ },
24
+ # Add more entries here
25
+ ]
26
+
27
+ SAVE_FOLDER = "data"
28
+ os.makedirs(SAVE_FOLDER, exist_ok=True)
29
+
30
+ # ---------------------- Original Hook Function ----------------------
31
def make_llama_hook(idx):
    """Build a replacement ``forward`` for the MLP of layer ``idx``.

    The returned function reproduces the gated-SiLU MLP computation
    (``down_proj(silu(gate) * up)``) and adds, into row ``idx`` of the
    module-level ``over_zero`` tensor, the number of positions at which
    each gate neuron's SiLU output is strictly positive.

    The projection output may be ``(tokens, 2i)`` or ``(batch, len, 2i)``:
    all leading dimensions are treated as token positions. The previous
    version hard-coded three-dimensional indexing and raised an IndexError
    on the two-dimensional layout that the other scripts in this
    repository assume.
    """
    def llama_forward(self, x):
        gate_up, _ = self.gate_up_proj(x)  # (..., 2i)
        half = gate_up.size(-1) // 2
        # In-place SiLU on the gate half, as in the original implementation.
        gate_up[..., :half] = torch.nn.functional.silu(gate_up[..., :half])
        activation = gate_up[..., :half].float()  # (..., i)
        # Collapse every leading dimension so the sum is per neuron.
        over_zero[idx, :] += (activation > 0).flatten(end_dim=-2).sum(dim=0)
        x = gate_up[..., :half] * gate_up[..., half:]
        x, _ = self.down_proj(x)
        return x
    return llama_forward
42
+
43
+ # ---------------------- Run All Configs ----------------------
44
import gc

for config in RUN_CONFIGS:
    model_name = config['model']
    lang = config['lang']
    ids_path = config['ids_path']

    print(f"\n=== Processing model: {model_name}, lang: {lang} ===")

    # Load model
    model = LLM(
        model=model_name,
        tensor_parallel_size=1,
        enforce_eager=True,
        trust_remote_code=True
    )

    max_length = model.llm_engine.model_config.max_model_len
    num_layers = model.llm_engine.model_config.hf_config.num_hidden_layers
    intermediate_size = model.llm_engine.model_config.hf_config.intermediate_size

    print(f"Layers: {num_layers}, Intermediate size: {intermediate_size}, Max length: {max_length}")

    # Fresh per-(layer, neuron) counters for this config. This must stay a
    # module-level name: the hook closure looks `over_zero` up as a global.
    over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32).to('cuda')

    # Hook MLP layers
    for i in range(num_layers):
        mlp = model.llm_engine.model_executor.driver_worker.model_runner.model.model.layers[i].mlp
        mlp.forward = MethodType(make_llama_hook(i), mlp)

    # Load input IDs
    print("Loading IDs...")
    ids = torch.load(ids_path)
    print(f"ID shape: {ids.shape}")

    # Cap the token count at 99,999,744 and round down to a whole number
    # of max_length-sized sequences.
    l = ids.size(0)
    l = min(l, 99999744) // max_length * max_length
    input_ids = ids[:l].reshape(-1, max_length)
    print(f"Processing {input_ids.size(0)} sequences of length {max_length}")

    # Run inference; only the hook side effects matter.
    print("Running inference...")
    _ = model.generate(
        prompt_token_ids=input_ids.tolist(),
        sampling_params=SamplingParams(max_tokens=1)
    )

    # Save results
    output_path = os.path.join(SAVE_FOLDER, f'activation.{lang}.train.{model_name.split("/")[-1]}')
    torch.save({
        'n': l,
        'over_zero': over_zero.cpu(),
        'num_layers': num_layers,
        'intermediate_size': intermediate_size
    }, output_path)

    print(f"Saved activation counts to {output_path}")
    print(f"Processed {l} tokens total")

    # Release this engine before the next iteration constructs another one;
    # otherwise both engines are alive at the same time while the new one
    # is being built. Mirrors the cleanup done in activation_all.py.
    del model
    gc.collect()
    torch.cuda.empty_cache()

print("Activation analysis complete!")
activation_llama_all.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Per-neuron activation tracker for LLaMA-2 and Qwen MLP layers.
4
+ Runs on a fixed set of models and input IDs.
5
+ """
6
+
7
+ import torch
8
+ import os
9
+ from types import MethodType
10
+ from vllm import LLM, SamplingParams # Keep original import since hook logic depends on vLLM
11
+
12
+ # ---------------------- Config ----------------------
13
+ BASE_PATH = "/home/khanh/sla/sla_cpt"
14
+
15
# One entry per (model, ID file) run:
#   name     - label used in the output filename
#   model    - checkpoint directory passed to vLLM
#   ids_path - tensor of token IDs loaded with torch.load
#   lang     - language tag used in the output filename
#   type     - selects the hook ('llama' or 'qwen')
RUN_CONFIGS = [
    {
        'name': 'l2-7b-eu',
        'model': f'{BASE_PATH}/llama2_7b_full_basque_corpus_grad_clip_1/checkpoint-10200',
        'ids_path': './ids/l2-7b/id.eu.train.l2-7b',
        'lang': 'eu',
        'type': 'llama'
    },
    {
        'name': 'l2-13b-ga',
        'model': f'{BASE_PATH}/llama2_13b_full_irish_corpus_grad_clip_1/checkpoint-4280',
        # Was '.ids/...' (missing slash), which points at a hidden '.ids'
        # directory instead of './ids' like every other entry.
        'ids_path': './ids/l2-13b/id.ga.train.l2-13b',
        # Was 'en', although the ID file is the 'ga' split; the tag only
        # feeds the output filename, so the result was mislabelled.
        'lang': 'ga',
        'type': 'llama'
    },
    {
        'name': 'q2.5-zh',
        'model': f'{BASE_PATH}/qwen2.5-0.5_full_chinese_corpus_grad_clip_1/checkpoint-7800',
        'ids_path': './ids/qwen2.5-0.5/id.zh.train.qwen2.5-0.5',
        'lang': 'zh',
        'type': 'qwen'
    },
    {
        'name': 'q2.5-ga',
        'model': f'{BASE_PATH}/qwen2.5-0.5_full_english_corpus_grad_clip_1/checkpoint-3231',
        # NOTE(review): ID file is the 'en' split but lang is 'ga', and the
        # checkpoint path says "english_corpus" - confirm which is intended.
        'ids_path': './ids/qwen2.5-0.5/id.en.train.qwen2.5-0.5',
        'lang': 'ga',
        'type': 'qwen'
    },
    {
        'name': 'q2.5-en+ga',
        'model': f'{BASE_PATH}/qwen2.5-0.5_full_english_corpus_grad_clip_1/checkpoint-3231',
        'ids_path': './ids/qwen2.5-0.5/id.en+ga.train.qwen2.5-0.5',
        'lang': 'ga',
        'type': 'qwen'
    }
]
52
+
53
+ SAVE_FOLDER = "new_activations"
54
+ os.makedirs(SAVE_FOLDER, exist_ok=True)
55
+
56
+ # ---------------------- Hook Functions ----------------------
57
def make_llama_hook(idx):
    """Return a tracking ``forward`` for the LLaMA MLP at layer ``idx``.

    The forward applies SiLU to the gate half of the fused projection
    (in place), multiplies by the up half, applies ``down_proj``, and adds
    the per-neuron count of strictly positive gate activations to row
    ``idx`` of the module-level ``over_zero`` tensor.
    """
    def llama_forward(self, x):
        proj, _ = self.gate_up_proj(x)  # original comment: (l, 2i)
        mid = proj.size(-1) // 2
        gate_view = proj[:, :mid]
        # Overwrite the gate columns with their SiLU, as the original did.
        gate_view.copy_(torch.nn.functional.silu(gate_view))
        is_positive = gate_view.float() > 0
        over_zero[idx, :] += is_positive.sum(dim=(0))
        hidden = gate_view * proj[:, mid:]
        hidden, _ = self.down_proj(hidden)
        return hidden
    return llama_forward
68
+
69
def make_qwen_hook(idx):
    """Return a tracking ``forward`` for the Qwen MLP at layer ``idx``.

    The forward splits the fused ``gate_up_proj`` output into gate and up
    halves, computes ``down_proj(silu(gate) * up)``, and adds the per-neuron
    count of strictly positive SiLU(gate) values to row ``idx`` of the
    module-level ``over_zero`` tensor.
    """
    def qwen_forward(self, x):
        fused, _ = self.gate_up_proj(x)  # original comment: (s, 2h)
        total = fused.size(-1)
        h = total // 2
        gate = fused.narrow(-1, 0, h)
        up = fused.narrow(-1, h, total - h)
        act = torch.nn.SiLU()(gate)
        over_zero[idx, :] += (act > 0).sum(dim=(0))
        y, _ = self.down_proj(act * up)
        return y
    return qwen_forward
80
+
81
+ # ---------------------- Run All Configs ----------------------
82
import gc

for config in RUN_CONFIGS:
    model_name = config['model']
    lang = config['lang']
    ids_path = config['ids_path']
    save_name = config.get('name', model_name)
    model_type = config.get('type', 'llama')  # default to 'llama'

    # Pick the hook factory up front so a bad 'type' fails before the
    # (expensive) model load rather than after it.
    if model_type == 'llama':
        make_hook = make_llama_hook
    elif model_type == 'qwen':
        make_hook = make_qwen_hook
    else:
        raise ValueError(f"Unknown model type: {model_type}")

    print(f"\n=== Processing model: {model_name}, lang: {lang}, type: {model_type} ===")

    # Load model
    model = LLM(
        model=model_name,
        tensor_parallel_size=1,
        enforce_eager=True,
        trust_remote_code=True
    )

    max_length = model.llm_engine.model_config.max_model_len
    num_layers = model.llm_engine.model_config.hf_config.num_hidden_layers
    intermediate_size = model.llm_engine.model_config.hf_config.intermediate_size

    print(f"Layers: {num_layers}, Intermediate size: {intermediate_size}, Max length: {max_length}")

    # Fresh per-(layer, neuron) counters for this config. This must stay a
    # module-level name: the hook closures look `over_zero` up as a global.
    over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32).to('cuda')

    # Hook MLP layers
    for i in range(num_layers):
        mlp = model.llm_engine.model_executor.driver_worker.model_runner.model.model.layers[i].mlp
        mlp.forward = MethodType(make_hook(i), mlp)

    # Load input IDs
    print("Loading IDs...")
    ids = torch.load(ids_path)
    print(f"ID shape: {ids.shape}")

    # Cap the token count at 99,999,744 and round down to a whole number
    # of max_length-sized sequences.
    l = ids.size(0)
    l = min(l, 99999744) // max_length * max_length
    input_ids = ids[:l].reshape(-1, max_length)
    print(f"Processing {input_ids.size(0)} sequences of length {max_length}")

    # Run inference; only the hook side effects matter.
    print("Running inference...")
    _ = model.generate(
        prompt_token_ids=input_ids.tolist(),
        sampling_params=SamplingParams(max_tokens=1)
    )

    # Save results
    output_path = os.path.join(SAVE_FOLDER, f'activation.{lang}.train.{save_name}.pt')
    torch.save({
        'n': l,
        'over_zero': over_zero.cpu(),
        'num_layers': num_layers,
        'intermediate_size': intermediate_size
    }, output_path)

    print(f"Saved activation counts to {output_path}")
    print(f"Processed {l} tokens total")

    # Release this engine before the next iteration constructs another one;
    # otherwise both engines are alive at the same time while the new one
    # is being built. Mirrors the cleanup done in activation_all.py.
    del model
    gc.collect()
    torch.cuda.empty_cache()

print("Activation analysis complete!")
activation_qwen.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple per-neuron activation tracker for Qwen2 MLP layers using vLLM.
4
+ """
5
+
6
+ import argparse
7
+ from types import MethodType
8
+ import torch
9
+ import os
10
+ from vllm import LLM, SamplingParams
11
+
12
+ # ---------------------- Arguments ----------------------
13
+ parser = argparse.ArgumentParser()
14
+ parser.add_argument("-m", "--model", type=str, required=True, help="HF model ID or local folder path")
15
+ parser.add_argument("-l", "--lang", type=str, required=True, help="Language code for dataset")
16
+ parser.add_argument("--save-folder", type=str, default="data", help="Folder to save activation results")
17
+ args = parser.parse_args()
18
+
19
+ # ---------------------- Ensure Save Folder Exists ----------------------
20
+ os.makedirs(args.save_folder, exist_ok=True)
21
+
22
+ # ---------------------- Setup Model ----------------------
23
+ model = LLM(
24
+ model=args.model,
25
+ tensor_parallel_size=1,
26
+ enforce_eager=True,
27
+ trust_remote_code=True
28
+ )
29
+
30
+ max_length = model.llm_engine.model_config.max_model_len
31
+ num_layers = model.llm_engine.model_config.hf_config.num_hidden_layers
32
+ intermediate_size = model.llm_engine.model_config.hf_config.intermediate_size
33
+
34
+ print(f"Model: {args.model}")
35
+ print(f"Layers: {num_layers}, Intermediate size: {intermediate_size}, Max length: {max_length}")
36
+
37
+ # ---------------------- Setup Tracker ----------------------
38
+ over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32).to('cuda')
39
+
40
def make_qwen_hook(idx):
    """Create the tracking ``forward`` installed on the MLP of layer ``idx``.

    Side effect of the returned function: row ``idx`` of the module-level
    ``over_zero`` tensor is incremented, per neuron, by the number of
    entries along the first dimension whose SiLU(gate) value is > 0.
    """
    def qwen_forward(self, x):
        """
        x: (s, d) per the original note, where
        s: number of tokens
        d: number of features for each token
        """
        # The fused projection carries gate and up side by side: (s, 2h).
        fused, _ = self.gate_up_proj(x)
        width = fused.size(-1)
        h = width // 2

        gate = fused.narrow(-1, 0, h)        # (s, h)
        up = fused.narrow(-1, h, width - h)  # (s, h)

        # SiLU on the gate half only.
        activated = torch.nn.SiLU()(gate)    # (s, h)

        # Count, per neuron, how many rows fired (> 0).
        fired = activated > 0
        over_zero[idx, :] += fired.sum(dim=(0))  # (h)

        # Finish the MLP: gate * up -> down projection.
        out, _ = self.down_proj(activated * up)
        return out

    return qwen_forward
71
+
72
+ # ---------------------- Hook MLP Layers ----------------------
73
+ print("Setting up activation hooks...")
74
+ for i in range(num_layers):
75
+ mlp = model.llm_engine.model_executor.driver_worker.model_runner.model.model.layers[i].mlp
76
+ mlp.forward = MethodType(make_qwen_hook(i), mlp)
77
+
78
+ # ---------------------- Load Data ----------------------
79
+ print("Loading ids...")
80
+ ids = torch.load(f'../ids/qwen2.5-0.5/id.{args.lang}.train.qwen2.5-0.5')
81
+ print(f"ID shape {ids.shape}")
82
+
83
+ # Process sequences
84
+ l = ids.size(0)
85
+ l = min(l, 99999744) // max_length * max_length
86
+ input_ids = ids[:l].reshape(-1, max_length)
87
+ print(f"Processing {input_ids.size(0)} sequences of length {max_length}")
88
+
89
+ # ---------------------- Run Inference ----------------------
90
+ print("Running inference...")
91
+ output = model.generate(
92
+ prompt_token_ids=input_ids.tolist(),
93
+ sampling_params=SamplingParams(max_tokens=1)
94
+ )
95
+
96
+ # ---------------------- Save Results ----------------------
97
+ result = {
98
+ 'n': l,
99
+ 'over_zero': over_zero.to('cpu'),
100
+ 'num_layers': num_layers,
101
+ 'intermediate_size': intermediate_size
102
+ }
103
+
104
+ output_path = os.path.join(args.save_folder, f'activation.{args.lang}.train.qwen-{args.model.split("/")[-1]}')
105
+ torch.save(result, output_path)
106
+
107
+ print(f"Saved activation counts to {output_path}")
108
+ print(f"Processed {l} tokens total")
109
+ print("Activation analysis complete!")
activation_qwen_all.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple per-neuron activation tracker for LLaMA-2 MLP layers.
4
+ Runs on a fixed set of models and input IDs.
5
+ """
6
+
7
+ import torch
8
+ import os
9
+ from types import MethodType
10
+ from vllm import LLM, SamplingParams # Keep your original import since hook logic depends on vLLM
11
+
12
+ # ---------------------- Config ----------------------
13
+
14
+ BASE_PATH = "/home/khanh/sla/sla_cpt"
15
+
16
# One entry per run:
#   name     - label used in the output filename
#   model    - checkpoint directory passed to vLLM
#   ids_path - tensor of token IDs loaded with torch.load
#   lang     - language tag used in the output filename
# NOTE(review): the 'name' labels say q2.5 (Qwen) while the checkpoints and
# ID files are LLaMA-2 ones - confirm the labels before relying on the
# output filenames.
RUN_CONFIGS = [
    {
        'name': 'q2.5-zh',  # custom name for output file
        'model': f'{BASE_PATH}/llama2_7b_full_basque_corpus_grad_clip_1/checkpoint-10200',  # replace with LLaMA2 HF model if needed
        'ids_path': './ids/l2-7b/id.eu.train.l2-7b',
        'lang': 'eu'
    },
    {
        'name': 'q2.5-ga',
        'model': f'{BASE_PATH}/llama2_13b_full_irish_corpus_grad_clip_1/checkpoint-4280',
        # Was '.ids/...' (missing slash), which points at a hidden '.ids'
        # directory instead of './ids' like the entry above.
        'ids_path': './ids/l2-13b/id.ga.train.l2-13b',
        # Was 'en', although the ID file is the 'ga' split; the tag only
        # feeds the output filename, so the result was mislabelled.
        'lang': 'ga'
    }
]
30
+
31
+ SAVE_FOLDER = "new_activations"
32
+ os.makedirs(SAVE_FOLDER, exist_ok=True)
33
+
34
+ # ---------------------- Original Hook Function ----------------------
35
def make_qwen_hook(idx):
    """Create the tracking ``forward`` installed on the MLP of layer ``idx``.

    Side effect of the returned function: row ``idx`` of the module-level
    ``over_zero`` tensor is incremented, per neuron, by the number of
    entries along the first dimension whose SiLU(gate) value is > 0.
    """
    def qwen_forward(self, x):
        """
        x: (s, d) per the original note, where
        s: number of tokens
        d: number of features for each token
        """
        # Gate and up projections arrive concatenated on the last axis: (s, 2h).
        both, _ = self.gate_up_proj(x)
        last = both.size(-1)
        h = last // 2

        gate_half = both.narrow(-1, 0, h)      # (s, h)
        up_half = both.narrow(-1, h, last - h)  # (s, h)

        # Non-linearity is applied to the gate half only.
        gate_act = torch.nn.SiLU()(gate_half)  # (s, h)

        # Accumulate how many rows were > 0 for every neuron.
        n_fired = (gate_act > 0).sum(dim=(0))  # (h)
        over_zero[idx, :] += n_fired

        # gate * up, then project back down.
        y, _ = self.down_proj(gate_act * up_half)
        return y

    return qwen_forward
66
+
67
+ # ---------------------- Run All Configs ----------------------
68
import gc

for config in RUN_CONFIGS:
    model_name = config['model']
    lang = config['lang']
    ids_path = config['ids_path']
    save_name = config.get('name', model_name)  # use 'name' key if present, otherwise fallback to model_name

    print(f"\n=== Processing model: {model_name}, lang: {lang} ===")

    # Load model
    model = LLM(
        model=model_name,
        tensor_parallel_size=1,
        enforce_eager=True,
        trust_remote_code=True
    )

    max_length = model.llm_engine.model_config.max_model_len
    num_layers = model.llm_engine.model_config.hf_config.num_hidden_layers
    intermediate_size = model.llm_engine.model_config.hf_config.intermediate_size

    print(f"Layers: {num_layers}, Intermediate size: {intermediate_size}, Max length: {max_length}")

    # Fresh per-(layer, neuron) counters for this config. This must stay a
    # module-level name: the hook closure looks `over_zero` up as a global.
    over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32).to('cuda')

    # Hook MLP layers
    for i in range(num_layers):
        mlp = model.llm_engine.model_executor.driver_worker.model_runner.model.model.layers[i].mlp
        mlp.forward = MethodType(make_qwen_hook(i), mlp)

    # Load input IDs
    print("Loading IDs...")
    ids = torch.load(ids_path)
    print(f"ID shape: {ids.shape}")

    # Cap the token count at 99,999,744 and round down to a whole number
    # of max_length-sized sequences.
    l = ids.size(0)
    l = min(l, 99999744) // max_length * max_length
    input_ids = ids[:l].reshape(-1, max_length)
    print(f"Processing {input_ids.size(0)} sequences of length {max_length}")

    # Run inference; only the hook side effects matter.
    print("Running inference...")
    _ = model.generate(
        prompt_token_ids=input_ids.tolist(),
        sampling_params=SamplingParams(max_tokens=1)
    )

    # Save results using the 'name' key
    output_path = os.path.join(SAVE_FOLDER, f'activation.{lang}.train.{save_name}.pt')
    torch.save({
        'n': l,
        'over_zero': over_zero.cpu(),
        'num_layers': num_layers,
        'intermediate_size': intermediate_size
    }, output_path)

    print(f"Saved activation counts to {output_path}")
    print(f"Processed {l} tokens total")

    # Release this engine before the next iteration constructs another one;
    # otherwise both engines are alive at the same time while the new one
    # is being built. Mirrors the cleanup done in activation_all.py.
    del model
    gc.collect()
    torch.cuda.empty_cache()

print("Activation analysis complete!")
activation_single.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ from types import MethodType
4
+
5
+ import torch
6
+ from vllm import LLM, SamplingParams
7
+
8
+
9
+ parser = argparse.ArgumentParser()
10
+ parser.add_argument("-m", "--model", type=str, default="meta-llama/Llama-2-7b-hf")
11
+ parser.add_argument("-i", "--id_path", type=str, required=True)
12
+ parser.add_argument("-t", "--type", type=str, default="llama")
13
+ parser.add_argument("-s", "--save_folder", type=str, required=True)
14
+ parser.add_argument("-n", "--name", type=str, required=True)
15
+ args = parser.parse_args()
16
+
17
+ model = LLM(model=args.model, tensor_parallel_size=torch.cuda.device_count(), enforce_eager=True)
18
+
19
+ max_length = model.llm_engine.model_config.max_model_len
20
+ num_layers = model.llm_engine.model_config.hf_config.num_hidden_layers
21
+ intermediate_size = model.llm_engine.model_config.hf_config.intermediate_size
22
+
23
+ over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32).to('cuda')
24
+
25
def extract_lang(id_path):
    """Return the language tag embedded in an ID-file path.

    The tag is the second dot-separated field of the final '/'-separated
    path component, e.g. ``./ids/id.zh.train.q2.5`` -> ``zh``.
    Raises IndexError when the file name contains no dot.
    """
    file_name = id_path.rsplit('/', 1)[-1]
    fields = file_name.split('.')
    return fields[1]
31
+
32
def factory(idx):
    """Build the tracking ``forward`` for the MLP of layer ``idx``.

    Returns the LLaMA variant when ``args.type == 'llama'`` and the Qwen
    variant for any other value. Both compute ``down_proj(silu(gate) * up)``
    and add the per-neuron count of strictly positive SiLU(gate) values to
    row ``idx`` of the module-level ``over_zero`` tensor; the LLaMA variant
    additionally writes the SiLU result back into the projection output.
    """
    def qwen_forward(self, x):
        fused, _ = self.gate_up_proj(x)  # original comment: (s, 2h)
        width = fused.size(-1)
        h = width // 2
        gate = fused.narrow(-1, 0, h)        # (s, h)
        up = fused.narrow(-1, h, width - h)  # (s, h)
        activated = torch.nn.SiLU()(gate)
        over_zero[idx, :] += (activated > 0).sum(dim=0)
        out, _ = self.down_proj(activated * up)
        return out

    def llama_forward(self, x):
        fused, _ = self.gate_up_proj(x)  # original comment: (l, 2i)
        half = fused.size(-1) // 2
        gate_view = fused[:, :half]
        # In-place overwrite of the gate columns, as in the original.
        gate_view.copy_(torch.nn.functional.silu(gate_view))
        fired = gate_view.float() > 0  # (l, i)
        over_zero[idx, :] += fired.sum(dim=0)
        out, _ = self.down_proj(gate_view * fused[:, half:])
        return out

    return llama_forward if args.type == 'llama' else qwen_forward
57
+
58
# Locate the decoder layers on the driver worker. The sibling scripts reach
# it through `llm_engine.model_executor`, while this script originally used
# `llm_engine.driver_worker` directly; accept either layout.
# NOTE(review): which attribute exists depends on the installed vLLM
# version - confirm against the version in use.
engine = model.llm_engine
executor = getattr(engine, "model_executor", engine)
layers = executor.driver_worker.model_runner.model.model.layers

# Replace each MLP's forward with the counting variant for its layer index.
for i in range(num_layers):
    obj = layers[i].mlp
    obj.forward = MethodType(factory(i), obj)

# The parser defines no --lang option, so `args.lang` raised AttributeError;
# the language tag is taken from the ID file name instead.
lang = extract_lang(args.id_path)

ids = torch.load(args.id_path)

# Cap the token count at 99,999,744 and round down to a whole number of
# max_length-sized sequences.
l = ids.size(0)
l = min(l, 99999744) // max_length * max_length
input_ids = ids[:l].reshape(-1, max_length)

# Only the hook side effects matter; the generated token is discarded below.
output = model.generate(prompt_token_ids=input_ids.tolist(), sampling_params=SamplingParams(max_tokens=1))

output = dict(n=l, over_zero=over_zero.to('cpu'))

# Create the target folder so torch.save does not fail on a fresh checkout.
os.makedirs(args.save_folder, exist_ok=True)
save_path = os.path.join(args.save_folder, f"activation.{lang}.{args.name}.pt")

torch.save(output, save_path)
activation_single.sh ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#SBATCH --job-name=activation_single      # Job name
#SBATCH --output=activation_single.log    # Standard output log
#SBATCH --error=activation_single.log     # Standard error log
#SBATCH --ntasks=1                        # Number of tasks (processes)
#SBATCH --cpus-per-task=32                # Number of CPU cores
#SBATCH --mem=256G                        # Memory
#SBATCH --gres=gpu:1                      # Number of GPUs
#SBATCH --partition=physical-gpu          # Partition/queue name

BASE_PATH="/home/khanh/sla/sla_cpt"
ID_BASE_PATH="./oscar_ids"
ACTIVATION_BASE_PATH="./new_new_activations"

# --- Run configs ---
# Bash has no nested arrays and needs `declare -A` for string keys, so each
# run is described by two variables:
#   <run>      associative array with the keys name / model / type
#   <run>_ids  indexed array of ID-file paths to process for that run
#
# declare -A run_l2_13b=(
#     [name]="l2-13b"
#     [model]="$BASE_PATH/uccix/checkpoint-4280"
#     [type]="llama"
# )
# run_l2_13b_ids=(
#     "$ID_BASE_PATH/l2-13b/id.ga.train.l2-13b"
#     "$ID_BASE_PATH/l2-13b/id.en.train.l2-13b"
# )

declare -A run_q25_zh=(
    [name]="q2.5-zh"
    [model]="$BASE_PATH/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_2e_240925/checkpoint-2944"
    [type]="qwen"
)
run_q25_zh_ids=(
    "$ID_BASE_PATH/q2.5/id.zh.train.q2.5"
    "$ID_BASE_PATH/q2.5/id.en.train.q2.5"
)

# --- Put them in a list ---
ALL_RUNS=(run_q25_zh)

# Make sure the output folder passed via -s exists.
mkdir -p "$ACTIVATION_BASE_PATH"

# --- Loop ---
for run in "${ALL_RUNS[@]}"; do
    # Namerefs (bash >= 4.3) give uniform access to the current run's arrays.
    declare -n cfg="$run"
    declare -n cfg_ids="${run}_ids"

    echo "=== Running ${cfg[name]} ==="
    for id_path in "${cfg_ids[@]}"; do
        echo "  -> $id_path"
        python3 activation_single.py \
            -m "${cfg[model]}" \
            -t "${cfg[type]}" \
            -i "$id_path" \
            -n "${cfg[name]}" \
            -s "$ACTIVATION_BASE_PATH"
    done

    # Drop the namerefs themselves (not their targets) before rebinding.
    unset -n cfg cfg_ids
done
activations-old/README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ Activations on WIKIPEDIA DATASET
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1200 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cea277fb7636f145b1bf551617d1ed05fa07e67dac1d95842061d53f4d3c4e9
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1500 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1749441df8a2860e14a41583860cf7fb836534f2b2e181c13e81cb123726667c
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-1800 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726d65b3d0020771d34abf07322cc2b9166ba78b13169f403643ca47a6d63880
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2100 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0bbb84b21288c13c83debf4e979bfb533a2f3b429427da7db76fcec78f12a88
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2400 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe5cb042ba2bf8008f279aa0647f3fdc60671374b76677f1997308831ca60b73
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-2700 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:817783e55b07a2703698edf7f1381a92c45ae29d464a3992f4979b821aca0159
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-300 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d133b8a1d9c909a8079b91236d7e0ce079cc0fd5f304d9404f17070518700b57
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-3000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fb497bf1342c7566510502713b8035ef26fb5c5fe19cbc9b72bbbfe7a07dfd0
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-3231 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c856e80ecb87701a23f4984429da9b9c0decb14c1ad7d503bbbb524b29fb299f
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-600 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8740d524564999015bd919f5a166deab4b03e1fc4c2730564695f4f76abf9ee1
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.en.train.qwen-checkpoint-900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5beabf8349f63597e36299db52121e47af81b4e4a02f4439b4c9f34f01817a2
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1200 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1504808cbf37639effd1664a0bff790d5be28ec3e23b17ed65c12c7d3c08d210
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1500 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30cce6951837c6d19d484e16603abd0779c2cbcf3f818e0e08827089bb9ffe60
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-1800 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c625eab5041de7d461b822498f82ce20fbbcceaaa703cb8af5b970f029703a
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2100 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad1dad46b038443c45a9df3560ba00e31e68ea85248cf2f2aa2bea0e53390309
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2400 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4404b495aa455a0689c951c9da191b87be2ca6c91e4c1614d7d1eeb5da7572f
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-2700 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7104954d86a009d4968d3abb8c302a38789b3abbf1fee277c57541420e547c9e
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-300 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61b0911c5246ed8d8cd93e3bd55771b678e56e91266008205473b01d6d0c58aa
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-3000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21c10d4298ef08be77d858ff161dec601d9b26b610057a81ae4d3eb0dda9f3f8
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-3231 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a84e27588461f9afcbe2cd08da3b31e983315b25afe43d93806b45f95cdda4c1
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-600 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24d61fe7398bc86d25790a4b4fff6a9bf33f1b2657c3ace3b67f520f6a8e8bbd
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_1.5B_basque_corpus/activation.eu.train.qwen-checkpoint-900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe9a1c39f9ed33536dab2417a976b18abb4aab42b233a99ca558ed61f12e32a
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1200 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:021759d151d96f4f10861bae0cea0104e5600361c65ae9e1f135ec88a91c219b
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1500 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fc65a08a2936427c6abd4342967b9c67d5921641795a5f9de7fc964d9fbd400
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-1800 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fb4f35aeffedb6d1efb928cf587ce44a445f476ca3821d5cb44ad3c7b8dff0e
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2100 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff03e3081c471e6b71899e1327a9c4160defa427a48cad6c19176fd427b5d504
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2400 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad1f7ab916924bb34d53647cf2bd2760b944e8870b49f63ad8056cbd6b69df2
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2700 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6467749237cf7074443d52a4ab7a253426d68dc4caf4dc13c01d917c5e3ddbef
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-2972 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95de6afadc9155ee59b66574af57fefbee9950ca9fde08ffc6914cb13f7e9290
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-300 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d796bc5f4df79f526568aa817162eae20d29bf6d46ffa3b803893fd305a4b482
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-600 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9cc9a96849fff460e5c5294386a9d7637108234938f705f2313a8e09515d3d2
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.en.train.qwen-checkpoint-900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7e1ea9a62d6cb3f6f69d245b02e89c1f0f9e01d8c940e4d3fc1186fa7db8e86
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1200 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:300abc0c19cee744c435c9f596152cba7d6d591be831b1a633906371d19eabc8
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1500 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:090ce2e0d3ab3aa6a197af9e47bfcc44f2490702a0cd5ccca93c54a5cbb24f70
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-1800 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1bfb66fd295ceac5b29d31018905e6a5ecf9530bcd4f3bab8bfd77a8d789b5d
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2100 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:267ff05b6ccd58d67de3874ca3a56ab8adea14389d5f1cd931a8367a735f9012
3
+ size 468248
activations-old/qwen2.5-0.5b_english_wiki_300M_en_1.5Bbasque_corpus/activation.eu.train.qwen-checkpoint-2400 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f791657a1175690e59daa395eab1e2d73a98dec3c03585d8aeaef250b5bf5ba5
3
+ size 468248