Spaces:
Sleeping
Sleeping
Commit ·
d6f21db
1
Parent(s): 5b10bf0
Fix token predictions not being collected - changed condition to check for block_outputs and norm_parameters instead of logit_lens_parameter
Browse files- components/__pycache__/__init__.cpython-311.pyc +0 -0
- components/__pycache__/main_panel.cpython-311.pyc +0 -0
- components/__pycache__/model_selector.cpython-311.pyc +0 -0
- components/__pycache__/sidebar.cpython-311.pyc +0 -0
- components/__pycache__/tokenization_panel.cpython-311.pyc +0 -0
- todo.md +1 -0
- utils/__pycache__/model_patterns.cpython-311.pyc +0 -0
- utils/model_patterns.py +9 -3
components/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (177 Bytes). View file
|
|
|
components/__pycache__/main_panel.cpython-311.pyc
ADDED
|
Binary file (2.35 kB). View file
|
|
|
components/__pycache__/model_selector.cpython-311.pyc
ADDED
|
Binary file (2.87 kB). View file
|
|
|
components/__pycache__/sidebar.cpython-311.pyc
ADDED
|
Binary file (3.05 kB). View file
|
|
|
components/__pycache__/tokenization_panel.cpython-311.pyc
ADDED
|
Binary file (9.9 kB). View file
|
|
|
todo.md
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
|
| 3 |
## Recent Fixes
|
| 4 |
- [X] Fixed callback error: Changed Output from non-existent 'status-message' to existing 'model-status' component
|
|
|
|
| 5 |
|
| 6 |
## PHASE 1: Backend Infrastructure Changes
|
| 7 |
|
|
|
|
| 2 |
|
| 3 |
## Recent Fixes
|
| 4 |
- [X] Fixed callback error: Changed Output from non-existent 'status-message' to existing 'model-status' component
|
| 5 |
+
- [X] Fixed token predictions not being collected: Changed condition in extract_layer_data to check for block_outputs and norm_parameters instead of logit_lens_parameter
|
| 6 |
|
| 7 |
## PHASE 1: Backend Infrastructure Changes
|
| 8 |
|
utils/__pycache__/model_patterns.cpython-311.pyc
CHANGED
|
Binary files a/utils/__pycache__/model_patterns.cpython-311.pyc and b/utils/__pycache__/model_patterns.cpython-311.pyc differ
|
|
|
utils/model_patterns.py
CHANGED
|
@@ -1051,7 +1051,13 @@ def extract_layer_data(activation_data: Dict[str, Any], model, tokenizer) -> Lis
|
|
| 1051 |
for name in layer_modules if re.findall(r'\d+', name)]
|
| 1052 |
)
|
| 1053 |
|
| 1054 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1055 |
|
| 1056 |
# Get global top 5 tokens from final output
|
| 1057 |
global_top5_tokens = activation_data.get('global_top5_tokens', [])
|
|
@@ -1062,7 +1068,7 @@ def extract_layer_data(activation_data: Dict[str, Any], model, tokenizer) -> Lis
|
|
| 1062 |
prev_global_probs = {} # Track previous layer's global top 5 probabilities
|
| 1063 |
|
| 1064 |
for layer_num, module_name in layer_info:
|
| 1065 |
- 
top_tokens = _get_top_tokens(activation_data, module_name, model, tokenizer, top_k=5) if logit_lens_parameter else None
|
| 1066 |
|
| 1067 |
# Get top-3 attended tokens for this layer
|
| 1068 |
top_attended = _get_top_attended_tokens(activation_data, layer_num, tokenizer, top_k=3)
|
|
@@ -1070,7 +1076,7 @@ def extract_layer_data(activation_data: Dict[str, Any], model, tokenizer) -> Lis
|
|
| 1070 |
# Get probabilities for global top 5 tokens at this layer
|
| 1071 |
global_top5_probs = {}
|
| 1072 |
global_top5_deltas = {}
|
| 1073 |
- 
if logit_lens_parameter and global_top5_token_names:
|
| 1074 |
global_top5_probs = _get_token_probabilities_for_layer(
|
| 1075 |
activation_data, module_name, model, tokenizer, global_top5_token_names
|
| 1076 |
)
|
|
|
|
| 1051 |
for name in layer_modules if re.findall(r'\d+', name)]
|
| 1052 |
)
|
| 1053 |
|
| 1054 |
+
# Check if we can compute token predictions (requires block_outputs and norm_parameters)
|
| 1055 |
+
# Note: Previously, this checked for logit_lens_parameter, but that parameter is not actually
|
| 1056 |
+
# needed for computing predictions. The _get_top_tokens function only needs block_outputs
|
| 1057 |
+
# and norm_parameters to work correctly.
|
| 1058 |
+
has_block_outputs = bool(activation_data.get('block_outputs', {}))
|
| 1059 |
+
has_norm_params = bool(activation_data.get('norm_parameters', []))
|
| 1060 |
+
can_compute_predictions = has_block_outputs and has_norm_params
|
| 1061 |
|
| 1062 |
# Get global top 5 tokens from final output
|
| 1063 |
global_top5_tokens = activation_data.get('global_top5_tokens', [])
|
|
|
|
| 1068 |
prev_global_probs = {} # Track previous layer's global top 5 probabilities
|
| 1069 |
|
| 1070 |
for layer_num, module_name in layer_info:
|
| 1071 |
+
top_tokens = _get_top_tokens(activation_data, module_name, model, tokenizer, top_k=5) if can_compute_predictions else None
|
| 1072 |
|
| 1073 |
# Get top-3 attended tokens for this layer
|
| 1074 |
top_attended = _get_top_attended_tokens(activation_data, layer_num, tokenizer, top_k=3)
|
|
|
|
| 1076 |
# Get probabilities for global top 5 tokens at this layer
|
| 1077 |
global_top5_probs = {}
|
| 1078 |
global_top5_deltas = {}
|
| 1079 |
+
if can_compute_predictions and global_top5_token_names:
|
| 1080 |
global_top5_probs = _get_token_probabilities_for_layer(
|
| 1081 |
activation_data, module_name, model, tokenizer, global_top5_token_names
|
| 1082 |
)
|