Working version
app.py
CHANGED
@@ -25,9 +25,12 @@ model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
 print(f"Loading model {model_id}...")
 
-model = LlamaForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, quantization_config=quantization_config, device_map="cuda", use_safetensors=True)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = LlamaForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda", use_safetensors=True)
+# model.gradient_checkpointing_enable()
+
 attnlrp.register(model)
+
 print(f"Loaded model.")
 
 def really_clean_tokens(tokens):
@@ -77,7 +80,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
 
     ### FIND ZONES OF INTEREST
-    threshold_per_token = 0.
+    threshold_per_token = 0.1
     kernel_width = 6
     context_width = 20 # Number of tokens to include as context on each side
     kernel = np.ones((kernel_width, kernel_width))
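
For reference, the model-loading section reads roughly as below after this change. The imports are not shown in the diff, so they are assumptions: LlamaForCausalLM and attnlrp look like the patched Llama wrapper and AttnLRP composite from the LXT (lxt) package, and AutoTokenizer comes from transformers.

import torch
from transformers import AutoTokenizer
from lxt.models.llama import LlamaForCausalLM, attnlrp  # assumed import path, not shown in the diff

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

print(f"Loading model {model_id}...")

# The model is now loaded in plain bfloat16: the quantization_config argument
# from the previous version is gone, and the load happens after the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    use_safetensors=True,
)
# model.gradient_checkpointing_enable()

attnlrp.register(model)  # register the AttnLRP rules so relevances can be backpropagated

print(f"Loaded model.")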
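
The only functional change in the second hunk is raising threshold_per_token from 0. to 0.1. The diff shows the parameters (a 6x6 box kernel, a 20-token context window) but not how they are applied, so the snippet below is only a hypothetical sketch of how such a kernel and threshold could be combined to pick out zones of interest; the function name and the use of scipy's convolve2d are illustrative, not taken from app.py.

import numpy as np
from scipy.signal import convolve2d

def find_zones_of_interest(attention_matrix, threshold_per_token=0.1, kernel_width=6):
    # Hypothetical sketch: return (row, col) positions whose locally averaged
    # relevance exceeds the per-token threshold.
    kernel = np.ones((kernel_width, kernel_width))
    # Average the relevance map over a kernel_width x kernel_width neighbourhood.
    smoothed = convolve2d(attention_matrix, kernel / kernel.size, mode="same")
    # With the old threshold of 0., any position with positive smoothed relevance
    # passed; 0.1 filters out weakly relevant regions.
    return np.argwhere(smoothed > threshold_per_token)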
|