Working version
app.py
CHANGED
@@ -25,9 +25,12 @@ model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
 print(f"Loading model {model_id}...")
 
-model = LlamaForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, quantization_config=quantization_config, device_map="cuda", use_safetensors=True)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = LlamaForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda", use_safetensors=True)
+# model.gradient_checkpointing_enable()
+
 attnlrp.register(model)
+
 print(f"Loaded model.")
 
 def really_clean_tokens(tokens):
@@ -77,7 +80,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
 
     ### FIND ZONES OF INTEREST
-    threshold_per_token = 0.
+    threshold_per_token = 0.1
     kernel_width = 6
     context_width = 20 # Number of tokens to include as context on each side
     kernel = np.ones((kernel_width, kernel_width))
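
For reference, the model-loading section reads roughly as below after this change. The imports are not shown in the diff, so they are assumptions: LlamaForCausalLM and attnlrp look like the patched Llama wrapper and AttnLRP composite from the LXT (lxt) package, and AutoTokenizer comes from transformers.

import torch
from transformers import AutoTokenizer
from lxt.models.llama import LlamaForCausalLM, attnlrp  # assumed import path, not shown in the diff

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

print(f"Loading model {model_id}...")

# The model is now loaded in plain bfloat16: the quantization_config argument
# from the previous version is gone, and the load happens after the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    use_safetensors=True,
)
# model.gradient_checkpointing_enable()

attnlrp.register(model)  # register the AttnLRP rules so relevances can be backpropagated

print(f"Loaded model.")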
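
The only functional change in the second hunk is raising threshold_per_token from 0. to 0.1. The diff shows the parameters (a 6x6 box kernel, a 20-token context window) but not how they are applied, so the snippet below is only a hypothetical sketch of how such a kernel and threshold could be combined to pick out zones of interest; the function name and the use of scipy's convolve2d are illustrative, not taken from app.py.

import numpy as np
from scipy.signal import convolve2d

def find_zones_of_interest(attention_matrix, threshold_per_token=0.1, kernel_width=6):
    # Hypothetical sketch: return (row, col) positions whose locally averaged
    # relevance exceeds the per-token threshold.
    kernel = np.ones((kernel_width, kernel_width))
    # Average the relevance map over a kernel_width x kernel_width neighbourhood.
    smoothed = convolve2d(attention_matrix, kernel / kernel.size, mode="same")
    # With the old threshold of 0., any position with positive smoothed relevance
    # passed; 0.1 filters out weakly relevant regions.
    return np.argwhere(smoothed > threshold_per_token)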
|