aaron0eidt committed on
Commit
c1cc5c3
·
1 Parent(s): 1c6936d

Add vectors with LFS2

Browse files
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.npz filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -21,7 +21,7 @@ env/
21
  .DS_Store
22
 
23
  # User Data
24
- user_study/data/
25
  user_study/voice_memos/files/
26
  user_study/voice_memos/merged_files/
27
  user_study/voice_memos/transcripts/
@@ -44,10 +44,3 @@ circuit_analysis/results/attribution_graph_prompt_de_2.png
44
  circuit_analysis/results/attribution_graph_prompt_de_3.png
45
  circuit_analysis/results/clt_training_loss.png
46
  circuit_analysis/results/offline_circuit_metrics_combined.png
47
- function_vectors/data/vectors/de_category_vectors.npz
48
- function_vectors/data/vectors/en_category_vectors.npz
49
-
50
- # User Study Data
51
- user_study/
52
-
53
-
 
21
  .DS_Store
22
 
23
  # User Data
24
+
25
  user_study/voice_memos/files/
26
  user_study/voice_memos/merged_files/
27
  user_study/voice_memos/transcripts/
 
44
  circuit_analysis/results/attribution_graph_prompt_de_3.png
45
  circuit_analysis/results/clt_training_loss.png
46
  circuit_analysis/results/offline_circuit_metrics_combined.png
 
 
 
 
 
 
 
attribution_analysis/attribution_analysis_page.py CHANGED
@@ -60,6 +60,7 @@ def load_all_attribution_models():
60
  model_path = "./models/OLMo-2-1124-7B"
61
  hf_token = os.environ.get("HF_TOKEN")
62
 
 
63
  # Load tokenizer and model.
64
  tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token, trust_remote_code=True)
65
  tokenizer.model_max_length = 512
@@ -833,6 +834,23 @@ def run_analysis(prompt, max_tokens, enable_explanations, force_exact_num_tokens
833
  print("Loading full attribution analysis from cache.")
834
  cached_result = cached_data[prompt]
835
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
836
  # Populate session state from the comprehensive cache
837
  st.session_state.generated_text = cached_result["generated_text"]
838
  st.session_state.prompt = prompt
@@ -918,9 +936,15 @@ def run_analysis(prompt, max_tokens, enable_explanations, force_exact_num_tokens
918
 
919
  # Add new result
920
  html_contents = {method: attr.show(display=False, return_html=True) for method, attr in all_attributions.items()}
 
 
 
 
 
921
  cached_data[prompt] = {
922
  "generated_text": generated_text,
923
- "html_contents": html_contents
 
924
  }
925
 
926
  # Write back to file
 
60
  model_path = "./models/OLMo-2-1124-7B"
61
  hf_token = os.environ.get("HF_TOKEN")
62
 
63
+
64
  # Load tokenizer and model.
65
  tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token, trust_remote_code=True)
66
  tokenizer.model_max_length = 512
 
834
  print("Loading full attribution analysis from cache.")
835
  cached_result = cached_data[prompt]
836
 
837
+ # Check if influential_docs are missing and update the cache if possible
838
+ if "influential_docs" not in cached_result:
839
+ try:
840
+ print(f"Updating cache for '{prompt}' with missing influence docs...")
841
+ lang = st.session_state.get('lang', 'en')
842
+ # This call should hit the Streamlit cache and be fast
843
+ missing_docs = get_influential_docs(prompt, lang)
844
+
845
+ if missing_docs:
846
+ cached_result["influential_docs"] = missing_docs
847
+ # Save updated cache back to file
848
+ with open(cache_file, "w", encoding="utf-8") as f:
849
+ json.dump(cached_data, f, ensure_ascii=False, indent=4)
850
+ print("Cache updated successfully.")
851
+ except Exception as e:
852
+ print(f"Could not update cache with influence docs: {e}")
853
+
854
  # Populate session state from the comprehensive cache
855
  st.session_state.generated_text = cached_result["generated_text"]
856
  st.session_state.prompt = prompt
 
936
 
937
  # Add new result
938
  html_contents = {method: attr.show(display=False, return_html=True) for method, attr in all_attributions.items()}
939
+
940
+ # Also fetch influential docs to cache them
941
+ lang = st.session_state.get('lang', 'en')
942
+ docs_to_cache = get_influential_docs(prompt, lang)
943
+
944
  cached_data[prompt] = {
945
  "generated_text": generated_text,
946
+ "html_contents": html_contents,
947
+ "influential_docs": docs_to_cache
948
  }
949
 
950
  # Write back to file
function_vectors/data/vectors/de_category_vectors.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca7d23891ceeed5c24ab00b0ec0660c1fa771f021ec01f3dd24061898bf329b8
3
+ size 1706730
function_vectors/data/vectors/en_category_vectors.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4bcdbfca0bd4aaeab1e0040ca4c06567c34c9301c2acdf0debacc705734d510
3
+ size 1717296
requirements.txt CHANGED
@@ -24,3 +24,4 @@ tqdm>=4.65.0
24
  datasets>=2.14.0
25
  openai-whisper>=20230918
26
  scipy>=1.10.0
 
 
24
  datasets>=2.14.0
25
  openai-whisper>=20230918
26
  scipy>=1.10.0
27
+ kaleido>=0.2.1
user_study/data/participant_counter.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 589
user_study/data/user_data.csv ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ participant_id,feedback_timestamp,language,age,llm_experience,attr_q_visual_clarity,attr_q_cognitive_load,attr_q_influencer_plausibility,attr_s1_correct,attr_s2_correct,attr_s3_correct,fv_q_pca_clarity,fv_q_type_attribution_clarity,fv_q_layer_evolution_plausibility,fv_q1_correct,fv_q2_correct,fv_q3_correct,ct_q_main_graph_clarity,ct_q_feature_explorer_usefulness,ct_q_subnetwork_clarity,ct_q1_correct,ct_q2_correct,ct_q3_correct
2
+ 0,2025-07-27 19:08:56,en,35_44,intermediate,4,4,5,True,True,True,4,4,4,True,True,True,5,5,5,True,True,True
3
+ 1,2025-07-28 00:07:44,en,18_24,intermediate,5,3,5,True,True,True,5,5,3,True,True,True,5,4,5,True,True,True
4
+ 2,2025-07-28 21:03:37,en,18_24,novice,3,5,3,True,True,True,5,5,3,True,True,True,4,5,3,True,True,True
5
+ 3,2025-07-28 23:50:41,en,18_24,intermediate,4,2,2,True,True,False,4,5,5,True,True,True,2,4,2,True,True,True
6
+ 4,2025-07-29 16:08:13,en,18_24,novice,4,4,5,True,True,True,4,5,3,True,True,True,4,5,5,True,True,True
7
+ 5,2025-07-29 18:25:51,en,18_24,intermediate,3,4,5,True,True,True,5,5,5,True,True,True,5,5,4,True,True,True
8
+ 6,2025-07-29 20:12:29,de,18_24,novice,4,2,3,True,True,True,2,5,4,True,True,True,5,5,5,True,True,True
9
+ 7,2025-07-29 21:04:37,de,18_24,novice,2,2,3,True,False,False,4,5,3,True,True,True,4,2,1,True,True,True
10
+ 8,2025-07-30 16:35:19,en,under_18,novice,3,3,4,True,True,True,5,5,3,True,True,True,4,3,4,True,True,True
11
+ 9,2025-07-31 13:50:49,de,18_24,intermediate,5,3,2,True,True,True,5,5,4,True,True,True,3,3,3,True,True,True
12
+ 10,2025-07-31 20:06:51,en,18_24,intermediate,2,3,5,True,True,True,5,5,2,True,True,True,3,4,4,True,True,True
13
+ 11,2025-07-31 21:42:36,de,18_24,novice,4,3,5,True,True,True,3,4,3,True,True,True,3,3,3,True,True,True
14
+ 12,2025-08-01 13:53:02,en,25_34,expert,2,3,2,True,True,True,5,3,4,True,True,True,4,4,4,True,True,True
15
+ 13,2025-08-01 21:22:36,de,18_24,novice,3,3,5,True,True,False,4,5,2,True,True,True,3,4,4,True,True,True
16
+ 14,2025-08-02 15:00:41,en,18_24,expert,4,2,5,True,True,True,5,5,4,True,True,True,4,5,4,True,True,True
17
+ 15,2025-08-03 19:36:39,en,18_24,intermediate,4,3,5,True,True,True,3,5,5,True,True,True,4,5,5,True,True,True
18
+ 16,2025-08-10 14:55:04,de,18_24,novice,4,2,5,True,True,True,5,5,4,True,False,True,4,5,5,True,True,True
19
+ 17,2025-08-12 18:17:59,de,55_64,novice,3,5,3,True,True,True,4,5,3,True,True,True,3,4,2,True,True,True
utilities/utils.py CHANGED
@@ -14,9 +14,10 @@ def set_seed(seed_value=42):
14
  def init_qwen_api():
15
  # Set up the API configuration for Qwen.
16
  api_key = os.environ.get("QWEN_API_KEY")
 
17
  if not api_key:
18
- # Fallback or warning could go here, but for now we rely on the env var
19
- print("Warning: QWEN_API_KEY not set in environment variables.")
20
 
21
  return {
22
  "api_key": api_key,
 
14
  def init_qwen_api():
15
  # Set up the API configuration for Qwen.
16
  api_key = os.environ.get("QWEN_API_KEY")
17
+
18
  if not api_key:
19
+ # Fallback for local testing if env var is missing
20
+ api_key = "6e3def45d61b0b20547a1fcbab6464d8"
21
 
22
  return {
23
  "api_key": api_key,
web_app.py CHANGED
@@ -102,13 +102,6 @@ def main():
102
  if 'fv_feedback_submitted' not in st.session_state:
103
  st.session_state.fv_feedback_submitted = False
104
 
105
- st.set_page_config(
106
- page_title="LLM Analysis Suite",
107
- page_icon="🧠",
108
- layout="wide",
109
- initial_sidebar_state="expanded"
110
- )
111
-
112
  logo_path = Path(__file__).parent / "LOGO" / "Logo.png"
113
  if logo_path.exists():
114
  with open(logo_path, "rb") as logo_file:
 
102
  if 'fv_feedback_submitted' not in st.session_state:
103
  st.session_state.fv_feedback_submitted = False
104
 
 
 
 
 
 
 
 
105
  logo_path = Path(__file__).parent / "LOGO" / "Logo.png"
106
  if logo_path.exists():
107
  with open(logo_path, "rb") as logo_file: