Spaces:
Running
Running
Commit ·
c1cc5c3
1
Parent(s): 1c6936d
Add vectors with LFS2
Browse files- .gitattributes +1 -0
- .gitignore +1 -8
- attribution_analysis/attribution_analysis_page.py +25 -1
- function_vectors/data/vectors/de_category_vectors.npz +3 -0
- function_vectors/data/vectors/en_category_vectors.npz +3 -0
- requirements.txt +1 -0
- user_study/data/participant_counter.txt +1 -0
- user_study/data/user_data.csv +19 -0
- utilities/utils.py +3 -2
- web_app.py +0 -7
.gitattributes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
|
@@ -21,7 +21,7 @@ env/
|
|
| 21 |
.DS_Store
|
| 22 |
|
| 23 |
# User Data
|
| 24 |
-
|
| 25 |
user_study/voice_memos/files/
|
| 26 |
user_study/voice_memos/merged_files/
|
| 27 |
user_study/voice_memos/transcripts/
|
|
@@ -44,10 +44,3 @@ circuit_analysis/results/attribution_graph_prompt_de_2.png
|
|
| 44 |
circuit_analysis/results/attribution_graph_prompt_de_3.png
|
| 45 |
circuit_analysis/results/clt_training_loss.png
|
| 46 |
circuit_analysis/results/offline_circuit_metrics_combined.png
|
| 47 |
-
function_vectors/data/vectors/de_category_vectors.npz
|
| 48 |
-
function_vectors/data/vectors/en_category_vectors.npz
|
| 49 |
-
|
| 50 |
-
# User Study Data
|
| 51 |
-
user_study/
|
| 52 |
-
|
| 53 |
-
|
|
|
|
| 21 |
.DS_Store
|
| 22 |
|
| 23 |
# User Data
|
| 24 |
+
|
| 25 |
user_study/voice_memos/files/
|
| 26 |
user_study/voice_memos/merged_files/
|
| 27 |
user_study/voice_memos/transcripts/
|
|
|
|
| 44 |
circuit_analysis/results/attribution_graph_prompt_de_3.png
|
| 45 |
circuit_analysis/results/clt_training_loss.png
|
| 46 |
circuit_analysis/results/offline_circuit_metrics_combined.png
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
attribution_analysis/attribution_analysis_page.py
CHANGED
|
@@ -60,6 +60,7 @@ def load_all_attribution_models():
|
|
| 60 |
model_path = "./models/OLMo-2-1124-7B"
|
| 61 |
hf_token = os.environ.get("HF_TOKEN")
|
| 62 |
|
|
|
|
| 63 |
# Load tokenizer and model.
|
| 64 |
tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token, trust_remote_code=True)
|
| 65 |
tokenizer.model_max_length = 512
|
|
@@ -833,6 +834,23 @@ def run_analysis(prompt, max_tokens, enable_explanations, force_exact_num_tokens
|
|
| 833 |
print("Loading full attribution analysis from cache.")
|
| 834 |
cached_result = cached_data[prompt]
|
| 835 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 836 |
# Populate session state from the comprehensive cache
|
| 837 |
st.session_state.generated_text = cached_result["generated_text"]
|
| 838 |
st.session_state.prompt = prompt
|
|
@@ -918,9 +936,15 @@ def run_analysis(prompt, max_tokens, enable_explanations, force_exact_num_tokens
|
|
| 918 |
|
| 919 |
# Add new result
|
| 920 |
html_contents = {method: attr.show(display=False, return_html=True) for method, attr in all_attributions.items()}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 921 |
cached_data[prompt] = {
|
| 922 |
"generated_text": generated_text,
|
| 923 |
-
"html_contents": html_contents
|
|
|
|
| 924 |
}
|
| 925 |
|
| 926 |
# Write back to file
|
|
|
|
| 60 |
model_path = "./models/OLMo-2-1124-7B"
|
| 61 |
hf_token = os.environ.get("HF_TOKEN")
|
| 62 |
|
| 63 |
+
|
| 64 |
# Load tokenizer and model.
|
| 65 |
tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token, trust_remote_code=True)
|
| 66 |
tokenizer.model_max_length = 512
|
|
|
|
| 834 |
print("Loading full attribution analysis from cache.")
|
| 835 |
cached_result = cached_data[prompt]
|
| 836 |
|
| 837 |
+
# Check if influential_docs are missing and update the cache if possible
|
| 838 |
+
if "influential_docs" not in cached_result:
|
| 839 |
+
try:
|
| 840 |
+
print(f"Updating cache for '{prompt}' with missing influence docs...")
|
| 841 |
+
lang = st.session_state.get('lang', 'en')
|
| 842 |
+
# This call should hit the Streamlit cache and be fast
|
| 843 |
+
missing_docs = get_influential_docs(prompt, lang)
|
| 844 |
+
|
| 845 |
+
if missing_docs:
|
| 846 |
+
cached_result["influential_docs"] = missing_docs
|
| 847 |
+
# Save updated cache back to file
|
| 848 |
+
with open(cache_file, "w", encoding="utf-8") as f:
|
| 849 |
+
json.dump(cached_data, f, ensure_ascii=False, indent=4)
|
| 850 |
+
print("Cache updated successfully.")
|
| 851 |
+
except Exception as e:
|
| 852 |
+
print(f"Could not update cache with influence docs: {e}")
|
| 853 |
+
|
| 854 |
# Populate session state from the comprehensive cache
|
| 855 |
st.session_state.generated_text = cached_result["generated_text"]
|
| 856 |
st.session_state.prompt = prompt
|
|
|
|
| 936 |
|
| 937 |
# Add new result
|
| 938 |
html_contents = {method: attr.show(display=False, return_html=True) for method, attr in all_attributions.items()}
|
| 939 |
+
|
| 940 |
+
# Also fetch influential docs to cache them
|
| 941 |
+
lang = st.session_state.get('lang', 'en')
|
| 942 |
+
docs_to_cache = get_influential_docs(prompt, lang)
|
| 943 |
+
|
| 944 |
cached_data[prompt] = {
|
| 945 |
"generated_text": generated_text,
|
| 946 |
+
"html_contents": html_contents,
|
| 947 |
+
"influential_docs": docs_to_cache
|
| 948 |
}
|
| 949 |
|
| 950 |
# Write back to file
|
function_vectors/data/vectors/de_category_vectors.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca7d23891ceeed5c24ab00b0ec0660c1fa771f021ec01f3dd24061898bf329b8
|
| 3 |
+
size 1706730
|
function_vectors/data/vectors/en_category_vectors.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4bcdbfca0bd4aaeab1e0040ca4c06567c34c9301c2acdf0debacc705734d510
|
| 3 |
+
size 1717296
|
requirements.txt
CHANGED
|
@@ -24,3 +24,4 @@ tqdm>=4.65.0
|
|
| 24 |
datasets>=2.14.0
|
| 25 |
openai-whisper>=20230918
|
| 26 |
scipy>=1.10.0
|
|
|
|
|
|
| 24 |
datasets>=2.14.0
|
| 25 |
openai-whisper>=20230918
|
| 26 |
scipy>=1.10.0
|
| 27 |
+
kaleido>=0.2.1
|
user_study/data/participant_counter.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
589
|
user_study/data/user_data.csv
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
participant_id,feedback_timestamp,language,age,llm_experience,attr_q_visual_clarity,attr_q_cognitive_load,attr_q_influencer_plausibility,attr_s1_correct,attr_s2_correct,attr_s3_correct,fv_q_pca_clarity,fv_q_type_attribution_clarity,fv_q_layer_evolution_plausibility,fv_q1_correct,fv_q2_correct,fv_q3_correct,ct_q_main_graph_clarity,ct_q_feature_explorer_usefulness,ct_q_subnetwork_clarity,ct_q1_correct,ct_q2_correct,ct_q3_correct
|
| 2 |
+
0,2025-07-27 19:08:56,en,35_44,intermediate,4,4,5,True,True,True,4,4,4,True,True,True,5,5,5,True,True,True
|
| 3 |
+
1,2025-07-28 00:07:44,en,18_24,intermediate,5,3,5,True,True,True,5,5,3,True,True,True,5,4,5,True,True,True
|
| 4 |
+
2,2025-07-28 21:03:37,en,18_24,novice,3,5,3,True,True,True,5,5,3,True,True,True,4,5,3,True,True,True
|
| 5 |
+
3,2025-07-28 23:50:41,en,18_24,intermediate,4,2,2,True,True,False,4,5,5,True,True,True,2,4,2,True,True,True
|
| 6 |
+
4,2025-07-29 16:08:13,en,18_24,novice,4,4,5,True,True,True,4,5,3,True,True,True,4,5,5,True,True,True
|
| 7 |
+
5,2025-07-29 18:25:51,en,18_24,intermediate,3,4,5,True,True,True,5,5,5,True,True,True,5,5,4,True,True,True
|
| 8 |
+
6,2025-07-29 20:12:29,de,18_24,novice,4,2,3,True,True,True,2,5,4,True,True,True,5,5,5,True,True,True
|
| 9 |
+
7,2025-07-29 21:04:37,de,18_24,novice,2,2,3,True,False,False,4,5,3,True,True,True,4,2,1,True,True,True
|
| 10 |
+
8,2025-07-30 16:35:19,en,under_18,novice,3,3,4,True,True,True,5,5,3,True,True,True,4,3,4,True,True,True
|
| 11 |
+
9,2025-07-31 13:50:49,de,18_24,intermediate,5,3,2,True,True,True,5,5,4,True,True,True,3,3,3,True,True,True
|
| 12 |
+
10,2025-07-31 20:06:51,en,18_24,intermediate,2,3,5,True,True,True,5,5,2,True,True,True,3,4,4,True,True,True
|
| 13 |
+
11,2025-07-31 21:42:36,de,18_24,novice,4,3,5,True,True,True,3,4,3,True,True,True,3,3,3,True,True,True
|
| 14 |
+
12,2025-08-01 13:53:02,en,25_34,expert,2,3,2,True,True,True,5,3,4,True,True,True,4,4,4,True,True,True
|
| 15 |
+
13,2025-08-01 21:22:36,de,18_24,novice,3,3,5,True,True,False,4,5,2,True,True,True,3,4,4,True,True,True
|
| 16 |
+
14,2025-08-02 15:00:41,en,18_24,expert,4,2,5,True,True,True,5,5,4,True,True,True,4,5,4,True,True,True
|
| 17 |
+
15,2025-08-03 19:36:39,en,18_24,intermediate,4,3,5,True,True,True,3,5,5,True,True,True,4,5,5,True,True,True
|
| 18 |
+
16,2025-08-10 14:55:04,de,18_24,novice,4,2,5,True,True,True,5,5,4,True,False,True,4,5,5,True,True,True
|
| 19 |
+
17,2025-08-12 18:17:59,de,55_64,novice,3,5,3,True,True,True,4,5,3,True,True,True,3,4,2,True,True,True
|
utilities/utils.py
CHANGED
|
@@ -14,9 +14,10 @@ def set_seed(seed_value=42):
|
|
| 14 |
def init_qwen_api():
|
| 15 |
# Set up the API configuration for Qwen.
|
| 16 |
api_key = os.environ.get("QWEN_API_KEY")
|
|
|
|
| 17 |
if not api_key:
|
| 18 |
-
# Fallback
|
| 19 |
-
|
| 20 |
|
| 21 |
return {
|
| 22 |
"api_key": api_key,
|
|
|
|
| 14 |
def init_qwen_api():
|
| 15 |
# Set up the API configuration for Qwen.
|
| 16 |
api_key = os.environ.get("QWEN_API_KEY")
|
| 17 |
+
|
| 18 |
if not api_key:
|
| 19 |
+
# Fallback for local testing if env var is missing
|
| 20 |
+
api_key = "6e3def45d61b0b20547a1fcbab6464d8"
|
| 21 |
|
| 22 |
return {
|
| 23 |
"api_key": api_key,
|
web_app.py
CHANGED
|
@@ -102,13 +102,6 @@ def main():
|
|
| 102 |
if 'fv_feedback_submitted' not in st.session_state:
|
| 103 |
st.session_state.fv_feedback_submitted = False
|
| 104 |
|
| 105 |
-
st.set_page_config(
|
| 106 |
-
page_title="LLM Analysis Suite",
|
| 107 |
-
page_icon="🧠",
|
| 108 |
-
layout="wide",
|
| 109 |
-
initial_sidebar_state="expanded"
|
| 110 |
-
)
|
| 111 |
-
|
| 112 |
logo_path = Path(__file__).parent / "LOGO" / "Logo.png"
|
| 113 |
if logo_path.exists():
|
| 114 |
with open(logo_path, "rb") as logo_file:
|
|
|
|
| 102 |
if 'fv_feedback_submitted' not in st.session_state:
|
| 103 |
st.session_state.fv_feedback_submitted = False
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
logo_path = Path(__file__).parent / "LOGO" / "Logo.png"
|
| 106 |
if logo_path.exists():
|
| 107 |
with open(logo_path, "rb") as logo_file:
|