Spaces:

aaron0eidt
/

ELIA

Running

App Files Community

aaron0eidt committed on Nov 29, 2025

Commit

c1cc5c3

1 Parent(s): 1c6936d

Add vectors with LFS2

Browse files

Files changed (10) hide show

.gitattributes +1 -0
.gitignore +1 -8
attribution_analysis/attribution_analysis_page.py +25 -1
function_vectors/data/vectors/de_category_vectors.npz +3 -0
function_vectors/data/vectors/en_category_vectors.npz +3 -0
requirements.txt +1 -0
user_study/data/participant_counter.txt +1 -0
user_study/data/user_data.csv +19 -0
utilities/utils.py +3 -2
web_app.py +0 -7

.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.npz filter=lfs diff=lfs merge=lfs -text

.gitignore CHANGED Viewed

@@ -21,7 +21,7 @@ env/
 .DS_Store
 # User Data
-user_study/data/
 user_study/voice_memos/files/
 user_study/voice_memos/merged_files/
 user_study/voice_memos/transcripts/
@@ -44,10 +44,3 @@ circuit_analysis/results/attribution_graph_prompt_de_2.png
 circuit_analysis/results/attribution_graph_prompt_de_3.png
 circuit_analysis/results/clt_training_loss.png
 circuit_analysis/results/offline_circuit_metrics_combined.png
-function_vectors/data/vectors/de_category_vectors.npz
-function_vectors/data/vectors/en_category_vectors.npz
-# User Study Data
-user_study/

 .DS_Store
 # User Data
 user_study/voice_memos/files/
 user_study/voice_memos/merged_files/
 user_study/voice_memos/transcripts/
 circuit_analysis/results/attribution_graph_prompt_de_3.png
 circuit_analysis/results/clt_training_loss.png
 circuit_analysis/results/offline_circuit_metrics_combined.png

attribution_analysis/attribution_analysis_page.py CHANGED Viewed

@@ -60,6 +60,7 @@ def load_all_attribution_models():
         model_path = "./models/OLMo-2-1124-7B"
         hf_token = os.environ.get("HF_TOKEN")
         # Load tokenizer and model.
         tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token, trust_remote_code=True)
         tokenizer.model_max_length = 512
@@ -833,6 +834,23 @@ def run_analysis(prompt, max_tokens, enable_explanations, force_exact_num_tokens
             print("Loading full attribution analysis from cache.")
             cached_result = cached_data[prompt]
             # Populate session state from the comprehensive cache
             st.session_state.generated_text = cached_result["generated_text"]
             st.session_state.prompt = prompt
@@ -918,9 +936,15 @@ def run_analysis(prompt, max_tokens, enable_explanations, force_exact_num_tokens
         # Add new result
         html_contents = {method: attr.show(display=False, return_html=True) for method, attr in all_attributions.items()}
         cached_data[prompt] = {
             "generated_text": generated_text,
-            "html_contents": html_contents
         }
         # Write back to file

         model_path = "./models/OLMo-2-1124-7B"
         hf_token = os.environ.get("HF_TOKEN")
         # Load tokenizer and model.
         tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token, trust_remote_code=True)
         tokenizer.model_max_length = 512
             print("Loading full attribution analysis from cache.")
             cached_result = cached_data[prompt]
+            # Check if influential_docs are missing and update the cache if possible
+            if "influential_docs" not in cached_result:
+                try:
+                    print(f"Updating cache for '{prompt}' with missing influence docs...")
+                    lang = st.session_state.get('lang', 'en')
+                    # This call should hit the Streamlit cache and be fast
+                    missing_docs = get_influential_docs(prompt, lang)
+                    if missing_docs:
+                        cached_result["influential_docs"] = missing_docs
+                        # Save updated cache back to file
+                        with open(cache_file, "w", encoding="utf-8") as f:
+                            json.dump(cached_data, f, ensure_ascii=False, indent=4)
+                        print("Cache updated successfully.")
+                except Exception as e:
+                    print(f"Could not update cache with influence docs: {e}")
             # Populate session state from the comprehensive cache
             st.session_state.generated_text = cached_result["generated_text"]
             st.session_state.prompt = prompt
         # Add new result
         html_contents = {method: attr.show(display=False, return_html=True) for method, attr in all_attributions.items()}
+        # Also fetch influential docs to cache them
+        lang = st.session_state.get('lang', 'en')
+        docs_to_cache = get_influential_docs(prompt, lang)
         cached_data[prompt] = {
             "generated_text": generated_text,
+            "html_contents": html_contents,
+            "influential_docs": docs_to_cache
         }
         # Write back to file

function_vectors/data/vectors/de_category_vectors.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca7d23891ceeed5c24ab00b0ec0660c1fa771f021ec01f3dd24061898bf329b8
+size 1706730

function_vectors/data/vectors/en_category_vectors.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4bcdbfca0bd4aaeab1e0040ca4c06567c34c9301c2acdf0debacc705734d510
+size 1717296

requirements.txt CHANGED Viewed

@@ -24,3 +24,4 @@ tqdm>=4.65.0
 datasets>=2.14.0
 openai-whisper>=20230918
 scipy>=1.10.0

 datasets>=2.14.0
 openai-whisper>=20230918
 scipy>=1.10.0
+kaleido>=0.2.1

user_study/data/participant_counter.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ 589

user_study/data/user_data.csv ADDED Viewed

	@@ -0,0 +1,19 @@

+participant_id,feedback_timestamp,language,age,llm_experience,attr_q_visual_clarity,attr_q_cognitive_load,attr_q_influencer_plausibility,attr_s1_correct,attr_s2_correct,attr_s3_correct,fv_q_pca_clarity,fv_q_type_attribution_clarity,fv_q_layer_evolution_plausibility,fv_q1_correct,fv_q2_correct,fv_q3_correct,ct_q_main_graph_clarity,ct_q_feature_explorer_usefulness,ct_q_subnetwork_clarity,ct_q1_correct,ct_q2_correct,ct_q3_correct
+0,2025-07-27 19:08:56,en,35_44,intermediate,4,4,5,True,True,True,4,4,4,True,True,True,5,5,5,True,True,True
+1,2025-07-28 00:07:44,en,18_24,intermediate,5,3,5,True,True,True,5,5,3,True,True,True,5,4,5,True,True,True
+2,2025-07-28 21:03:37,en,18_24,novice,3,5,3,True,True,True,5,5,3,True,True,True,4,5,3,True,True,True
+3,2025-07-28 23:50:41,en,18_24,intermediate,4,2,2,True,True,False,4,5,5,True,True,True,2,4,2,True,True,True
+4,2025-07-29 16:08:13,en,18_24,novice,4,4,5,True,True,True,4,5,3,True,True,True,4,5,5,True,True,True
+5,2025-07-29 18:25:51,en,18_24,intermediate,3,4,5,True,True,True,5,5,5,True,True,True,5,5,4,True,True,True
+6,2025-07-29 20:12:29,de,18_24,novice,4,2,3,True,True,True,2,5,4,True,True,True,5,5,5,True,True,True
+7,2025-07-29 21:04:37,de,18_24,novice,2,2,3,True,False,False,4,5,3,True,True,True,4,2,1,True,True,True
+8,2025-07-30 16:35:19,en,under_18,novice,3,3,4,True,True,True,5,5,3,True,True,True,4,3,4,True,True,True
+9,2025-07-31 13:50:49,de,18_24,intermediate,5,3,2,True,True,True,5,5,4,True,True,True,3,3,3,True,True,True
+10,2025-07-31 20:06:51,en,18_24,intermediate,2,3,5,True,True,True,5,5,2,True,True,True,3,4,4,True,True,True
+11,2025-07-31 21:42:36,de,18_24,novice,4,3,5,True,True,True,3,4,3,True,True,True,3,3,3,True,True,True
+12,2025-08-01 13:53:02,en,25_34,expert,2,3,2,True,True,True,5,3,4,True,True,True,4,4,4,True,True,True
+13,2025-08-01 21:22:36,de,18_24,novice,3,3,5,True,True,False,4,5,2,True,True,True,3,4,4,True,True,True
+14,2025-08-02 15:00:41,en,18_24,expert,4,2,5,True,True,True,5,5,4,True,True,True,4,5,4,True,True,True
+15,2025-08-03 19:36:39,en,18_24,intermediate,4,3,5,True,True,True,3,5,5,True,True,True,4,5,5,True,True,True
+16,2025-08-10 14:55:04,de,18_24,novice,4,2,5,True,True,True,5,5,4,True,False,True,4,5,5,True,True,True
+17,2025-08-12 18:17:59,de,55_64,novice,3,5,3,True,True,True,4,5,3,True,True,True,3,4,2,True,True,True

utilities/utils.py CHANGED Viewed

@@ -14,9 +14,10 @@ def set_seed(seed_value=42):
 def init_qwen_api():
     # Set up the API configuration for Qwen.
     api_key = os.environ.get("QWEN_API_KEY")
     if not api_key:
-        # Fallback or warning could go here, but for now we rely on the env var
-        print("Warning: QWEN_API_KEY not set in environment variables.")
     return {
         "api_key": api_key,

 def init_qwen_api():
     # Set up the API configuration for Qwen.
     api_key = os.environ.get("QWEN_API_KEY")
     if not api_key:
+        # Fallback for local testing if env var is missing
+        api_key = "6e3def45d61b0b20547a1fcbab6464d8"
     return {
         "api_key": api_key,

web_app.py CHANGED Viewed

@@ -102,13 +102,6 @@ def main():
         if 'fv_feedback_submitted' not in st.session_state:
             st.session_state.fv_feedback_submitted = False
-        st.set_page_config(
-            page_title="LLM Analysis Suite",
-            page_icon="🧠",
-            layout="wide",
-            initial_sidebar_state="expanded"
-        )
         logo_path = Path(__file__).parent / "LOGO" / "Logo.png"
         if logo_path.exists():
             with open(logo_path, "rb") as logo_file:

         if 'fv_feedback_submitted' not in st.session_state:
             st.session_state.fv_feedback_submitted = False
         logo_path = Path(__file__).parent / "LOGO" / "Logo.png"
         if logo_path.exists():
             with open(logo_path, "rb") as logo_file: