Spaces:

ExplainabiliyForAATeam
/

explainability-tool-for-aa

Running

App Files Community

Anisha Bhatnagar committed on Sep 9, 2025

Commit

3ad08b5

1 Parent(s): 9a097e7

fixed bug in region computation for all task authors and reduced logging

Browse files

Files changed (3) hide show

app.py +1 -0
utils/interp_space_utils.py +17 -17
utils/llm_feat_utils.py +7 -2

app.py CHANGED Viewed

@@ -22,6 +22,7 @@ def load_config(path="config/config.yaml"):
         return yaml.safe_load(f)
 # A comment to trigger change in spaces
 cfg = load_config()

         return yaml.safe_load(f)
 # A comment to trigger change in spaces
+# comment 2
 cfg = load_config()

utils/interp_space_utils.py CHANGED Viewed

@@ -697,19 +697,19 @@ def compute_clusters_g2v_representation(
     filtered_features = []
     for feature, score, z_score in top_g2v_feats:
         # DEBUG: Print what we're checking for this feature
-        print(f"[DEBUG] Checking feature: {feature}")
-        print(f"[DEBUG] Feature score: {score}, z_score: {z_score}")
         # Check if the feature has a non-zero value in all of the selected authors
         feature_presence = []
         for i, author_g2v_feats in enumerate(selected_authors_g2v_data):
             feature_value = author_g2v_feats.get(feature, 0)
             feature_presence.append(feature_value)
-            print(f"[DEBUG] Author {i} has feature '{feature}' = {feature_value}")
-        print(f"[DEBUG] All feature values: {feature_presence}")
-        print(f"[DEBUG] All values > 0? {[v > 0 for v in feature_presence]}")
-        print(f"[DEBUG] All values > 0? {all(v > 0 for v in feature_presence)}")
         # First check: feature must be present in Gram2Vec vectors
         vector_present = all(author_g2v_feats.get(feature, 0) > 0 for author_g2v_feats in selected_authors_g2v_data)
@@ -727,11 +727,11 @@ def compute_clusters_g2v_representation(
                     spans = find_feature_spans(doc_text, feature)
                     if not spans:  # No spans found in this document
-                        print(f"[DEBUG] ✗ Feature '{feature}' not found in document {i} of selected author")
                         text_present = False
                         break
-                    else:
-                        print(f"[DEBUG] ✓ Feature '{feature}' found in document {i} with {len(spans)} spans")
             except Exception as e:
                 print(f"[WARNING] Error checking text presence for feature '{feature}': {e}")
                 # Fall back to vector-based filtering if text checking fails
@@ -740,13 +740,13 @@ def compute_clusters_g2v_representation(
         # Feature must pass BOTH checks
         if vector_present and text_present:
             filtered_features.append((feature, score, z_score))
-            print(f"[DEBUG] ✓ Feature '{feature}' PASSED both vector and text checks")
-        else:
-            if not vector_present:
-                print(f"[DEBUG] ✗ Feature '{feature}' FAILED vector check")
-            if not text_present:
-                print(f"[DEBUG] ✗ Feature '{feature}' FAILED text check")
-            print(f"[DEBUG] ✗ Feature '{feature}' FAILED the filter")
     print('Filtered G2V features: ', [(f[0], f[2]) for f in filtered_features])  # Print feature names and z-scores
@@ -947,7 +947,7 @@ def compute_precomputed_regions(bg_proj, bg_ids, q_proj, c_proj, model_name, n_n
     # Region 11: Centroid of all task authors (mystery + 3 candidates)
     task_centroid = np.mean(np.vstack([q_proj, c_proj]), axis=0)
     regions["All Task Authors Centroid"] = get_region_around_point(
-        task_centroid, "All Task Authors"
     )
     def serialize_numpy_dtypes(obj):

     filtered_features = []
     for feature, score, z_score in top_g2v_feats:
         # DEBUG: Print what we're checking for this feature
+        # print(f"[DEBUG] Checking feature: {feature}")
+        # print(f"[DEBUG] Feature score: {score}, z_score: {z_score}")
         # Check if the feature has a non-zero value in all of the selected authors
         feature_presence = []
         for i, author_g2v_feats in enumerate(selected_authors_g2v_data):
             feature_value = author_g2v_feats.get(feature, 0)
             feature_presence.append(feature_value)
+            # print(f"[DEBUG] Author {i} has feature '{feature}' = {feature_value}")
+        # print(f"[DEBUG] All feature values: {feature_presence}")
+        # print(f"[DEBUG] All values > 0? {[v > 0 for v in feature_presence]}")
+        # print(f"[DEBUG] All values > 0? {all(v > 0 for v in feature_presence)}")
         # First check: feature must be present in Gram2Vec vectors
         vector_present = all(author_g2v_feats.get(feature, 0) > 0 for author_g2v_feats in selected_authors_g2v_data)
                     spans = find_feature_spans(doc_text, feature)
                     if not spans:  # No spans found in this document
+                        # print(f"[DEBUG] ✗ Feature '{feature}' not found in document {i} of selected author")
                         text_present = False
                         break
+                    # else:
+                        # print(f"[DEBUG] ✓ Feature '{feature}' found in document {i} with {len(spans)} spans")
             except Exception as e:
                 print(f"[WARNING] Error checking text presence for feature '{feature}': {e}")
                 # Fall back to vector-based filtering if text checking fails
         # Feature must pass BOTH checks
         if vector_present and text_present:
             filtered_features.append((feature, score, z_score))
+            # print(f"[DEBUG] ✓ Feature '{feature}' PASSED both vector and text checks")
+        # else:
+        #     if not vector_present:
+        #         # print(f"[DEBUG] ✗ Feature '{feature}' FAILED vector check")
+        #     if not text_present:
+        #         # print(f"[DEBUG] ✗ Feature '{feature}' FAILED text check")
+        #     # print(f"[DEBUG] ✗ Feature '{feature}' FAILED the filter")
     print('Filtered G2V features: ', [(f[0], f[2]) for f in filtered_features])  # Print feature names and z-scores
     # Region 11: Centroid of all task authors (mystery + 3 candidates)
     task_centroid = np.mean(np.vstack([q_proj, c_proj]), axis=0)
     regions["All Task Authors Centroid"] = get_region_around_point(
+        task_centroid, "All Task Authors", include_points=np.vstack([q_proj, c_proj])
     )
     def serialize_numpy_dtypes(obj):

utils/llm_feat_utils.py CHANGED Viewed

@@ -100,6 +100,8 @@ def generate_feature_spans_cached(client, text: str, features: list[str], role:
         cache = {}
     result: dict[str, list[str]] = {}
     missing_feats: list[str] = []
     for feat in features:
         if feat == "None":
@@ -108,12 +110,15 @@ def generate_feature_spans_cached(client, text: str, features: list[str], role:
         h = _feat_hash(feat, text)
         if h in cache:
-            print(f"Found feature: {feat}")
             result[feat] = cache[h]["spans"]
         else:
-            print(f"Missing feature: {feat}")
             missing_feats.append(feat)
     if missing_feats:
         mapping = generate_feature_spans_with_retries(client, text, missing_feats)

         cache = {}
     result: dict[str, list[str]] = {}
     missing_feats: list[str] = []
+    missing_feats_count = 0
+    found_feats_count = 0
     for feat in features:
         if feat == "None":
         h = _feat_hash(feat, text)
         if h in cache:
+            # print(f"Found feature: {feat}")
+            found_feats_count += 1
             result[feat] = cache[h]["spans"]
         else:
+            # print(f"Missing feature: {feat}")
+            missing_feats_count += 1
             missing_feats.append(feat)
+    print(f"Found {found_feats_count} features in cache, {missing_feats_count} missing")
     if missing_feats:
         mapping = generate_feature_spans_with_retries(client, text, missing_feats)