Spaces:

adnlp
/

MulTiCast

Sleeping

App Files Files Community

adnlp committed on Nov 21, 2025

Commit

38518f4

verified ·

1 Parent(s): e9e368b

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -6

app.py CHANGED Viewed

@@ -149,7 +149,7 @@ def color_token(token, weight):
     return f'<span style="background-color: rgba({color[0]}, {color[1]}, {color[2]}, 0.6); padding:2px; border-radius:3px;">{token}</span>'
 def to_html(attention, clean_text_tokens):
-    ca = np.array(attention).mean(axis=0)
     weights = ca / ca.max()
     html_text = " ".join(
         color_token(tok, w) for tok, w in zip(clean_text_tokens, weights)
@@ -305,20 +305,24 @@ def predict(dataset, text, example_index, file, vision_encoder, text_encoder, ts
     # Text Heatmap
     text_tokens = res_json['text_tokens']
-    text_attentions = res_json['text_attentions']
     if text_encoder == "Qwen":
         clean_text_tokens = [t.replace("Ġ", "") for t in text_tokens]
     elif text_encoder == "LLaMA":
         clean_text_tokens = [t.replace("▁", "") for t in text_tokens]
     else:
         pass
     vision_heatmap_html_text = '<div class="gallery-container"><div class="grid-wrap svelte-1atirkn"><div class="grid-container svelte-1atirkn pt-6">'
-    for i in range(0, 12, 3):
         vision_heatmap_html_text += f'<button class="thumbnail-item thumbnail-lg svelte-1atirkn"><div class="svelte-1pijsyv">'
-        vision_heatmap_html_text += to_html(text_attentions[i:i+3], clean_text_tokens)
-        vision_heatmap_html_text += f'</div><div class="caption-label svelte-1atirkn">Heatmap from Layer{i}:{i+3}</div></button>'
     vision_heatmap_html_text += '</div></div></div>'
     # Time Series Heatmap

     return f'<span style="background-color: rgba({color[0]}, {color[1]}, {color[2]}, 0.6); padding:2px; border-radius:3px;">{token}</span>'
 def to_html(attention, clean_text_tokens):
+    ca = attention.sum(axis=0)
     weights = ca / ca.max()
     html_text = " ".join(
         color_token(tok, w) for tok, w in zip(clean_text_tokens, weights)
     # Text Heatmap
     text_tokens = res_json['text_tokens']
+    text_attentions = np.array(res_json['text_attentions'])
     if text_encoder == "Qwen":
         clean_text_tokens = [t.replace("Ġ", "") for t in text_tokens]
+        text_attentions = text_attentions.mean(axis=1) # 28 * seq * seq
+        text_attentions = text_attentions * np.arange(1, 1+text_attentions.shape[1]).reshape((text_attentions.shape[1], 1)) # 28 * seq * seq
+        text_attentions = text_attentions.mean(axis=1)[:, -len(text_tokens):]
     elif text_encoder == "LLaMA":
         clean_text_tokens = [t.replace("▁", "") for t in text_tokens]
+        text_attentions = text_attentions.mean(axis=(1,2))[:, -len(text_tokens):]
     else:
         pass
+    text_attentions_stride = text_attentions.shape[0]//4
     vision_heatmap_html_text = '<div class="gallery-container"><div class="grid-wrap svelte-1atirkn"><div class="grid-container svelte-1atirkn pt-6">'
+    for i in range(4):
         vision_heatmap_html_text += f'<button class="thumbnail-item thumbnail-lg svelte-1atirkn"><div class="svelte-1pijsyv">'
+        vision_heatmap_html_text += to_html(text_attentions[text_attentions_stride*i:text_attentions_stride*(i+1)], clean_text_tokens)
+        vision_heatmap_html_text += f'</div><div class="caption-label svelte-1atirkn">Heatmap from Layer{text_attentions_stride*i}:{text_attentions_stride*(i+1)}</div></button>'
     vision_heatmap_html_text += '</div></div></div>'
     # Time Series Heatmap