Spaces:

cifkao
/

context-probing

Running

cifkao committed on Jun 11, 2023

Commit

106dd6f

1 Parent(s): e4bf282

Add/improve help strings

Files changed (1) hide show

app.py CHANGED Viewed

@@ -36,7 +36,7 @@ generation_mode = st.radio(
 st.caption(
     "In basic mode, we analyze the model's one-step-ahead predictions on the input text. "
     "In generation mode, we generate a continuation of the input text (prompt) "
-    "and visualize the contributions of different contexts to each generated token."
 )
 model_name = st.selectbox(
@@ -50,7 +50,15 @@ model_name = st.selectbox(
     ]
 )
 metric_name = st.radio(
-    "Metric", (["KL divergence"] if not generation_mode else []) + ["NLL loss"], index=0, horizontal=True
 )
 tokenizer = st.cache_resource(AutoTokenizer.from_pretrained, show_spinner=False)(model_name, use_fast=False)
@@ -68,7 +76,9 @@ window_len_options = [
 window_len = st.select_slider(
     r"Window size ($c_\text{max}$)",
     options=window_len_options,
-    value=min(128, window_len_options[-1])
 )
 # Now figure out how many tokens we are allowed to use:
 # window_len * (num_tokens + window_len) * vocab_size <= MAX_MEM

 st.caption(
     "In basic mode, we analyze the model's one-step-ahead predictions on the input text. "
     "In generation mode, we generate a continuation of the input text (prompt) "
+    "and analyze the model's predictions influencing the generated tokens."
 )
 model_name = st.selectbox(
     ]
 )
 metric_name = st.radio(
+    "Metric",
+    (["KL divergence"] if not generation_mode else []) + ["NLL loss"],
+    index=0,
+    horizontal=True,
+    help="**KL divergence** is computed between the predictions with the reduced context "
+         "(corresponding to the highlighted token) and the predictions with the full context "
+         "($c_\\text{max}$ tokens).  \n"
+         "**NLL loss** is the negative log-likelihood loss (a.k.a. cross entropy) for the target "
+         "token."
 )
 tokenizer = st.cache_resource(AutoTokenizer.from_pretrained, show_spinner=False)(model_name, use_fast=False)
 window_len = st.select_slider(
     r"Window size ($c_\text{max}$)",
     options=window_len_options,
+    value=min(128, window_len_options[-1]),
+    help="The maximum context length $c_\\text{max}$ for which we compute the scores. Smaller "
+         "windows are less computationally intensive, allowing for longer inputs."
 )
 # Now figure out how many tokens we are allowed to use:
 # window_len * (num_tokens + window_len) * vocab_size <= MAX_MEM