Spaces:

cifkao
/

context-probing

Running

App Files Community

cifkao committed on May 31, 2023

Commit

ab89a9d

1 Parent(s): c868028

Use caching when possible

Browse files

Files changed (1) hide show

app.py +14 -11

app.py CHANGED Viewed

@@ -90,7 +90,7 @@ generation_mode = st.radio(
     horizontal=True, label_visibility="collapsed"
 ) == "Generation mode"
 st.caption(
-    "In standard mode, we analyze the model's predictions on the input text. "
     "In generation mode, we generate a continuation of the input text (prompt) "
     "and visualize the contributions of different contexts to each generated token."
 )
@@ -128,7 +128,7 @@ with st.empty():
         with st.expander("Generation options", expanded=False):
             generate_kwargs["max_new_tokens"] = st.slider(
                 "Max. number of generated tokens",
-                min_value=8, max_value=min(1024, max_tokens), value=min(128, max_tokens)
             )
             col1, col2, col3, col4 = st.columns(4)
             with col1:
@@ -222,8 +222,7 @@ def get_logits_processor(temperature, top_p, typical_p, repetition_penalty) -> L
 def generate(model, inputs, metric, window_len, max_new_tokens, **kwargs):
     assert metric == "NLL loss"
     start = max(0, inputs["input_ids"].shape[1] - window_len + 1)
-    inputs_window = {k: v[:, start:] for k, v in inputs.items()}
-    del inputs_window["labels"]
     logits_warper = get_logits_processor(**kwargs)
@@ -231,13 +230,16 @@ def generate(model, inputs, metric, window_len, max_new_tokens, **kwargs):
     eos_idx = None
     pbar = st.progress(0)
     max_steps = max_new_tokens + window_len - 1
     for i in range(max_steps):
         pbar.progress(i / max_steps, f"{i}/{max_steps}")
-        inputs_window["attention_mask"] = torch.ones_like(inputs_window["input_ids"], dtype=torch.long)
-        logits_window = model(**inputs_window).logits.squeeze(0)
         logprobs_window = logits_window.log_softmax(dim=-1)
         if eos_idx is None:
-            probs_next = logits_warper(inputs_window["input_ids"], logits_window[[-1]]).softmax(dim=-1)
             next_token = torch.multinomial(probs_next, num_samples=1).item()
             if next_token == tokenizer.eos_token_id or i >= max_new_tokens - 1:
                 eos_idx = i
@@ -245,12 +247,13 @@ def generate(model, inputs, metric, window_len, max_new_tokens, **kwargs):
             next_token = tokenizer.eos_token_id
         new_ids.append(next_token)
-        inputs_window["input_ids"] = torch.cat([inputs_window["input_ids"], torch.tensor([[next_token]])], dim=1)
-        if inputs_window["input_ids"].shape[1] > window_len:
-            inputs_window["input_ids"] = inputs_window["input_ids"][:, 1:]
         if logprobs_window.shape[0] == window_len:
             logprobs.append(
-                logprobs_window[torch.arange(window_len), inputs_window["input_ids"].squeeze(0)]
             )
         if eos_idx is not None and i - eos_idx >= window_len - 1:

     horizontal=True, label_visibility="collapsed"
 ) == "Generation mode"
 st.caption(
+    "In standard mode, we analyze the model's one-step-ahead predictions on the input text. "
     "In generation mode, we generate a continuation of the input text (prompt) "
     "and visualize the contributions of different contexts to each generated token."
 )
         with st.expander("Generation options", expanded=False):
             generate_kwargs["max_new_tokens"] = st.slider(
                 "Max. number of generated tokens",
+                min_value=8, max_value=min(1024, max_tokens), step=8, value=min(128, max_tokens)
             )
             col1, col2, col3, col4 = st.columns(4)
             with col1:
 def generate(model, inputs, metric, window_len, max_new_tokens, **kwargs):
     assert metric == "NLL loss"
     start = max(0, inputs["input_ids"].shape[1] - window_len + 1)
+    input_ids = inputs["input_ids"][:, start:]
     logits_warper = get_logits_processor(**kwargs)
     eos_idx = None
     pbar = st.progress(0)
     max_steps = max_new_tokens + window_len - 1
+    model_kwargs = dict(use_cache=True)
     for i in range(max_steps):
         pbar.progress(i / max_steps, f"{i}/{max_steps}")
+        model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
+        model_outputs = model(**model_inputs)
+        model_kwargs = model._update_model_kwargs_for_generation(model_outputs, model_kwargs, is_encoder_decoder=False)
+        logits_window = model_outputs.logits.squeeze(0)
         logprobs_window = logits_window.log_softmax(dim=-1)
         if eos_idx is None:
+            probs_next = logits_warper(input_ids, logits_window[[-1]]).softmax(dim=-1)
             next_token = torch.multinomial(probs_next, num_samples=1).item()
             if next_token == tokenizer.eos_token_id or i >= max_new_tokens - 1:
                 eos_idx = i
             next_token = tokenizer.eos_token_id
         new_ids.append(next_token)
+        input_ids = torch.cat([input_ids, torch.tensor([[next_token]])], dim=1)
+        if input_ids.shape[1] > window_len:
+            input_ids = input_ids[:, 1:]
+            model_kwargs.update(use_cache=False, past_key_values=None)
         if logprobs_window.shape[0] == window_len:
             logprobs.append(
+                logprobs_window[torch.arange(window_len), input_ids.squeeze(0)]
             )
         if eos_idx is not None and i - eos_idx >= window_len - 1: