Spaces:

shftan
/

llm-thinking

Runtime error

App Files Files Community

shftan committed on Sep 19, 2025

Commit

af4a860

1 Parent(s): 685c726

Clean up code

Browse files

Files changed (4) hide show

.gitignore +1 -0
__pycache__/utils.cpython-312.pyc +0 -0
app.py +127 -7
utils.py +80 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.pyc

__pycache__/utils.cpython-312.pyc ADDED Viewed

Binary file (4.32 kB). View file

app.py CHANGED Viewed

@@ -1,14 +1,134 @@
 import gradio as gr
 import spaces
 import torch
-zero = torch.Tensor([0]).cuda()
-print(zero.device) # <-- 'cpu' 🤔
 @spaces.GPU
-def greet(n):
-    print(zero.device) # <-- 'cuda:0' 🤗
-    return f"Hello {zero + n} Tensor"
-demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
-demo.launch()

 import gradio as gr
 import spaces
+from huggingface_hub import hf_hub_download
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import pyvene as pv
+from utils import get_tokens, select_concepts, get_concepts_dictionary, get_response, plot_tokens_with_highlights
+#zero = torch.Tensor([0]).cuda()
+#print(zero.device) # <-- 'cpu'
+#@spaces.GPU
+#def greet(n):
+#    print(zero.device) # <-- 'cuda:0'
+#    return f"Hello {zero + n} Tensor"
+# Set model, interpreter, dictionary choices
+# Hugging Face model ID handed to the from_pretrained loaders below.
+model_name = "google/gemma-2-2b-it"
+# Hugging Face repo ID used as repo_id when downloading the interpreter weights.
+interpreter_name = "pyvene/gemma-reft-r1-2b-it-res"
+# File name inside the interpreter repo; the downloaded file is read with torch.load.
+interpreter_path = "l20/weight.pt"
+# Value of the "component" key given to pv.IntervenableModel together with the Encoder.
+interpreter_component = "model.layers[20].output"
+# URL of the JSONL metadata file passed to get_concepts_dictionary.
+dictionary_url = "https://huggingface.co/pyvene/gemma-reft-r1-2b-it-res/raw/main/l20/metadata.jsonl"
+# Interpreter class
+class Encoder(pv.CollectIntervention):
+    """Collect intervention that projects its input and applies ReLU.
+
+    The projection is a bias-free linear layer from ``embed_dim`` to the
+    required ``latent_dim`` keyword argument.
+    """
+    def __init__(self, **kwargs):
+        # All keyword arguments are forwarded to the parent; self.embed_dim is
+        # read afterwards, so it is expected to be set by the parent constructor.
+        super().__init__(keep_last_dim=True, **kwargs)
+        latent_dim = kwargs["latent_dim"]
+        self.proj = torch.nn.Linear(self.embed_dim, latent_dim, bias=False)
+    def forward(self, base, source=None, subspaces=None):
+        """Return ``relu(proj(base))``; ``source`` and ``subspaces`` are unused."""
+        projected = self.proj(base)
+        return torch.relu(projected)
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto').to('cuda')
+# Load fast model inference pipeline
+# Reuse the model and tokenizer loaded above: passing the model name instead
+# makes the pipeline load a second, separate copy of the weights.
+pipe = pipeline(
+    task="text-generation",
+    model=model,
+    tokenizer=tokenizer,
+)
+# Download the interpreter weights (a cached copy is reused when present)
+path_to_params = hf_hub_download(
+    repo_id=interpreter_name,
+    filename=interpreter_path,
+    force_download=False,
+)
+# Load on the CPU so loading does not depend on the device the file was saved from
+params = torch.load(path_to_params, map_location="cpu")
+encoder = Encoder(embed_dim=params.shape[0], latent_dim=params.shape[1])
+encoder.proj.weight.data = params.float()
+# Move to the GPU only after the weight is assigned; moving the module first
+# would leave the newly assigned weight on the device it was loaded to.
+encoder = encoder.cuda()
+pv_model = pv.IntervenableModel({
+    "component": interpreter_component,
+    "intervention": encoder}, model=model).cuda()
+# Load dictionary
+all_concepts = get_concepts_dictionary(dictionary_url)
 @spaces.GPU
+def process_user_input(prompt, concept):
+    """Build the HTML shown by the app for one prompt/concept pair.
+
+    Args:
+        prompt: Text entered by the user; it is tokenized and also sent to the
+            text-generation pipeline.
+        concept: Text entered by the user; dictionary concepts whose name
+            contains it are selected.
+
+    Returns:
+        An HTML string made of the highlighted tokens (empty when no concept
+        matches), the matching-concepts section, the LLM response and a
+        static documentation section. If either input is empty, a short
+        message asking for both is returned instead.
+    """
+    from html import escape  # local import: only this handler needs it
+    # Check if prompt or concept are empty
+    if not prompt or not concept:
+        return "<h3>Please provide both a prompt and a concept</h3>"
+    # The concept is user-typed text; escape it before embedding it in HTML
+    safe_concept = escape(concept)
+    # Convert prompt to tokens
+    tokens, token_ids = get_tokens(tokenizer, prompt)
+    # Get concept IDs and names
+    concept_ids, concept_df = select_concepts(all_concepts, concept)
+    if len(concept_ids) == 0:
+        concepts_html = f"<h3>No relevant concepts found for '{safe_concept}' in LLM thoughts dictionary. Try another concept.</h3>"
+        highlighted_tokens_html = ""
+    else:
+        concepts_html = f"<h3>using the following in the LLM thoughts dictionary relevant to '{safe_concept}' ({len(concept_ids)} out of {len(all_concepts)} concepts):</h3>"
+        styled_table = concept_df.style.hide(axis="index").set_properties(**{'background-color': '#f0f0f0', 'color': 'black', 'border-color': 'white'}).to_html()
+        concepts_html += f'<div style="height: 200px; overflow-y: scroll;">{styled_table}</div>'
+        # Get activations and sum them over the selected concepts for each token
+        acts = pv_model.forward({"input_ids": token_ids}, return_dict=True).collected_activations[0]
+        vals = acts[0, :, concept_ids].sum(-1).cpu()
+        # Get highlighted tokens
+        highlighted_tokens_html = plot_tokens_with_highlights(tokens, vals, concept)
+    # Get LLM response; generated text may contain markup characters, so escape it
+    response = escape(get_response(pipe, prompt))
+    response_html = f"""<h3>LLM response to your prompt:</h3>
+        {response}
+    """
+    # Write documentation
+    documentation_html = f"""<h3>How does this work?</h3>
+        <ul>
+        <li>The LLM model is an instruction-tuned model, <a href="https://huggingface.co/google/gemma-2-2b-it">Google gemma-2-2b-it</a>.
+        <li>The LLM interpreter, <a href="https://huggingface.co/pyvene/gemma-reft-r1-2b-it-res">gemma-reft-r1-2b-it-res</a> (not from Google) is trained on the LLM model's layer 20's residual stream. The choices of layer 20 and the residual stream are arbitrary.
+        <li>The LLM interpreter decomposes the layer 20 residual stream activations into a <a href="https://huggingface.co/pyvene/gemma-reft-r1-2b-it-res/raw/main/l20/metadata.jsonl">dictionary</a> of {len(all_concepts)} human-understandable concepts. This dictionary is not comprehensive; it is possible for a concept you input to not be present in this dictionary.
+        <li>Each token is highlighted in terms of how much information about a given concept is carried in each token.
+        <li>Do you find the results surprising? Any feedback? Any ideas on how I can make this app more useful? Please let me know! Contact: Sarah Tan.
+        </ul>
+    """
+    # Combine HTMLs
+    output_html = highlighted_tokens_html + concepts_html + "<p>&nbsp;</p>" + response_html + "<p>&nbsp;</p>" + documentation_html
+    return output_html
+# Build and launch the Gradio UI only when this file is run as a script
+if __name__ == "__main__":
+    description_text = """
+    ## Does an LLM Think Like You?
+    Input a prompt and a concept that you think is most relevant for your prompt. See how much (if at all) the LLM uses that concept when processing your prompt.
+    Examples:
+    - **Prompt**: What is 2+2? **Concept**: math
+    - **Prompt**: I really like anchovies on pizza but I know a lot of people don't. **Concept**: food
+    """
+    with gr.Blocks() as demo:
+        gr.Markdown(description_text)
+        # Two text inputs side by side, pre-filled with the second example above
+        with gr.Row():
+            prompt_box = gr.Textbox(
+                label="Enter a prompt",
+                value="I really like anchovies on pizza but I know a lot of people don't.",
+            )
+            concept_box = gr.Textbox(
+                label="Enter a concept that you think is most relevant for your prompt",
+                value="food",
+            )
+        run_button = gr.Button("See if an LLM thinks like you!")
+        result_panel = gr.HTML()
+        # Clicking the button renders process_user_input's HTML in the panel
+        run_button.click(fn=process_user_input, inputs=[prompt_box, concept_box], outputs=result_panel)
+    demo.launch()

utils.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import matplotlib.pyplot as plt
+import matplotlib.cm as cm
+import matplotlib.colors as clrs
+import requests
+import json
+import pandas as pd
+import torch
+def get_tokens(tokenizer, text):
+  """Encode ``text`` without special tokens and return ``(tokens, token_ids)``.
+
+  ``token_ids`` is the "pt" tensor returned by ``tokenizer.encode`` moved to
+  "cuda"; ``tokens`` are the token strings for its first row.
+  """
+  encoded = tokenizer.encode(text, return_tensors="pt", add_special_tokens=False)
+  token_ids = encoded.to("cuda")
+  return tokenizer.convert_ids_to_tokens(token_ids[0]), token_ids
+def decorate_prompt(tokenizer, prompt):
+  """Apply the tokenizer's chat template to ``prompt`` and return its token IDs.
+
+  The chat is the user prompt followed by an assistant turn with empty
+  content. The templated text is encoded without extra special tokens and
+  the resulting "pt" tensor is moved to "cuda".
+  """
+  user_turn = {"role": "user", "content": prompt}
+  empty_assistant_turn = {"role": "assistant", "content": ""}
+  templated = tokenizer.apply_chat_template([user_turn, empty_assistant_turn], tokenize=False)
+  return tokenizer.encode(templated, return_tensors="pt", add_special_tokens=False).to("cuda")
+def get_response(model_pipe, prompt):
+   """Call ``model_pipe`` on ``prompt`` and return the first result's 'generated_text'."""
+   outputs = model_pipe(prompt)
+   first_output = outputs[0]
+   return first_output['generated_text']
+def plot_tokens_with_highlights(tokens, values, concept, cmap_name='Oranges', vmin=None, vmax=None):
+    """Render tokens as HTML spans whose background color encodes ``values``.
+
+    Args:
+        tokens: Sequence of token strings.
+        values: One score per token; must support ``.detach().numpy()``
+            (so a tensor is expected to already be on the CPU).
+        concept: Concept name shown in the heading.
+        cmap_name: Name of the Matplotlib colormap to use.
+        vmin: Lower bound of the color scale; defaults to the smallest value.
+        vmax: Upper bound of the color scale; defaults to the largest value.
+
+    Returns:
+        An HTML string: a heading followed by one ``<span>`` per token, each
+        with the score as its tooltip.
+
+    Raises:
+        ValueError: If ``tokens`` and ``values`` differ in length.
+    """
+    from html import escape  # local import: only this function needs it
+    if len(tokens) != len(values):
+        raise ValueError("The number of tokens and values must be the same.")
+    scores = values.detach().numpy()
+    # Set color map (matplotlib.cm.get_cmap was removed in Matplotlib 3.9)
+    cmap = plt.get_cmap(cmap_name)
+    # Only derive bounds from the data when there is data, so that an empty
+    # token list yields just the heading instead of failing on min()/max()
+    if scores.size:
+        if vmin is None:
+            vmin = float(scores.min())
+        if vmax is None:
+            vmax = float(scores.max())
+    norm = clrs.Normalize(vmin=vmin, vmax=vmax)
+    parts = [f"<h3>How much information about the concept '{escape(concept)}' is carried in each token:</h3>"]
+    for token, value in zip(tokens, scores):
+        rgba_color = cmap(norm(value))
+        red, green, blue = (int(channel * 255) for channel in rgba_color[:3])
+        hex_color = f"#{red:02x}{green:02x}{blue:02x}"
+        # Escape the token text: tokens such as "<bos>" would otherwise be
+        # parsed as HTML tags and not be displayed
+        parts.append(f'<span style="background-color: {hex_color};" title="{value:.4f}">{escape(token)}</span> ')
+    return "".join(parts)
+def get_concepts_dictionary(dictionary_url):
+    """Download a JSONL concept dictionary and return ``{concept_id: name}``.
+
+    Each non-empty line is parsed as a JSON object; its "concept_id" and
+    "concept" fields are used and the concept text is capitalized. Lines
+    missing an ID or with an empty concept are skipped.
+
+    Args:
+        dictionary_url: URL of the JSONL file.
+
+    Returns:
+        Dict mapping concept ID to capitalized concept name.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+    """
+    data_dict = {}
+    # The context manager releases the streamed connection; the timeout keeps
+    # a stalled server from blocking forever
+    with requests.get(dictionary_url, stream=True, timeout=30) as response:
+        response.raise_for_status()
+        for line in response.iter_lines(decode_unicode=True):
+            if not line:
+                continue
+            obj = json.loads(line)
+            concept_id = obj.get("concept_id")
+            concept = obj.get("concept")
+            # Compare against None: concept ID 0 is valid but falsy
+            if concept_id is not None and concept:
+                data_dict[concept_id] = concept.capitalize()
+    return data_dict
+def select_concepts(all_concepts, desired_concept):
+  """Select concepts whose name contains ``desired_concept`` (case-insensitive).
+
+  Returns ``(concept_ids, concept_df)``: a tensor of the matching IDs and a
+  DataFrame with one "Concept ID" / "Concept Name" row per match.
+  """
+  needle = desired_concept.lower()
+  concept_ids = [cid for cid, name in all_concepts.items() if needle in name.lower()]
+  rows = [
+    {"Concept ID": cid, "Concept Name": all_concepts.get(cid, "Unknown Concept")}
+    for cid in concept_ids
+  ]
+  return torch.tensor(concept_ids), pd.DataFrame(rows)