Delete app
Browse files- app/agents.py +0 -23
- app/api.py +0 -33
- app/app.py +0 -25
- app/db.py +0 -40
- app/model_utils.py +0 -36
app/agents.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
| 1 |
-
# This is a conceptual setup; use LangGraph syntax
|
| 2 |
-
from langgraph import Agent, Workflow
|
| 3 |
-
|
| 4 |
-
# Experiment Agent: runs GPT-2 + activation patching
class ExperimentAgent(Agent):
    """Agent step that generates text and captures activation traces.

    Given a prompt, returns a dict with the GPT-2 continuation and the
    per-layer activation traces produced by activation patching.
    """

    def run(self, prompt):
        # Function-level import of the heavy model helpers.
        from model_utils import generate_text, run_activation_patching

        generated = generate_text(prompt)
        traces = run_activation_patching(prompt)
        return {"generated_text": generated, "activations": traces}
|
| 11 |
-
|
| 12 |
-
# Explanation Agent: converts traces to NL explanation
class ExplanationAgent(Agent):
    """Agent step that turns activation traces into a textual explanation."""

    def run(self, activations):
        # placeholder: summarize important layers
        # NOTE(review): `activations` is currently unused — the explanation
        # is hard-coded until a real summarizer is wired in.
        return "Layer 5 and 7 had the most influence on next token"
|
| 18 |
-
|
| 19 |
-
# Workflow connecting agents
workflow = Workflow()
workflow.add_agent("experiment", ExperimentAgent())
workflow.add_agent("explanation", ExplanationAgent())
# Forward only the activation traces from the experiment step to the explainer.
workflow.connect("experiment", "explanation", lambda out: out["activations"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/api.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
from fastapi import FastAPI
|
| 2 |
-
from pydantic import BaseModel
|
| 3 |
-
from model_utils import generate_text, run_activation_patching
|
| 4 |
-
from db import init_db, save_experiment, get_experiment
|
| 5 |
-
|
| 6 |
-
# FastAPI application exposing the experiment endpoints.
app = FastAPI()
# Make sure the SQLite schema exists before any request is served.
init_db()
|
| 8 |
-
|
| 9 |
-
class ExperimentInput(BaseModel):
    """Request body for POST /generate."""

    # The sentence/prompt to run through GPT-2.
    prompt: str
|
| 11 |
-
|
| 12 |
-
@app.post("/generate")
def generate(input: ExperimentInput):
    """Run GPT-2 plus activation patching on the prompt and persist the run.

    Returns the new experiment id, the generated text, the activation
    traces, and a (placeholder) explanation.
    """
    generated = generate_text(input.prompt)
    traces = run_activation_patching(input.prompt)
    # Placeholder for explanation (use LangGraph later)
    explanation = "Explanation will be generated by agent"
    exp_id = save_experiment(input.prompt, generated, str(traces), explanation)
    return {
        "id": exp_id,
        "generated_text": generated,
        "activations": traces,
        "explanation": explanation,
    }
|
| 20 |
-
|
| 21 |
-
@app.get("/results/{id}")
def get_results(id: int):
    """Look up a stored experiment by id; error payload when missing."""
    row = get_experiment(id)
    # fetchone() yields None when no row matched — report and bail early.
    if row is None:
        return {"error": "Experiment not found"}
    # Column order matches the experiments table schema (SELECT *).
    keys = (
        "id",
        "prompt",
        "generated_text",
        "activation_traces",
        "explanation",
        "timestamp",
    )
    return dict(zip(keys, row))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/app.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
import streamlit as st
import requests

# Base URL of the FastAPI backend (api.py) this UI talks to.
API_URL = "http://127.0.0.1:8000"

st.title("Mechanistic Analysis Interface")

prompt = st.text_area("Enter your sentence:")

# Only run when the button is pressed AND a non-empty prompt was entered.
if st.button("Run Experiment") and prompt:
    with st.spinner("Running GPT-2 + Activation Patching..."):
        resp = requests.post(f"{API_URL}/generate", json={"prompt": prompt})
        data = resp.json()

    st.subheader("Generated Text")
    st.write(data["generated_text"])

    st.subheader("Activation Patching Traces")
    st.write(data["activations"])

    st.subheader("Explanation")
    st.write(data["explanation"])

    st.success(f"Experiment saved with ID: {data['id']}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/db.py
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
import sqlite3
from contextlib import closing
from datetime import datetime

# Path of the SQLite file that stores experiment results.
DB_PATH = "results.db"


def init_db():
    """Create the `experiments` table if it does not exist yet.

    Safe to call repeatedly (CREATE TABLE IF NOT EXISTS).
    """
    # closing() guarantees the connection is released even if execute raises;
    # the original leaked the connection on any SQL error.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.execute("""
        CREATE TABLE IF NOT EXISTS experiments (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            prompt TEXT,
            generated_text TEXT,
            activation_traces TEXT,
            explanation TEXT,
            timestamp TEXT
        )
        """)
        conn.commit()


def save_experiment(prompt, generated_text, activation_traces, explanation):
    """Insert one experiment row (timestamped with local ISO time).

    Returns the autoincremented id of the new row.
    """
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cur = conn.execute(
            """
            INSERT INTO experiments (prompt, generated_text, activation_traces, explanation, timestamp)
            VALUES (?, ?, ?, ?, ?)
            """,
            (prompt, generated_text, activation_traces, explanation,
             datetime.now().isoformat()),
        )
        conn.commit()
        return cur.lastrowid


def get_experiment(exp_id):
    """Return the full row for `exp_id` as a tuple, or None if absent."""
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cur = conn.execute("SELECT * FROM experiments WHERE id=?", (exp_id,))
        return cur.fetchone()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/model_utils.py
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
from functools import lru_cache

import torch
from transformer_lens import HookedTransformer
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
| 4 |
-
|
| 5 |
-
@lru_cache(maxsize=1)
def load_gpt2():
    """Load — and cache — the Hugging Face GPT-2 model and tokenizer.

    Returns the (model, tokenizer) pair. Caching matters because
    generate_text() calls this on every request; without it the full
    model is re-instantiated each time.
    """
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    return model, tokenizer
|
| 9 |
-
|
| 10 |
-
def generate_text(prompt, max_length=50):
    """Generate a GPT-2 continuation of `prompt`.

    `max_length` bounds the total sequence length (prompt tokens included).
    Returns the decoded text with special tokens stripped.
    """
    gpt2, tok = load_gpt2()
    encoded = tok(prompt, return_tensors="pt")
    generated = gpt2.generate(**encoded, max_length=max_length)
    return tok.decode(generated[0], skip_special_tokens=True)
|
| 16 |
-
|
| 17 |
-
# Example: HookedTransformer for activation patching
def run_activation_patching(prompt):
    """Run GPT-2 small over `prompt`, capturing per-layer MLP activations.

    Returns a dict mapping hook names ("blocks.{i}.mlp.hook_post") to numpy
    arrays of the post-MLP activations for each layer.
    """
    model = HookedTransformer.from_pretrained("gpt2-small")
    tokens = model.to_tokens(prompt)

    activations = {}

    def save_activation(value, hook):
        # Detach so the autograd graph is not retained; move to CPU numpy.
        activations[hook.name] = value.detach().cpu().numpy()

    # BUG FIX: the original called model.hook.add_hook(...), which is not a
    # HookedTransformer API; additionally add_hook returns None, so the
    # subsequent h.remove() calls would raise AttributeError and the hooks
    # were never cleanly removed. run_with_hooks registers the hooks for a
    # single forward pass and removes them automatically afterwards.
    fwd_hooks = [
        (f"blocks.{i}.mlp.hook_post", save_activation)
        for i in range(model.cfg.n_layers)
    ]
    model.run_with_hooks(tokens, fwd_hooks=fwd_hooks)

    return activations
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|