Hardcode study HTML to reduce API load
Browse files- main.py +140 -6
- static/index.html +4 -25
main.py
CHANGED
|
@@ -6,6 +6,8 @@ from pydantic import BaseModel
|
|
| 6 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 7 |
import uvicorn
|
| 8 |
import os
|
|
|
|
|
|
|
| 9 |
|
| 10 |
app = FastAPI()
|
| 11 |
|
|
@@ -19,6 +21,123 @@ app.add_middleware(
|
|
| 19 |
|
| 20 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
model_id = "google/gemma-2b"
|
| 23 |
|
| 24 |
# Load the model and tokenizer globally.
|
|
@@ -43,14 +162,21 @@ except Exception as e:
|
|
| 43 |
|
| 44 |
class TextRequest(BaseModel):
|
| 45 |
text: str
|
|
|
|
|
|
|
| 46 |
|
| 47 |
@app.post("/analyze")
|
| 48 |
async def analyze_text(request: TextRequest):
|
| 49 |
text = request.text
|
|
|
|
|
|
|
| 50 |
if not text.strip():
|
| 51 |
return {"tokens": [], "scores": []}
|
| 52 |
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
with torch.no_grad():
|
| 56 |
# Ensure we ask the model to output attentions explicitly
|
|
@@ -61,12 +187,20 @@ async def analyze_text(request: TextRequest):
|
|
| 61 |
print("Warning: Model did not return attentions.")
|
| 62 |
return {"words": []}
|
| 63 |
|
| 64 |
-
|
| 65 |
-
# Get the last layer's attention
|
| 66 |
-
attentions = outputs.attentions[-1]
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# Calculate importance: sum of attention each token *receives* from the sequence
|
| 72 |
importance = avg_attention.sum(dim=0).cpu().float().numpy()
|
|
|
|
| 6 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 7 |
import uvicorn
|
| 8 |
import os
|
| 9 |
+
import sqlite3
|
| 10 |
+
from typing import List, Optional
|
| 11 |
|
| 12 |
app = FastAPI()
|
| 13 |
|
|
|
|
| 21 |
|
| 22 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 23 |
|
| 24 |
+
# --- SQLite Database Setup ---
|
| 25 |
+
DB_FILE = "study.db"
|
| 26 |
+
|
| 27 |
+
def init_db():
    """Create the ``study_results`` table in ``DB_FILE`` if it is missing.

    The DDL uses ``CREATE TABLE IF NOT EXISTS``, so calling this more than
    once (e.g. on every application start) is harmless.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        # ``with conn`` commits on success and rolls back on error, but it
        # does NOT close the connection; the ``finally`` below does that so
        # the handle is released even when the statement fails.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS study_results (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT,
                    text_id INTEGER,
                    condition TEXT, -- "plain" or "flowread"
                    reading_time_ms INTEGER,
                    score INTEGER,
                    total_questions INTEGER,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')
    finally:
        conn.close()
|
| 44 |
+
|
| 45 |
+
init_db()
|
| 46 |
+
|
| 47 |
+
# --- Study Content ---
|
| 48 |
+
STUDY_TEXTS = [
|
| 49 |
+
{
|
| 50 |
+
"id": 1,
|
| 51 |
+
"topic": "Science",
|
| 52 |
+
"text": "The human brain is a marvel of biological engineering, containing approximately 86 billion neurons interconnected by trillions of synapses. These neural networks are responsible for everything from basic autonomic functions, like breathing and heart rate, to complex cognitive processes such as memory, emotion, and problem-solving. Neuroplasticity, the brain's ability to reorganize itself by forming new neural connections throughout life, allows humans to learn new skills, recover from injuries, and adapt to changing environments. This extraordinary adaptability is what makes our species so resilient and capable of continuous intellectual growth.",
|
| 53 |
+
"flowread_html": '<span class="token">The</span><span class="token"> human</span><span class="token highlighted"> brain</span><span class="token"> is</span><span class="token"> a</span><span class="token"> marvel</span><span class="token"> of</span><span class="token"> biological</span><span class="token"> engineering</span><span class="token">,</span><span class="token"> containing</span><span class="token"> approximately</span><span class="token"> </span><span class="token">8</span><span class="token">6</span><span class="token"> billion</span><span class="token highlighted"> neurons</span><span class="token"> interconnected</span><span class="token"> by</span><span class="token"> tri</span><span class="token">lli</span><span class="token">ons</span><span class="token"> of</span><span class="token"> synapses</span><span class="token highlighted">.</span><span class="token"> These</span><span class="token"> neural</span><span class="token"> networks</span><span class="token"> are</span><span class="token"> responsible</span><span class="token"> for</span><span class="token"> everything</span><span class="token"> from</span><span class="token"> basic</span><span class="token"> autonomic</span><span class="token"> functions</span><span class="token">,</span><span class="token"> like</span><span class="token"> breathing</span><span class="token"> and</span><span class="token"> heart</span><span class="token"> rate</span><span class="token">,</span><span class="token"> to</span><span class="token"> complex</span><span class="token"> cognitive</span><span class="token"> processes</span><span class="token"> such</span><span class="token"> as</span><span class="token"> memory</span><span class="token">,</span><span class="token"> emotion</span><span class="token">,</span><span class="token"> and</span><span class="token"> problem</span><span class="token">-</span><span class="token">solving</span><span class="token highlighted">.</span><span class="token"> 
Neurop</span><span class="token highlighted">lastic</span><span class="token highlighted">ity</span><span class="token">,</span><span class="token"> the</span><span class="token"> brain</span><span class="token">\'</span><span class="token">s</span><span class="token"> ability</span><span class="token"> to</span><span class="token"> reorgan</span><span class="token">ize</span><span class="token"> itself</span><span class="token"> by</span><span class="token"> forming</span><span class="token"> new</span><span class="token"> neural</span><span class="token"> connections</span><span class="token"> throughout</span><span class="token"> life</span><span class="token">,</span><span class="token"> allows</span><span class="token"> humans</span><span class="token"> to</span><span class="token"> learn</span><span class="token"> new</span><span class="token"> skills</span><span class="token">,</span><span class="token"> recover</span><span class="token"> from</span><span class="token"> injuries</span><span class="token">,</span><span class="token"> and</span><span class="token"> adapt</span><span class="token"> to</span><span class="token"> changing</span><span class="token"> environments</span><span class="token">.</span><span class="token"> This</span><span class="token"> extraordinary</span><span class="token"> adaptability</span><span class="token"> is</span><span class="token"> what</span><span class="token"> makes</span><span class="token"> our</span><span class="token"> species</span><span class="token"> so</span><span class="token"> resilient</span><span class="token"> and</span><span class="token"> capable</span><span class="token"> of</span><span class="token"> continuous</span><span class="token"> intellectual</span><span class="token"> growth</span><span class="token">.</span>',
|
| 54 |
+
"questions": [
|
| 55 |
+
{
|
| 56 |
+
"question": "Approximately how many neurons are in the human brain?",
|
| 57 |
+
"options": ["86 million", "86 billion", "100 trillion", "50 billion"],
|
| 58 |
+
"correct": 1
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"question": "What is the term for the brain's ability to reorganize itself?",
|
| 62 |
+
"options": ["Synaptic generation", "Neurogenesis", "Neuroplasticity", "Cognitive adaptation"],
|
| 63 |
+
"correct": 2
|
| 64 |
+
}
|
| 65 |
+
]
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"id": 2,
|
| 69 |
+
"topic": "History",
|
| 70 |
+
"text": "The Industrial Revolution, which began in Britain in the late 18th century, marked a profound turning point in human history. It initiated the transition from agrarian, handicraft economies to industry and machine manufacturing. The invention of the steam engine, pioneered by figures like James Watt, dramatically increased the efficiency of factories and transportation, revolutionizing the textile industry and leading to the expansion of railways. This era brought about unprecedented economic growth and urbanization, fundamentally altering social structures and paving the way for the modern capitalist system, despite also causing significant social inequalities and poor working conditions initially.",
|
| 71 |
+
"flowread_html": '<span class="token">The</span><span class="token"> Industrial</span><span class="token highlighted"> Revolution</span><span class="token">,</span><span class="token"> which</span><span class="token"> began</span><span class="token"> in</span><span class="token"> Britain</span><span class="token"> in</span><span class="token"> the</span><span class="token"> late</span><span class="token"> </span><span class="token">1</span><span class="token">8</span><span class="token">th</span><span class="token"> century</span><span class="token">,</span><span class="token"> marked</span><span class="token"> a</span><span class="token"> profound</span><span class="token"> turning</span><span class="token"> point</span><span class="token"> in</span><span class="token"> human</span><span class="token"> history</span><span class="token">.</span><span class="token"> It</span><span class="token"> initiated</span><span class="token"> the</span><span class="token"> transition</span><span class="token"> from</span><span class="token"> agrarian</span><span class="token">,</span><span class="token"> handic</span><span class="token">raft</span><span class="token"> economies</span><span class="token"> to</span><span class="token"> industry</span><span class="token"> and</span><span class="token"> machine</span><span class="token"> manufacturing</span><span class="token">.</span><span class="token"> The</span><span class="token"> invention</span><span class="token"> of</span><span class="token"> the</span><span class="token"> steam</span><span class="token"> engine</span><span class="token">,</span><span class="token"> pioneered</span><span class="token"> by</span><span class="token"> figures</span><span class="token"> like</span><span class="token"> James</span><span class="token"> Watt</span><span class="token">,</span><span class="token"> dramatically</span><span class="token"> increased</span><span class="token"> the</span><span class="token"> efficiency</span><span 
class="token"> of</span><span class="token"> factories</span><span class="token"> and</span><span class="token"> transportation</span><span class="token">,</span><span class="token"> revolution</span><span class="token">izing</span><span class="token"> the</span><span class="token"> textile</span><span class="token"> industry</span><span class="token"> and</span><span class="token"> leading</span><span class="token"> to</span><span class="token"> the</span><span class="token"> expansion</span><span class="token"> of</span><span class="token"> railways</span><span class="token">.</span><span class="token"> This</span><span class="token"> era</span><span class="token"> brought</span><span class="token"> about</span><span class="token"> unprecedented</span><span class="token"> economic</span><span class="token"> growth</span><span class="token"> and</span><span class="token"> urbanization</span><span class="token">,</span><span class="token"> fundamentally</span><span class="token"> altering</span><span class="token"> social</span><span class="token"> structures</span><span class="token"> and</span><span class="token"> paving</span><span class="token"> the</span><span class="token"> way</span><span class="token"> for</span><span class="token"> the</span><span class="token"> modern</span><span class="token"> capitalist</span><span class="token"> system</span><span class="token">,</span><span class="token"> despite</span><span class="token"> also</span><span class="token"> causing</span><span class="token"> significant</span><span class="token"> social</span><span class="token"> inequalities</span><span class="token"> and</span><span class="token"> poor</span><span class="token"> working</span><span class="token"> conditions</span><span class="token"> initially</span><span class="token">.</span>',
|
| 72 |
+
"questions": [
|
| 73 |
+
{
|
| 74 |
+
"question": "Where did the Industrial Revolution begin?",
|
| 75 |
+
"options": ["United States", "France", "Germany", "Britain"],
|
| 76 |
+
"correct": 3
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"question": "Which invention dramatically increased factory efficiency?",
|
| 80 |
+
"options": ["The cotton gin", "The telegraph", "The steam engine", "The assembly line"],
|
| 81 |
+
"correct": 2
|
| 82 |
+
}
|
| 83 |
+
]
|
| 84 |
+
}
|
| 85 |
+
]
|
| 86 |
+
|
| 87 |
+
# --- Study API Endpoints ---
|
| 88 |
+
@app.get("/api/study/texts")
def get_study_texts():
    """Return every entry of ``STUDY_TEXTS`` under the ``"texts"`` key."""
    payload = {"texts": STUDY_TEXTS}
    return payload
|
| 91 |
+
|
| 92 |
+
class StudySubmission(BaseModel):
    # Request body for POST /api/study/submit; each field maps onto the
    # same-named column of the ``study_results`` table.

    # Participant identifier supplied by the client.
    user_id: str
    # ``id`` of the study text that was read.
    text_id: int
    # Reading condition; the stats endpoint recognises "plain" and "flowread".
    condition: str
    # Time spent reading, in milliseconds.
    reading_time_ms: int
    # Number of questions answered correctly.
    score: int
    # Number of questions asked for this text.
    total_questions: int
|
| 99 |
+
|
| 100 |
+
@app.post("/api/study/submit")
def submit_study_result(submission: StudySubmission):
    """Validate and store one participant result in ``study_results``.

    Args:
        submission: Parsed request body describing a single reading task.

    Returns:
        ``{"status": "success"}`` once the row has been committed.

    Raises:
        HTTPException: 422 when the payload is structurally valid but
            semantically unusable (unknown condition, negative reading time,
            or a score outside ``0..total_questions``).
    """
    # Imported locally so this block does not depend on how the module's
    # top-level ``fastapi`` import line is spelled.
    from fastapi import HTTPException

    # The stats endpoint only aggregates these two conditions; anything else
    # would be stored but silently never counted.
    if submission.condition not in ("plain", "flowread"):
        raise HTTPException(
            status_code=422,
            detail="condition must be 'plain' or 'flowread'",
        )
    if submission.reading_time_ms < 0:
        raise HTTPException(
            status_code=422,
            detail="reading_time_ms must be non-negative",
        )
    # total_questions is the divisor of the accuracy average, so it must be
    # positive, and the score cannot exceed it.
    if submission.total_questions < 1:
        raise HTTPException(
            status_code=422,
            detail="total_questions must be at least 1",
        )
    if not 0 <= submission.score <= submission.total_questions:
        raise HTTPException(
            status_code=422,
            detail="score must be between 0 and total_questions",
        )

    row = (
        submission.user_id,
        submission.text_id,
        submission.condition,
        submission.reading_time_ms,
        submission.score,
        submission.total_questions,
    )
    conn = sqlite3.connect(DB_FILE)
    try:
        # ``with conn`` commits on success / rolls back on error; closing is
        # handled by ``finally`` so a failed INSERT does not leak the handle.
        with conn:
            conn.execute(
                "INSERT INTO study_results (user_id, text_id, condition, reading_time_ms, score, total_questions) VALUES (?, ?, ?, ?, ?, ?)",
                row,
            )
    finally:
        conn.close()
    return {"status": "success"}
|
| 111 |
+
|
| 112 |
+
@app.get("/api/study/stats")
def get_study_stats():
    """Aggregate stored study results per reading condition.

    Returns:
        A dict with the keys ``"plain"`` and ``"flowread"`` (in that order).
        Each value holds ``avg_reading_time_ms``, ``avg_accuracy_percent``
        and ``sample_size``. The two averages fall back to ``0`` when SQLite
        returns NULL, i.e. when no rows exist for that condition.
    """
    # One parameterized statement replaces two copies that differed only in
    # the condition literal.
    query = (
        "SELECT AVG(reading_time_ms), "
        "AVG(CAST(score AS FLOAT) / total_questions) * 100, "
        "COUNT(*) "
        "FROM study_results WHERE condition = ?"
    )
    stats = {}
    conn = sqlite3.connect(DB_FILE)
    try:
        for condition in ("plain", "flowread"):
            # An aggregate query without GROUP BY always yields exactly one
            # row, so fetchone() is never None here.
            avg_time, avg_accuracy, sample_size = conn.execute(
                query, (condition,)
            ).fetchone()
            stats[condition] = {
                "avg_reading_time_ms": avg_time or 0,
                "avg_accuracy_percent": avg_accuracy or 0,
                "sample_size": sample_size,
            }
    finally:
        # Release the handle even if a query raises (missing table, lock...).
        conn.close()
    return stats
|
| 139 |
+
|
| 140 |
+
# --- Saliency API (Existing) ---
|
| 141 |
model_id = "google/gemma-2b"
|
| 142 |
|
| 143 |
# Load the model and tokenizer globally.
|
|
|
|
| 162 |
|
| 163 |
class TextRequest(BaseModel):
    # Request body for POST /analyze.

    # Passage to analyse.
    text: str
    # Indices of the attention layers to average; left as None when omitted.
    layers: Optional[List[int]] = None
    # Optional task-driven intent, placed ahead of the text when non-empty.
    preprompt: str = ""
|
| 167 |
|
| 168 |
@app.post("/analyze")
|
| 169 |
async def analyze_text(request: TextRequest):
|
| 170 |
text = request.text
|
| 171 |
+
preprompt = request.preprompt.strip()
|
| 172 |
+
|
| 173 |
if not text.strip():
|
| 174 |
return {"tokens": [], "scores": []}
|
| 175 |
|
| 176 |
+
# Combine preprompt and text if preprompt exists
|
| 177 |
+
full_text = f"{preprompt}\n\n{text}" if preprompt else text
|
| 178 |
+
|
| 179 |
+
inputs = tokenizer(full_text, return_tensors="pt").to(device)
|
| 180 |
|
| 181 |
with torch.no_grad():
|
| 182 |
# Ensure we ask the model to output attentions explicitly
|
|
|
|
| 187 |
print("Warning: Model did not return attentions.")
|
| 188 |
return {"words": []}
|
| 189 |
|
| 190 |
+
num_layers = len(outputs.attentions)
|
|
|
|
|
|
|
| 191 |
|
| 192 |
+
selected_layers = request.layers
|
| 193 |
+
if not selected_layers:
|
| 194 |
+
start_layer = num_layers // 4
|
| 195 |
+
end_layer = num_layers - (num_layers // 4)
|
| 196 |
+
selected_layers = list(range(start_layer, end_layer))
|
| 197 |
+
|
| 198 |
+
selected_layers = [l for l in selected_layers if 0 <= l < num_layers]
|
| 199 |
+
if not selected_layers:
|
| 200 |
+
selected_layers = [num_layers - 1]
|
| 201 |
+
|
| 202 |
+
stacked_attentions = torch.stack([outputs.attentions[l] for l in selected_layers])
|
| 203 |
+
avg_attention = stacked_attentions.mean(dim=(0, 2))[0]
|
| 204 |
|
| 205 |
# Calculate importance: sum of attention each token *receives* from the sequence
|
| 206 |
importance = avg_attention.sum(dim=0).cpu().float().numpy()
|
static/index.html
CHANGED
|
@@ -20,11 +20,7 @@
|
|
| 20 |
margin-bottom: 0.5rem;
|
| 21 |
text-align: center;
|
| 22 |
font-weight: 800;
|
| 23 |
-
|
| 24 |
-
-webkit-background-clip: text;
|
| 25 |
-
-webkit-text-fill-color: transparent;
|
| 26 |
-
background-clip: text;
|
| 27 |
-
color: transparent;
|
| 28 |
}
|
| 29 |
|
| 30 |
p.subtitle {
|
|
@@ -211,7 +207,7 @@
|
|
| 211 |
</head>
|
| 212 |
<body>
|
| 213 |
|
| 214 |
-
<h1>
|
| 215 |
<p class="subtitle">Accelerate reading comprehension using LLM attention vectors.</p>
|
| 216 |
|
| 217 |
<div class="tabs">
|
|
@@ -514,26 +510,9 @@
|
|
| 514 |
// 2. Randomize condition order for A/B testing
|
| 515 |
conditionOrder = Math.random() > 0.5 ? ['plain', 'flowread'] : ['flowread', 'plain'];
|
| 516 |
|
| 517 |
-
// 3.
|
| 518 |
const flowReadTextIndex = conditionOrder.indexOf('flowread');
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
const analyzeRes = await fetch('/analyze', {
|
| 522 |
-
method: 'POST',
|
| 523 |
-
headers: { 'Content-Type': 'application/json' },
|
| 524 |
-
body: JSON.stringify({ text: textToHighlight, layer: 'middle' })
|
| 525 |
-
});
|
| 526 |
-
const analyzeData = await analyzeRes.json();
|
| 527 |
-
|
| 528 |
-
let html = '';
|
| 529 |
-
const threshold = 0.35; // Fixed threshold for the study
|
| 530 |
-
(analyzeData.words || []).forEach((item, index) => {
|
| 531 |
-
if (index === 0 && (item.token.includes('<bos>') || item.word.includes('<bos>'))) return;
|
| 532 |
-
let className = 'token';
|
| 533 |
-
if (item.score >= threshold) className += ' highlighted';
|
| 534 |
-
html += `<span class="${className}">${item.token}</span>`;
|
| 535 |
-
});
|
| 536 |
-
flowReadHTMLs[studyTexts[flowReadTextIndex].id] = html;
|
| 537 |
|
| 538 |
// 4. Start the first reading task
|
| 539 |
document.getElementById('study-loading').style.display = 'none';
|
|
|
|
| 20 |
margin-bottom: 0.5rem;
|
| 21 |
text-align: center;
|
| 22 |
font-weight: 800;
|
| 23 |
+
color: #1c1917;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
|
| 26 |
p.subtitle {
|
|
|
|
| 207 |
</head>
|
| 208 |
<body>
|
| 209 |
|
| 210 |
+
<h1>Flow<span style="color: #a8a29e; font-weight: 500;">Read</span></h1>
|
| 211 |
<p class="subtitle">Accelerate reading comprehension using LLM attention vectors.</p>
|
| 212 |
|
| 213 |
<div class="tabs">
|
|
|
|
| 510 |
// 2. Randomize condition order for A/B testing
|
| 511 |
conditionOrder = Math.random() > 0.5 ? ['plain', 'flowread'] : ['flowread', 'plain'];
|
| 512 |
|
| 513 |
+
// 3. Since the HTML is pre-calculated by the backend, just load it into our dict
|
| 514 |
const flowReadTextIndex = conditionOrder.indexOf('flowread');
|
| 515 |
+
flowReadHTMLs[studyTexts[flowReadTextIndex].id] = studyTexts[flowReadTextIndex].flowread_html;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
|
| 517 |
// 4. Start the first reading task
|
| 518 |
document.getElementById('study-loading').style.display = 'none';
|