jobbler committed on
Commit
a8dd995
·
1 Parent(s): 0fb266c

Hardcode study HTML to reduce API load

Browse files
Files changed (2) hide show
  1. main.py +140 -6
  2. static/index.html +4 -25
main.py CHANGED
@@ -6,6 +6,8 @@ from pydantic import BaseModel
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
  import uvicorn
8
  import os
 
 
9
 
10
  app = FastAPI()
11
 
@@ -19,6 +21,123 @@ app.add_middleware(
19
 
20
  app.mount("/static", StaticFiles(directory="static"), name="static")
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  model_id = "google/gemma-2b"
23
 
24
  # Load the model and tokenizer globally.
@@ -43,14 +162,21 @@ except Exception as e:
43
 
44
  class TextRequest(BaseModel):
45
  text: str
 
 
46
 
47
  @app.post("/analyze")
48
  async def analyze_text(request: TextRequest):
49
  text = request.text
 
 
50
  if not text.strip():
51
  return {"tokens": [], "scores": []}
52
 
53
- inputs = tokenizer(text, return_tensors="pt").to(device)
 
 
 
54
 
55
  with torch.no_grad():
56
  # Ensure we ask the model to output attentions explicitly
@@ -61,12 +187,20 @@ async def analyze_text(request: TextRequest):
61
  print("Warning: Model did not return attentions.")
62
  return {"words": []}
63
 
64
- # outputs.attentions is a tuple of (batch_size, num_heads, sequence_length, sequence_length)
65
- # Get the last layer's attention
66
- attentions = outputs.attentions[-1]
67
 
68
- # Average across all heads
69
- avg_attention = attentions[0].mean(dim=0) # shape: (seq_len, seq_len)
 
 
 
 
 
 
 
 
 
 
70
 
71
  # Calculate importance: sum of attention each token *receives* from the sequence
72
  importance = avg_attention.sum(dim=0).cpu().float().numpy()
 
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
  import uvicorn
8
  import os
9
+ import sqlite3
10
+ from typing import List, Optional
11
 
12
  app = FastAPI()
13
 
 
21
 
22
  app.mount("/static", StaticFiles(directory="static"), name="static")
23
 
24
+ # --- SQLite Database Setup ---
25
+ DB_FILE = "study.db"
26
+
27
def init_db(db_file: Optional[str] = None) -> None:
    """Create the ``study_results`` table if it does not already exist.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    function repeatedly against the same database is harmless.

    Args:
        db_file: Path of the SQLite database to initialise. When omitted,
            the module-level ``DB_FILE`` is used, so existing ``init_db()``
            call sites keep working unchanged.
    """
    conn = sqlite3.connect(DB_FILE if db_file is None else db_file)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS study_results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id TEXT,
                text_id INTEGER,
                condition TEXT, -- "plain" or "flowread"
                reading_time_ms INTEGER,
                score INTEGER,
                total_questions INTEGER,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        conn.commit()
    finally:
        # Close the handle even when the DDL fails so it is never leaked.
        conn.close()
44
+
45
+ init_db()
46
+
47
+ # --- Study Content ---
48
+ STUDY_TEXTS = [
49
+ {
50
+ "id": 1,
51
+ "topic": "Science",
52
+ "text": "The human brain is a marvel of biological engineering, containing approximately 86 billion neurons interconnected by trillions of synapses. These neural networks are responsible for everything from basic autonomic functions, like breathing and heart rate, to complex cognitive processes such as memory, emotion, and problem-solving. Neuroplasticity, the brain's ability to reorganize itself by forming new neural connections throughout life, allows humans to learn new skills, recover from injuries, and adapt to changing environments. This extraordinary adaptability is what makes our species so resilient and capable of continuous intellectual growth.",
53
+ "flowread_html": '<span class="token">The</span><span class="token"> human</span><span class="token highlighted"> brain</span><span class="token"> is</span><span class="token"> a</span><span class="token"> marvel</span><span class="token"> of</span><span class="token"> biological</span><span class="token"> engineering</span><span class="token">,</span><span class="token"> containing</span><span class="token"> approximately</span><span class="token"> </span><span class="token">8</span><span class="token">6</span><span class="token"> billion</span><span class="token highlighted"> neurons</span><span class="token"> interconnected</span><span class="token"> by</span><span class="token"> tri</span><span class="token">lli</span><span class="token">ons</span><span class="token"> of</span><span class="token"> synapses</span><span class="token highlighted">.</span><span class="token"> These</span><span class="token"> neural</span><span class="token"> networks</span><span class="token"> are</span><span class="token"> responsible</span><span class="token"> for</span><span class="token"> everything</span><span class="token"> from</span><span class="token"> basic</span><span class="token"> autonomic</span><span class="token"> functions</span><span class="token">,</span><span class="token"> like</span><span class="token"> breathing</span><span class="token"> and</span><span class="token"> heart</span><span class="token"> rate</span><span class="token">,</span><span class="token"> to</span><span class="token"> complex</span><span class="token"> cognitive</span><span class="token"> processes</span><span class="token"> such</span><span class="token"> as</span><span class="token"> memory</span><span class="token">,</span><span class="token"> emotion</span><span class="token">,</span><span class="token"> and</span><span class="token"> problem</span><span class="token">-</span><span class="token">solving</span><span class="token highlighted">.</span><span class="token"> 
Neurop</span><span class="token highlighted">lastic</span><span class="token highlighted">ity</span><span class="token">,</span><span class="token"> the</span><span class="token"> brain</span><span class="token">\'</span><span class="token">s</span><span class="token"> ability</span><span class="token"> to</span><span class="token"> reorgan</span><span class="token">ize</span><span class="token"> itself</span><span class="token"> by</span><span class="token"> forming</span><span class="token"> new</span><span class="token"> neural</span><span class="token"> connections</span><span class="token"> throughout</span><span class="token"> life</span><span class="token">,</span><span class="token"> allows</span><span class="token"> humans</span><span class="token"> to</span><span class="token"> learn</span><span class="token"> new</span><span class="token"> skills</span><span class="token">,</span><span class="token"> recover</span><span class="token"> from</span><span class="token"> injuries</span><span class="token">,</span><span class="token"> and</span><span class="token"> adapt</span><span class="token"> to</span><span class="token"> changing</span><span class="token"> environments</span><span class="token">.</span><span class="token"> This</span><span class="token"> extraordinary</span><span class="token"> adaptability</span><span class="token"> is</span><span class="token"> what</span><span class="token"> makes</span><span class="token"> our</span><span class="token"> species</span><span class="token"> so</span><span class="token"> resilient</span><span class="token"> and</span><span class="token"> capable</span><span class="token"> of</span><span class="token"> continuous</span><span class="token"> intellectual</span><span class="token"> growth</span><span class="token">.</span>',
54
+ "questions": [
55
+ {
56
+ "question": "Approximately how many neurons are in the human brain?",
57
+ "options": ["86 million", "86 billion", "100 trillion", "50 billion"],
58
+ "correct": 1
59
+ },
60
+ {
61
+ "question": "What is the term for the brain's ability to reorganize itself?",
62
+ "options": ["Synaptic generation", "Neurogenesis", "Neuroplasticity", "Cognitive adaptation"],
63
+ "correct": 2
64
+ }
65
+ ]
66
+ },
67
+ {
68
+ "id": 2,
69
+ "topic": "History",
70
+ "text": "The Industrial Revolution, which began in Britain in the late 18th century, marked a profound turning point in human history. It initiated the transition from agrarian, handicraft economies to industry and machine manufacturing. The invention of the steam engine, pioneered by figures like James Watt, dramatically increased the efficiency of factories and transportation, revolutionizing the textile industry and leading to the expansion of railways. This era brought about unprecedented economic growth and urbanization, fundamentally altering social structures and paving the way for the modern capitalist system, despite also causing significant social inequalities and poor working conditions initially.",
71
+ "flowread_html": '<span class="token">The</span><span class="token"> Industrial</span><span class="token highlighted"> Revolution</span><span class="token">,</span><span class="token"> which</span><span class="token"> began</span><span class="token"> in</span><span class="token"> Britain</span><span class="token"> in</span><span class="token"> the</span><span class="token"> late</span><span class="token"> </span><span class="token">1</span><span class="token">8</span><span class="token">th</span><span class="token"> century</span><span class="token">,</span><span class="token"> marked</span><span class="token"> a</span><span class="token"> profound</span><span class="token"> turning</span><span class="token"> point</span><span class="token"> in</span><span class="token"> human</span><span class="token"> history</span><span class="token">.</span><span class="token"> It</span><span class="token"> initiated</span><span class="token"> the</span><span class="token"> transition</span><span class="token"> from</span><span class="token"> agrarian</span><span class="token">,</span><span class="token"> handic</span><span class="token">raft</span><span class="token"> economies</span><span class="token"> to</span><span class="token"> industry</span><span class="token"> and</span><span class="token"> machine</span><span class="token"> manufacturing</span><span class="token">.</span><span class="token"> The</span><span class="token"> invention</span><span class="token"> of</span><span class="token"> the</span><span class="token"> steam</span><span class="token"> engine</span><span class="token">,</span><span class="token"> pioneered</span><span class="token"> by</span><span class="token"> figures</span><span class="token"> like</span><span class="token"> James</span><span class="token"> Watt</span><span class="token">,</span><span class="token"> dramatically</span><span class="token"> increased</span><span class="token"> the</span><span class="token"> efficiency</span><span 
class="token"> of</span><span class="token"> factories</span><span class="token"> and</span><span class="token"> transportation</span><span class="token">,</span><span class="token"> revolution</span><span class="token">izing</span><span class="token"> the</span><span class="token"> textile</span><span class="token"> industry</span><span class="token"> and</span><span class="token"> leading</span><span class="token"> to</span><span class="token"> the</span><span class="token"> expansion</span><span class="token"> of</span><span class="token"> railways</span><span class="token">.</span><span class="token"> This</span><span class="token"> era</span><span class="token"> brought</span><span class="token"> about</span><span class="token"> unprecedented</span><span class="token"> economic</span><span class="token"> growth</span><span class="token"> and</span><span class="token"> urbanization</span><span class="token">,</span><span class="token"> fundamentally</span><span class="token"> altering</span><span class="token"> social</span><span class="token"> structures</span><span class="token"> and</span><span class="token"> paving</span><span class="token"> the</span><span class="token"> way</span><span class="token"> for</span><span class="token"> the</span><span class="token"> modern</span><span class="token"> capitalist</span><span class="token"> system</span><span class="token">,</span><span class="token"> despite</span><span class="token"> also</span><span class="token"> causing</span><span class="token"> significant</span><span class="token"> social</span><span class="token"> inequalities</span><span class="token"> and</span><span class="token"> poor</span><span class="token"> working</span><span class="token"> conditions</span><span class="token"> initially</span><span class="token">.</span>',
72
+ "questions": [
73
+ {
74
+ "question": "Where did the Industrial Revolution begin?",
75
+ "options": ["United States", "France", "Germany", "Britain"],
76
+ "correct": 3
77
+ },
78
+ {
79
+ "question": "Which invention dramatically increased factory efficiency?",
80
+ "options": ["The cotton gin", "The telegraph", "The steam engine", "The assembly line"],
81
+ "correct": 2
82
+ }
83
+ ]
84
+ }
85
+ ]
86
+
87
+ # --- Study API Endpoints ---
88
+ @app.get("/api/study/texts")
89
def get_study_texts():
    """Return every hardcoded study passage under the ``texts`` key."""
    payload = {"texts": STUDY_TEXTS}
    return payload
91
+
92
class StudySubmission(BaseModel):
    """Request body accepted by ``POST /api/study/submit``.

    Each field is written to the column of the same name in the
    ``study_results`` table.
    """

    # Participant identifier, stored as text.
    user_id: str
    # Identifier of the passage that was read.
    text_id: int
    # Reading condition; the table schema documents "plain" or "flowread".
    condition: str
    # Reading time in milliseconds, as reported by the client.
    reading_time_ms: int
    # Score achieved on the comprehension questions.
    score: int
    # Number of questions asked; used as the accuracy denominator in stats.
    total_questions: int
99
+
100
+ @app.post("/api/study/submit")
101
def submit_study_result(submission: StudySubmission):
    """Persist one participant's result for a single reading task.

    Args:
        submission: Parsed request body whose fields map one-to-one onto
            the columns of ``study_results``.

    Returns:
        ``{"status": "success"}`` once the row has been committed.

    Raises:
        sqlite3.Error: If the insert or commit fails. The connection is
            still closed before the error propagates.
    """
    row = (
        submission.user_id,
        submission.text_id,
        submission.condition,
        submission.reading_time_ms,
        submission.score,
        submission.total_questions,
    )
    conn = sqlite3.connect(DB_FILE)
    try:
        # Values are bound as parameters, never interpolated into the SQL.
        conn.execute(
            "INSERT INTO study_results (user_id, text_id, condition, reading_time_ms, score, total_questions) VALUES (?, ?, ?, ?, ?, ?)",
            row,
        )
        conn.commit()
    finally:
        # Previously a failing INSERT skipped close() and leaked the handle.
        conn.close()
    return {"status": "success"}
111
+
112
+ @app.get("/api/study/stats")
113
def get_study_stats():
    """Return aggregate reading-time and accuracy statistics per condition.

    Returns:
        A dict with the keys ``"plain"`` and ``"flowread"``. Each value
        holds ``avg_reading_time_ms``, ``avg_accuracy_percent`` and
        ``sample_size`` for that condition. The two averages fall back to
        ``0`` when the query yields no value (for example, no rows yet).

    Raises:
        sqlite3.Error: If a query fails. The connection is still closed
            before the error propagates.
    """
    # One parameterized query replaces the two copy-pasted statements.
    query = (
        "SELECT AVG(reading_time_ms), "
        "AVG(CAST(score AS FLOAT) / total_questions) * 100, "
        "COUNT(*) FROM study_results WHERE condition = ?"
    )
    stats = {}
    conn = sqlite3.connect(DB_FILE)
    try:
        for condition in ("plain", "flowread"):
            avg_time, avg_accuracy, sample_size = conn.execute(
                query, (condition,)
            ).fetchone()
            stats[condition] = {
                "avg_reading_time_ms": avg_time or 0,
                "avg_accuracy_percent": avg_accuracy or 0,
                "sample_size": sample_size,
            }
    finally:
        # Close the handle even if a query raises.
        conn.close()
    return stats
139
+
140
+ # --- Saliency API (Existing) ---
141
  model_id = "google/gemma-2b"
142
 
143
  # Load the model and tokenizer globally.
 
162
 
163
  class TextRequest(BaseModel):
164
  text: str
165
+ layers: Optional[List[int]] = None # List of layer indices to average
166
+ preprompt: str = "" # Optional task-driven intent
167
 
168
  @app.post("/analyze")
169
  async def analyze_text(request: TextRequest):
170
  text = request.text
171
+ preprompt = request.preprompt.strip()
172
+
173
  if not text.strip():
174
  return {"tokens": [], "scores": []}
175
 
176
+ # Combine preprompt and text if preprompt exists
177
+ full_text = f"{preprompt}\n\n{text}" if preprompt else text
178
+
179
+ inputs = tokenizer(full_text, return_tensors="pt").to(device)
180
 
181
  with torch.no_grad():
182
  # Ensure we ask the model to output attentions explicitly
 
187
  print("Warning: Model did not return attentions.")
188
  return {"words": []}
189
 
190
+ num_layers = len(outputs.attentions)
 
 
191
 
192
+ selected_layers = request.layers
193
+ if not selected_layers:
194
+ start_layer = num_layers // 4
195
+ end_layer = num_layers - (num_layers // 4)
196
+ selected_layers = list(range(start_layer, end_layer))
197
+
198
+ selected_layers = [l for l in selected_layers if 0 <= l < num_layers]
199
+ if not selected_layers:
200
+ selected_layers = [num_layers - 1]
201
+
202
+ stacked_attentions = torch.stack([outputs.attentions[l] for l in selected_layers])
203
+ avg_attention = stacked_attentions.mean(dim=(0, 2))[0]
204
 
205
  # Calculate importance: sum of attention each token *receives* from the sequence
206
  importance = avg_attention.sum(dim=0).cpu().float().numpy()
static/index.html CHANGED
@@ -20,11 +20,7 @@
20
  margin-bottom: 0.5rem;
21
  text-align: center;
22
  font-weight: 800;
23
- background: linear-gradient(to right, #1c1917 0%, #1c1917 40%, #a8a29e 100%);
24
- -webkit-background-clip: text;
25
- -webkit-text-fill-color: transparent;
26
- background-clip: text;
27
- color: transparent;
28
  }
29
 
30
  p.subtitle {
@@ -211,7 +207,7 @@
211
  </head>
212
  <body>
213
 
214
- <h1>FlowRead</h1>
215
  <p class="subtitle">Accelerate reading comprehension using LLM attention vectors.</p>
216
 
217
  <div class="tabs">
@@ -514,26 +510,9 @@
514
  // 2. Randomize condition order for A/B testing
515
  conditionOrder = Math.random() > 0.5 ? ['plain', 'flowread'] : ['flowread', 'plain'];
516
 
517
- // 3. Pre-calculate FlowRead AI highlighting so the timer isn't affected by network latency
518
  const flowReadTextIndex = conditionOrder.indexOf('flowread');
519
- const textToHighlight = studyTexts[flowReadTextIndex].text;
520
-
521
- const analyzeRes = await fetch('/analyze', {
522
- method: 'POST',
523
- headers: { 'Content-Type': 'application/json' },
524
- body: JSON.stringify({ text: textToHighlight, layer: 'middle' })
525
- });
526
- const analyzeData = await analyzeRes.json();
527
-
528
- let html = '';
529
- const threshold = 0.35; // Fixed threshold for the study
530
- (analyzeData.words || []).forEach((item, index) => {
531
- if (index === 0 && (item.token.includes('<bos>') || item.word.includes('<bos>'))) return;
532
- let className = 'token';
533
- if (item.score >= threshold) className += ' highlighted';
534
- html += `<span class="${className}">${item.token}</span>`;
535
- });
536
- flowReadHTMLs[studyTexts[flowReadTextIndex].id] = html;
537
 
538
  // 4. Start the first reading task
539
  document.getElementById('study-loading').style.display = 'none';
 
20
  margin-bottom: 0.5rem;
21
  text-align: center;
22
  font-weight: 800;
23
+ color: #1c1917;
 
 
 
 
24
  }
25
 
26
  p.subtitle {
 
207
  </head>
208
  <body>
209
 
210
+ <h1>Flow<span style="color: #a8a29e; font-weight: 500;">Read</span></h1>
211
  <p class="subtitle">Accelerate reading comprehension using LLM attention vectors.</p>
212
 
213
  <div class="tabs">
 
510
  // 2. Randomize condition order for A/B testing
511
  conditionOrder = Math.random() > 0.5 ? ['plain', 'flowread'] : ['flowread', 'plain'];
512
 
513
+ // 3. Since the HTML is pre-calculated by the backend, just load it into our dict
514
  const flowReadTextIndex = conditionOrder.indexOf('flowread');
515
+ flowReadHTMLs[studyTexts[flowReadTextIndex].id] = studyTexts[flowReadTextIndex].flowread_html;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
 
517
  // 4. Start the first reading task
518
  document.getElementById('study-loading').style.display = 'none';