rairo committed on
Commit
5c85174
·
verified ·
1 Parent(s): 29d83b8

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +181 -477
main.py CHANGED
@@ -1,508 +1,212 @@
1
- # main.py
2
- #
3
- # DreamBiomeMCP – Flask server exposing HTTP "tools" over your processed JSON
4
- # Files expected in the same directory:
5
- # - dream_entries.json
6
- # - sleep_profiles.json
7
- #
8
- # Endpoints (all JSON):
9
- # GET /health
10
- # GET /dream/series
11
- # POST /dream/samples
12
- # POST /dream/cluster-stats
13
- # GET /sleep/profiles
14
- # GET /sleep/profile/<profile_id>
15
- # POST /sleep/profile-arc
16
- # POST /seed/random-dream-biome
17
- #
18
- # NEW (LlamaIndex, backwards compatible):
19
- # POST /llama/query
20
- # body: { "query": "...", "series": "jasmine1" | null, "top_k": 5 }
21
- # reply: { "enabled": bool, "answer": str, "sources": [ {id, series, text}, ... ], "error": optional }
22
-
23
  import json
24
- import random
25
- import statistics
26
- from pathlib import Path
27
- from typing import Any, Dict, List, Optional
28
-
29
- from flask import Flask, jsonify, request
30
-
31
- # -------------------------------------------------------------------
32
- # Data loading
33
- # -------------------------------------------------------------------
34
-
35
- BASE_DIR = Path(__file__).resolve().parent
36
- DREAM_FILE = BASE_DIR / "dream_entries.json"
37
- SLEEP_FILE = BASE_DIR / "sleep_profiles.json"
38
-
39
- def load_json(path: Path) -> Any:
40
- with path.open("r", encoding="utf-8") as f:
41
- return json.load(f)
42
-
43
- print(f"[DreamBiomeMCP] Loading data from {DREAM_FILE} and {SLEEP_FILE} ...")
44
- ALL_DREAMS: List[Dict[str, Any]] = load_json(DREAM_FILE)
45
- ALL_SLEEP_PROFILES: List[Dict[str, Any]] = load_json(SLEEP_FILE)
46
- print(f"[DreamBiomeMCP] Loaded {len(ALL_DREAMS)} dreams, {len(ALL_SLEEP_PROFILES)} sleep profiles.")
47
-
48
- # Build quick lookup index for sleep profiles by id
49
- SLEEP_INDEX: Dict[str, Dict[str, Any]] = {p["id"]: p for p in ALL_SLEEP_PROFILES}
50
-
51
-
52
- # -------------------------------------------------------------------
53
- # Helper functions
54
- # -------------------------------------------------------------------
55
-
56
- def filter_dreams(
57
- source: Optional[str] = None,
58
- series: Optional[str] = None,
59
- min_words: Optional[int] = None,
60
- max_words: Optional[int] = None,
61
- ) -> List[Dict[str, Any]]:
62
- dreams = ALL_DREAMS
63
- if source:
64
- dreams = [d for d in dreams if str(d.get("source")) == source]
65
- if series:
66
- dreams = [d for d in dreams if str(d.get("series")) == series]
67
- if min_words is not None:
68
- dreams = [d for d in dreams if d.get("length_words", 0) >= min_words]
69
- if max_words is not None:
70
- dreams = [d for d in dreams if d.get("length_words", 0) <= max_words]
71
- return dreams
72
-
73
-
74
- def summarise_dream_cluster(dreams: List[Dict[str, Any]]) -> Dict[str, Any]:
75
- if not dreams:
76
- return {
77
- "count": 0,
78
- "avg_length_words": 0,
79
- "length_words_std": 0,
80
- "metrics_means": {},
81
- }
82
 
83
- lengths = [d.get("length_words", 0) for d in dreams]
84
- avg_len = statistics.mean(lengths)
85
- std_len = statistics.pstdev(lengths) if len(lengths) > 1 else 0.0
86
-
87
- # Aggregate any numeric metrics if present (from Dryad)
88
- numeric_keys = set()
89
- for d in dreams:
90
- metrics = d.get("metrics") or {}
91
- for k, v in metrics.items():
92
- if isinstance(v, (int, float)):
93
- numeric_keys.add(k)
94
-
95
- metrics_means: Dict[str, float] = {}
96
- for key in sorted(numeric_keys):
97
- vals = []
98
- for d in dreams:
99
- m = (d.get("metrics") or {})
100
- v = m.get(key)
101
- if isinstance(v, (int, float)):
102
- vals.append(float(v))
103
- if vals:
104
- metrics_means[key] = statistics.mean(vals)
105
-
106
- return {
107
- "count": len(dreams),
108
- "avg_length_words": round(avg_len, 2),
109
- "length_words_std": round(std_len, 2),
110
- "metrics_means": metrics_means,
111
- }
112
-
113
-
114
- def compute_sleep_arc(profile: Dict[str, Any]) -> Dict[str, Any]:
115
- """Derive a simple "arc" from the stage sequence for storytelling."""
116
- stages = profile.get("stages", [])
117
- if not stages:
118
- return {"segments": []}
119
-
120
- total = len(stages)
121
- step = max(total // 4, 1)
122
- segments = []
123
- for i in range(4):
124
- start = i * step
125
- end = min((i + 1) * step, total)
126
- if start >= end:
127
- break
128
- segment = stages[start:end]
129
- # simple dominant stage for this quarter
130
- counts: Dict[str, int] = {}
131
- for s in segment:
132
- counts[s] = counts.get(s, 0) + 1
133
- dominant = max(counts.items(), key=lambda kv: kv[1])[0]
134
- segments.append(
135
- {
136
- "index": i,
137
- "start_epoch": start,
138
- "end_epoch": end - 1,
139
- "dominant_stage": dominant,
140
- "counts": counts,
141
- }
142
- )
143
 
144
- return {"segments": segments, "total_epochs": total}
145
-
146
-
147
- def pick_random_region_context() -> Dict[str, Any]:
148
- # Extremely small, hard-coded region stats for now.
149
- # You can expand this or swap to a JSON file later.
150
- regions = [
151
- {
152
- "region": "Global",
153
- "insomnia_prevalence": 0.16,
154
- "severe_insomnia": 0.08,
155
- "notes": "Global adult insomnia meta-analysis.",
156
- "reference": "global_insomnia_meta_2016",
157
- },
158
- {
159
- "region": "UK",
160
- "insomnia_prevalence": 0.29,
161
- "severe_insomnia": 0.06,
162
- "notes": "Symptom-based estimate from large UK cohort.",
163
- "reference": "uk_biobank_insomnia",
164
- },
165
- {
166
- "region": "East Asia",
167
- "insomnia_prevalence": 0.20,
168
- "severe_insomnia": 0.07,
169
- "notes": "Approximate pooled prevalence from regional studies.",
170
- "reference": "east_asia_insomnia_review",
171
- },
172
- ]
173
- return random.choice(regions)
174
-
175
-
176
- # -------------------------------------------------------------------
177
- # OPTIONAL: LlamaIndex RAG over dreams
178
- # -------------------------------------------------------------------
179
-
180
- LLAMA_ENABLED: bool = False
181
- LLAMA_INDEX = None
182
- LLAMA_INIT_ERROR: Optional[str] = None
183
-
184
- def init_llama_index() -> None:
185
  """
186
- Build a lightweight in-memory LlamaIndex over ALL_DREAMS.
187
- Retrieval-only (no LLM generation) so we don't need extra keys.
188
  """
189
- global LLAMA_ENABLED, LLAMA_INDEX, LLAMA_INIT_ERROR
 
190
 
191
- print("[LlamaIndex] Initialising dream index ...")
192
  try:
193
- from llama_index.core import VectorStoreIndex, Document, Settings
194
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
195
- # Use a small local embedding model (downloads once on first run).
196
- embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
197
- Settings.embed_model = embed_model
198
- Settings.llm = None # retrieval-only; no built-in LLM
199
-
200
- docs = []
201
- for d in ALL_DREAMS:
202
- text = d.get("text") or d.get("dream") or ""
203
- if not text:
204
- continue
205
- metadata = {
206
- "id": d.get("id"),
207
- "series": str(d.get("series") or ""),
208
- "source": str(d.get("source") or ""),
209
- }
210
- docs.append(Document(text=text, metadata=metadata))
211
-
212
- if not docs:
213
- LLAMA_INIT_ERROR = "No dream texts found to index."
214
- print("[LlamaIndex][WARN]", LLAMA_INIT_ERROR)
215
- return
216
-
217
- LLAMA_INDEX = VectorStoreIndex.from_documents(docs)
218
- LLAMA_ENABLED = True
219
- LLAMA_INIT_ERROR = None
220
- print(f"[LlamaIndex] Index built over {len(docs)} dreams.")
221
-
222
- except ImportError as e:
223
- LLAMA_ENABLED = False
224
- LLAMA_INDEX = None
225
- LLAMA_INIT_ERROR = f"ImportError: {e}"
226
- print("[LlamaIndex][WARN] LlamaIndex not installed. Skipping RAG layer.")
227
- except Exception as e:
228
- LLAMA_ENABLED = False
229
- LLAMA_INDEX = None
230
- LLAMA_INIT_ERROR = f"{type(e).__name__}: {e}"
231
- print("[LlamaIndex][ERROR] Failed to build index:", e)
232
-
233
-
234
- # Initialise LlamaIndex once at startup (but don't crash if it fails).
235
- init_llama_index()
236
-
237
-
238
- def llama_query_impl(query: str, series: Optional[str], top_k: int = 5) -> Dict[str, Any]:
239
- """
240
- Internal helper: run a similarity search over dream texts.
241
- Returns answer+sources, but still completely side-channel to the existing API.
242
- """
243
- if not LLAMA_ENABLED or LLAMA_INDEX is None:
244
- return {
245
- "enabled": False,
246
- "answer": "",
247
- "sources": [],
248
- "error": LLAMA_INIT_ERROR or "LlamaIndex is not enabled on this server.",
249
- }
250
-
251
- from llama_index.core import VectorStoreIndex # type: ignore
252
-
253
- # Basic safety
254
- query = (query or "").strip()
255
- if not query:
256
- return {
257
- "enabled": True,
258
- "answer": "No query text provided.",
259
- "sources": [],
260
- "error": None,
261
  }
262
 
263
- top_k = max(1, min(int(top_k or 5), 10))
264
-
265
- try:
266
- filters = None
267
- if series:
268
- try:
269
- # Newer LlamaIndex metadata filter API
270
- from llama_index.core.vector_stores.types import (
271
- MetadataFilters,
272
- ExactMatchFilter,
273
- )
274
- filters = MetadataFilters(
275
- filters=[ExactMatchFilter(key="series", value=str(series))]
276
- )
277
- except Exception:
278
- # If metadata filter types move around, just ignore filters and search globally.
279
- filters = None
280
-
281
- # Retrieval-only
282
- retriever = LLAMA_INDEX.as_retriever(
283
- similarity_top_k=top_k,
284
- filters=filters,
285
- )
286
- nodes = retriever.retrieve(query)
287
-
288
- sources = []
289
- answer_snippets = []
290
- for n in nodes:
291
- meta = getattr(n, "metadata", {}) or {}
292
- text = getattr(n, "text", "") or ""
293
- sources.append(
294
- {
295
- "id": meta.get("id"),
296
- "series": meta.get("series"),
297
- "source": meta.get("source"),
298
- "text": text,
299
- }
300
- )
301
- answer_snippets.append(text[:500])
302
-
303
- if not answer_snippets:
304
- answer = "No relevant dreams were retrieved for this query."
305
- else:
306
- answer = (
307
- "Top matching dream snippets (retrieved via LlamaIndex):\n\n"
308
- + "\n\n---\n\n".join(answer_snippets)
309
  )
 
310
 
311
- return {
312
- "enabled": True,
313
- "answer": answer,
314
- "sources": sources,
315
- "error": None,
316
- }
317
 
318
  except Exception as e:
319
- return {
320
- "enabled": True,
321
- "answer": "",
322
- "sources": [],
323
- "error": f"LlamaIndex retrieval failed: {e}",
324
- }
325
-
326
 
327
- # -------------------------------------------------------------------
328
- # Flask app
329
- # -------------------------------------------------------------------
330
 
331
- app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
332
 
333
- # ---------------- HOME ROUTE (NEW) ----------------
334
-
335
- @app.route("/", methods=["GET"])
336
- def home() -> Any:
337
- """Landing page describing the MCP server and its tools."""
338
- return jsonify({
339
- "status": "DreamBiomeMCP is running",
340
- "description": "Flask server exposing real dream and sleep data via HTTP tools.",
341
- "endpoints": [
342
- {"method": "GET", "path": "/health", "desc": "Server health and data counts"},
343
- {"method": "GET", "path": "/dream/series", "desc": "List available dream series"},
344
- {"method": "POST", "path": "/dream/samples", "desc": "Get random dream samples (filters: series, min/max words)"},
345
- {"method": "POST", "path": "/dream/cluster-stats", "desc": "Get statistical summary of a dream cluster"},
346
- {"method": "GET", "path": "/sleep/profiles", "desc": "List available sleep profile IDs"},
347
- {"method": "GET", "path": "/sleep/profile/<id>", "desc": "Get full hypnogram data for a specific profile"},
348
- {"method": "POST", "path": "/sleep/profile-arc", "desc": "Get narrative 4-act arc derived from sleep stages"},
349
- {"method": "POST", "path": "/seed/random-dream-biome", "desc": "Get a random seed (dream + sleep + region)"},
350
- {"method": "POST", "path": "/llama/query", "desc": "RAG search over dream texts"},
351
- ]
352
- })
353
-
354
-
355
- @app.route("/health", methods=["GET"])
356
- def health() -> Any:
357
- return jsonify({
358
- "status": "ok",
359
- "dreams": len(ALL_DREAMS),
360
- "sleep_profiles": len(ALL_SLEEP_PROFILES),
361
- "llama_enabled": LLAMA_ENABLED,
362
- "llama_error": LLAMA_INIT_ERROR,
363
- })
364
-
365
-
366
- # ---------------- DREAM ENDPOINTS ----------------
367
-
368
- @app.route("/dream/series", methods=["GET"])
369
- def list_dream_series() -> Any:
370
- series_counts: Dict[str, int] = {}
371
- for d in ALL_DREAMS:
372
- series = str(d.get("series") or "unknown")
373
- series_counts[series] = series_counts.get(series, 0) + 1
374
-
375
- data = [{"series": s, "count": c} for s, c in sorted(series_counts.items(), key=lambda kv: kv[0])]
376
- return jsonify({"series": data})
377
-
378
-
379
- @app.route("/dream/samples", methods=["POST"])
380
- def dream_samples() -> Any:
381
- payload = request.get_json(force=True, silent=True) or {}
382
- source = payload.get("source")
383
- series = payload.get("series")
384
- n = int(payload.get("n", 5))
385
- min_words = payload.get("min_words")
386
- max_words = payload.get("max_words")
387
-
388
- dreams = filter_dreams(
389
- source=source,
390
- series=series,
391
- min_words=int(min_words) if min_words is not None else None,
392
- max_words=int(max_words) if max_words is not None else None,
393
- )
394
-
395
- if not dreams:
396
- return jsonify({"samples": [], "note": "No dreams matched the filter."})
397
-
398
- random.shuffle(dreams)
399
- samples = dreams[: max(1, n)]
400
- return jsonify({"samples": samples})
401
-
402
-
403
- @app.route("/dream/cluster-stats", methods=["POST"])
404
- def dream_cluster_stats() -> Any:
405
- payload = request.get_json(force=True, silent=True) or {}
406
- source = payload.get("source")
407
- series = payload.get("series")
408
- min_words = payload.get("min_words")
409
- max_words = payload.get("max_words")
410
-
411
- dreams = filter_dreams(
412
- source=source,
413
- series=series,
414
- min_words=int(min_words) if min_words is not None else None,
415
- max_words=int(max_words) if max_words is not None else None,
416
- )
417
-
418
- stats = summarise_dream_cluster(dreams)
419
- stats["source"] = source
420
- stats["series"] = series
421
- return jsonify(stats)
422
-
423
-
424
- # ---------------- SLEEP ENDPOINTS ----------------
425
-
426
- @app.route("/sleep/profiles", methods=["GET"])
427
- def list_sleep_profiles() -> Any:
428
- # Return only IDs + high-level summary to keep payload small
429
- summaries = []
430
- for p in ALL_SLEEP_PROFILES:
431
- summaries.append(
432
- {
433
- "id": p["id"],
434
- "total_sleep_time_min": p.get("total_sleep_time_min"),
435
- "sleep_efficiency": p.get("sleep_efficiency"),
436
- "rem_percentage": p.get("rem_percentage"),
437
- "awakenings": p.get("awakenings"),
438
- }
439
  )
440
- return jsonify({"profiles": summaries})
441
-
442
 
443
- @app.route("/sleep/profile/<profile_id>", methods=["GET"])
444
- def get_sleep_profile(profile_id: str) -> Any:
445
- profile = SLEEP_INDEX.get(profile_id)
446
- if not profile:
447
- return jsonify({"error": f"Sleep profile '{profile_id}' not found."}), 404
448
- return jsonify(profile)
449
 
 
 
450
 
451
- @app.route("/sleep/profile-arc", methods=["POST"])
452
- def sleep_profile_arc() -> Any:
453
- payload = request.get_json(force=True, silent=True) or {}
454
- profile_id = payload.get("profile_id")
455
- if not profile_id:
456
- return jsonify({"error": "profile_id is required"}), 400
457
 
458
- profile = SLEEP_INDEX.get(profile_id)
459
- if not profile:
460
- return jsonify({"error": f"Sleep profile '{profile_id}' not found."}), 404
461
-
462
- arc = compute_sleep_arc(profile)
463
- return jsonify({"profile_id": profile_id, "arc": arc})
 
 
 
 
 
 
 
464
 
 
 
 
 
 
465
 
466
- # ---------------- COMBINED SEED ----------------
467
 
468
- @app.route("/seed/random-dream-biome", methods=["POST", "GET"])
469
- def random_dream_biome_seed() -> Any:
470
- """Return a bundle: one random dream, one random sleep profile, and one region context."""
471
- dream = random.choice(ALL_DREAMS) if ALL_DREAMS else None
472
- sleep_profile = random.choice(ALL_SLEEP_PROFILES) if ALL_SLEEP_PROFILES else None
473
- region = pick_random_region_context()
 
 
 
 
 
474
 
475
- return jsonify(
476
- {
477
- "dream_sample": dream,
478
- "sleep_profile": sleep_profile,
479
- "region_sleep": region,
 
 
 
 
 
 
 
 
 
480
  }
481
- )
482
-
483
-
484
- # ---------------- LLAMAINDEX QUERY (NEW) ----------------
485
 
486
- @app.route("/llama/query", methods=["POST"])
487
- def llama_query() -> Any:
488
- """
489
- LlamaIndex-backed retrieval over dream texts.
490
- This is additive – frontend doesn't need to change unless you want to use it.
491
- """
492
- payload = request.get_json(force=True, silent=True) or {}
493
- query = payload.get("query", "")
494
- series = payload.get("series")
495
- top_k = payload.get("top_k", 5)
 
 
 
 
496
 
497
- result = llama_query_impl(query, series, top_k=top_k)
498
- status = 200 if result.get("error") is None else 500
499
- return jsonify(result), status
500
 
 
 
 
501
 
502
- # -------------------------------------------------------------------
503
- # Entry point
504
- # -------------------------------------------------------------------
505
 
506
- if __name__ == "__main__":
507
- # For local testing; HF Spaces will just run `python main.py`
508
- app.run(host="0.0.0.0", port=7860, debug=False)
 
1
+ import os
2
+ import base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import json
4
+ import io
5
+ import tempfile
6
+ import cv2
7
+ import numpy as np
8
+ from flask import Flask
9
+ from flask_socketio import SocketIO, emit
10
+ from PIL import Image
11
+
12
+ # --- 2025 AI STANDARDS ---
13
+ from google import genai
14
+ from google.genai import types
15
+ import azure.cognitiveservices.speech as speechsdk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
app = Flask(__name__)

# CONFIG: Hugging Face runs on port 7860 internally
# CORS: Allow '*' so your Unity APK can connect from anywhere
# NOTE(review): async_mode='eventlet' requires the eventlet package to be
# installed in the runtime image — confirm it is in requirements/Dockerfile.
socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')

# --- SECRETS (Load from Hugging Face Environment Variables) ---
# Each of these is None when the variable is unset; downstream SDK calls
# will then fail at request time rather than at startup.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
AZURE_SPEECH_KEY = os.environ.get("AZURE_SPEECH_KEY")
AZURE_SPEECH_REGION = os.environ.get("AZURE_SPEECH_REGION")

# Initialize Gemini Client (shared by both vision handlers below)
client = genai.Client(api_key=GEMINI_API_KEY)
30
+
31
# --- HELPER: Base64 to PIL Image ---
def decode_image(base64_string):
    """Decode a base64-encoded image payload into an RGB PIL Image.

    Args:
        base64_string: base64 text of an encoded image (JPEG/PNG/...).

    Returns:
        PIL.Image.Image in RGB channel order.

    Raises:
        ValueError: if the bytes cannot be decoded as an image.
        binascii.Error: if the base64 text itself is malformed.
    """
    img_bytes = base64.b64decode(base64_string)
    np_arr = np.frombuffer(img_bytes, np.uint8)
    frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
    if frame is None:
        # cv2.imdecode signals failure by returning None instead of raising;
        # without this check the cvtColor below dies with a cryptic assert.
        raise ValueError("Could not decode image payload")
    # OpenCV decodes to BGR; convert so PIL/Gemini see standard RGB.
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
37
+
38
# ==========================================
# 1. VISUAL RECOGNITION (Wand/Pen)
# ==========================================
@socketio.on('verify_object')
def handle_object_verification(data):
    """
    Called by Unity (either as fallback or primary).
    Payload: { 'image': 'base64...', 'target': 'pen' }

    Emits 'vision_result' with the model's JSON (keys: verified, feedback,
    optionally confidence), or a failure payload if anything raises.
    """
    # Default target when the client omits one.
    target = data.get('target', 'magic wand')
    print(f"👁️ Vision Check: Looking for {target}")

    try:
        # KeyError here (missing 'image') is caught by the except below.
        pil_image = decode_image(data['image'])

        # Optimize for Gemini 2.0 Flash (JPEG, Quality 80)
        img_byte_arr = io.BytesIO()
        pil_image.save(img_byte_arr, format='JPEG', quality=80)
        img_bytes = img_byte_arr.getvalue()

        # Strict Schema: Unity needs a boolean, not a chat
        schema = {
            "type": "OBJECT",
            "properties": {
                "verified": {"type": "BOOLEAN"},
                "confidence": {"type": "NUMBER"},
                "feedback": {"type": "STRING"}
            },
            "required": ["verified", "feedback"]
        }

        prompt = f"""
        You are the 'Eye of the Spellbook'.
        Look at this image. Is the user holding a '{target}'?
        Note: If the target is 'wand', accept a pen, pencil, or stick.
        Return JSON.
        """

        # Structured-output request: response_schema + JSON mime type make the
        # model return parseable JSON; low temperature keeps the verdict stable.
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[prompt, types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg")],
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=schema,
                temperature=0.1
            )
        )

        # Forward the model's JSON verbatim to the client.
        result = json.loads(response.text)
        emit('vision_result', result)

    except Exception as e:
        # Broad catch on purpose: any failure must still answer the client
        # so Unity doesn't hang waiting for 'vision_result'.
        print(f"Vision Error: {e}")
        emit('vision_result', {"verified": False, "feedback": "Server vision error."})
 
 
 
 
 
92
 
 
 
 
93
 
94
# ==========================================
# 2. PRONUNCIATION ASSESSMENT (The Spell)
# ==========================================
@socketio.on('assess_pronunciation')
def handle_pronunciation(data):
    """
    Called when user speaks the spell.
    Payload: { 'audio': 'base64_wav...', 'text': 'Turn this pencil into a wand', 'lang': 'en-US' }

    Emits 'pronunciation_result' with either
    {success, score, fluency, recognized_text} on recognized speech,
    {success: False, score: 0, recognized_text} on silence/noise, or
    {success: False, score: 0, error} on any exception.
    """
    ref_text = data.get('text')
    lang = data.get('lang', 'en-US')
    print(f"🎤 Audio Check: '{ref_text}' in {lang}")

    temp_wav_path = None
    try:
        # Save Base64 to Temp File — the Azure SDK reads audio from a path.
        audio_bytes = base64.b64decode(data['audio'])
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
            temp_wav.write(audio_bytes)
            temp_wav_path = temp_wav.name

        # Azure Config
        speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
        speech_config.speech_recognition_language = lang
        audio_config = speechsdk.audio.AudioConfig(filename=temp_wav_path)

        # Config Assessment (Phoneme level for strictness)
        pronunciation_config = speechsdk.PronunciationAssessmentConfig(
            reference_text=ref_text,
            grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
            granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
            enable_miscue=True
        )

        recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
        pronunciation_config.apply_to(recognizer)

        # Recognize: blocking wait on the single-utterance async call.
        result = recognizer.recognize_once_async().get()

        # Process Results
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            pron_result = speechsdk.PronunciationAssessmentResult(result)
            response = {
                "success": True,
                "score": pron_result.accuracy_score,
                "fluency": pron_result.fluency_score,
                "recognized_text": result.text
            }
        else:
            # NoMatch / Canceled etc. — report a zero score rather than erroring.
            response = {"success": False, "score": 0, "recognized_text": "Silence or Noise"}

        emit('pronunciation_result', response)

    except Exception as e:
        # Broad catch on purpose: always answer the client so Unity isn't left
        # waiting on 'pronunciation_result'.
        print(f"Audio Error: {e}")
        emit('pronunciation_result', {"success": False, "score": 0, "error": str(e)})
    finally:
        # Guaranteed temp-file cleanup on every path (success, no-match, or
        # exception) — previously duplicated in both try and except branches.
        if temp_wav_path and os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)
157
 
 
158
 
159
# ==========================================
# 3. HANDWRITING/OCR (The Book Task)
# ==========================================
@socketio.on('verify_writing')
def handle_writing_verification(data):
    """
    Called when user writes on the book.
    Payload: { 'image': 'base64...', 'expected_word': 'of' }

    Emits 'writing_result' with the model's JSON (keys: correct,
    detected_text), or {correct: False, detected_text: "Error"} on failure.
    """
    # Default expected word when the client omits one.
    expected = data.get('expected_word', 'of')
    print(f"📖 Book Check: Looking for word '{expected}'")

    try:
        # KeyError here (missing 'image') is caught by the except below.
        pil_image = decode_image(data['image'])

        # Re-encode as JPEG to shrink the payload sent to Gemini.
        img_byte_arr = io.BytesIO()
        pil_image.save(img_byte_arr, format='JPEG', quality=80)
        img_bytes = img_byte_arr.getvalue()

        # Strict schema so Unity gets a parseable verdict, not free text.
        schema = {
            "type": "OBJECT",
            "properties": {
                "correct": {"type": "BOOLEAN"},
                "detected_text": {"type": "STRING"}
            },
            "required": ["correct", "detected_text"]
        }

        prompt = f"""
        Analyze the handwriting or text on the book cover in this image.
        Does it say "{expected}"? (Ignore capitalization).
        Return JSON.
        """

        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[prompt, types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg")],
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=schema
            )
        )

        # Forward the model's JSON verbatim to the client.
        result = json.loads(response.text)
        emit('writing_result', result)

    except Exception as e:
        # Broad catch on purpose: always answer so the client doesn't hang.
        print(f"OCR Error: {e}")
        emit('writing_result', {"correct": False, "detected_text": "Error"})
208
 
 
 
 
209
 
210
if __name__ == '__main__':
    # Local/dev entry point: serve the SocketIO app directly on port 7860
    # (the port Hugging Face Spaces exposes). NOTE(review): when run under
    # Gunicorn (as the Dockerfile presumably does), this guard is skipped and
    # the worker imports `app`/`socketio` instead — confirm the Docker CMD.
    socketio.run(app, host='0.0.0.0', port=7860)